1 // Scintilla source code edit control
5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6 // The License.txt file describes the conditions under which this software may be distributed.
19 #include "Scintilla.h"
23 using namespace Scintilla
;
26 //XXX Identical to Perl, put in common area
// Returns true when ch is a carriage return ('\r') or a line feed ('\n').
static inline bool isEOLChar(char ch) {
    return ch == '\r' || ch == '\n';
}
// True when ch, converted to unsigned int, is at most 127.  A negative
// (signed) char converts to a very large unsigned value and is therefore
// rejected, which is what makes the <ctype.h> calls below safe.
#define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
// This one's redundant, but makes for more readable code
#define isHighBitChar(ch) ((unsigned int)(ch) > 127)

// True for ASCII letters and for '_'.
// isalpha() is only reached once ch is known to be in 0..127.
static inline bool isSafeAlpha(char ch) {
    return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
}

// True for ASCII letters, ASCII digits and '_'.
static inline bool isSafeAlnum(char ch) {
    return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
}

// True for any high-bit char, ASCII letters, ASCII digits and '_'.
// The high-bit test comes first, so isalnum() only ever sees 0..127.
static inline bool isSafeAlnumOrHigh(char ch) {
    return isHighBitChar(ch) || isalnum(ch) || ch == '_';
}

// True for the ASCII digits '0'..'9' only.
static inline bool isSafeDigit(char ch) {
    return isSafeASCII(ch) && isdigit(ch);
}
51 static inline bool isSafeWordcharOrHigh(char ch
) {
52 return isHighBitChar(ch
) || iswordchar(ch
);
// True for a space or a horizontal tab; line-end characters are not
// treated as whitespace here.
static inline bool iswhitespace(char ch) {
    return ch == ' ' || ch == '\t';
}
// Upper bound used when copying a word out of the document into a
// fixed-size buffer (buffers are declared with MAX_KEYWORD_LENGTH + 1 bytes).
#define MAX_KEYWORD_LENGTH 200

// Masks a raw style value with STYLE_MASK.  The argument is parenthesized
// so that expressions such as actual_style(a | b) expand with the intended
// precedence.
#define actual_style(style) ((style) & STYLE_MASK)
64 static bool followsDot(unsigned int pos
, Accessor
&styler
) {
66 for (; pos
>= 1; --pos
) {
67 int style
= actual_style(styler
.StyleAt(pos
));
72 if (ch
== ' ' || ch
== '\t') {
80 return styler
[pos
] == '.';
89 // Forward declarations
90 static bool keywordIsAmbiguous(const char *prevWord
);
91 static bool keywordDoStartsLoop(int pos
,
93 static bool keywordIsModifier(const char *word
,
97 static int ClassifyWordRb(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
, char *prevWord
) {
100 unsigned int lim
= end
- start
+ 1; // num chars to copy
101 if (lim
>= MAX_KEYWORD_LENGTH
) {
102 lim
= MAX_KEYWORD_LENGTH
- 1;
104 for (i
= start
, j
= 0; j
< lim
; i
++, j
++) {
109 if (0 == strcmp(prevWord
, "class"))
110 chAttr
= SCE_RB_CLASSNAME
;
111 else if (0 == strcmp(prevWord
, "module"))
112 chAttr
= SCE_RB_MODULE_NAME
;
113 else if (0 == strcmp(prevWord
, "def"))
114 chAttr
= SCE_RB_DEFNAME
;
115 else if (keywords
.InList(s
) && !followsDot(start
- 1, styler
)) {
116 if (keywordIsAmbiguous(s
)
117 && keywordIsModifier(s
, start
, styler
)) {
119 // Demoted keywords are colored as keywords,
120 // but do not affect changes in indentation.
122 // Consider the word 'if':
123 // 1. <<if test ...>> : normal
124 // 2. <<stmt if test>> : demoted
125 // 3. <<lhs = if ...>> : normal: start a new indent level
// 4. <<obj.if = 10>> : color as identifier, since it follows '.'
128 chAttr
= SCE_RB_WORD_DEMOTED
;
130 chAttr
= SCE_RB_WORD
;
133 chAttr
= SCE_RB_IDENTIFIER
;
134 styler
.ColourTo(end
, chAttr
);
135 if (chAttr
== SCE_RB_WORD
) {
144 //XXX Identical to Perl, put in common area
145 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
146 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
150 if (*val
!= styler
[pos
++]) {
158 // Do Ruby better -- find the end of the line, work back,
159 // and then check for leading white space
161 // Precondition: the here-doc target can be indented
162 static bool lookingAtHereDocDelim(Accessor
&styler
,
165 const char *HereDocDelim
)
167 if (!isMatch(styler
, lengthDoc
, pos
, HereDocDelim
)) {
171 char ch
= styler
[pos
];
174 } else if (ch
!= ' ' && ch
!= '\t') {
181 //XXX Identical to Perl, put in common area
182 static char opposite(char ch
) {
194 // Null transitions when we see we've reached the end
195 // and need to relex the curr char.
197 static void redo_char(int &i
, char &ch
, char &chNext
, char &chNext2
,
202 state
= SCE_RB_DEFAULT
;
205 static void advance_char(int &i
, char &ch
, char &chNext
, char &chNext2
) {
211 // precondition: startPos points to one after the EOL char
212 static bool currLineContainsHereDelims(int& startPos
,
218 for (pos
= startPos
- 1; pos
> 0; pos
--) {
219 char ch
= styler
.SafeGetCharAt(pos
);
221 // Leave the pointers where they are -- there are no
222 // here doc delims on the current line, even if
223 // the EOL isn't default style
228 if (actual_style(styler
.StyleAt(pos
)) == SCE_RB_HERE_DELIM
) {
236 // Update the pointers so we don't have to re-analyze the string
242 static bool isEmptyLine(int pos
,
245 int lineCurrent
= styler
.GetLine(pos
);
246 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
247 return (indentCurrent
& SC_FOLDLEVELWHITEFLAG
) != 0;
// Reports whether `keyword` is one of the Ruby keywords after which the
// lexer should prefer to read a following '/' as the start of a regular
// expression (the result is stored in the lexer's preferRE flag).
//
// NOTE(review): the return statements were missing from the listing this
// was rebuilt from; "true on a match, false otherwise" is reconstructed
// from the function name and its caller -- confirm against upstream.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const regexPrefixKeywords[] = {
        "and",
        "begin",
        "break",
        "case",
        "do",
        "else",
        "elsif",
        "if",
        "next",
        "return",
        "when",
        "unless",
        "until",
        "not",
        "or"
    };
    const size_t count = sizeof(regexPrefixKeywords) / sizeof(regexPrefixKeywords[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, regexPrefixKeywords[k]) == 0) {
            return true;
        }
    }
    return false;
}
271 // Look at chars up to but not including endPos
272 // Don't look at styles in case we're looking forward
274 static int skipWhitespace(int startPos
,
277 for (int i
= startPos
; i
< endPos
; i
++) {
278 if (!iswhitespace(styler
[i
])) {
285 // This routine looks for false positives like
287 // There aren't too many.
289 // iPrev points to the start of <<
291 static bool sureThisIsHeredoc(int iPrev
,
295 // Not so fast, since Ruby's so dynamic. Check the context
296 // to make sure we're OK.
298 int lineStart
= styler
.GetLine(iPrev
);
299 int lineStartPosn
= styler
.LineStart(lineStart
);
302 // Find the first word after some whitespace
303 int firstWordPosn
= skipWhitespace(lineStartPosn
, iPrev
, styler
);
304 if (firstWordPosn
>= iPrev
) {
305 // Have something like {^ <<}
306 //XXX Look at the first previous non-comment non-white line
307 // to establish the context. Not too likely though.
310 switch (prevStyle
= styler
.StyleAt(firstWordPosn
)) {
312 case SCE_RB_WORD_DEMOTED
:
313 case SCE_RB_IDENTIFIER
:
319 int firstWordEndPosn
= firstWordPosn
;
320 char *dst
= prevWord
;
322 if (firstWordEndPosn
>= iPrev
||
323 styler
.StyleAt(firstWordEndPosn
) != prevStyle
) {
327 *dst
++ = styler
[firstWordEndPosn
];
328 firstWordEndPosn
+= 1;
330 //XXX Write a style-aware thing to regex scintilla buffer objects
331 if (!strcmp(prevWord
, "undef")
332 || !strcmp(prevWord
, "def")
333 || !strcmp(prevWord
, "alias")) {
334 // These keywords are what we were looking for
340 // Routine that saves us from allocating a buffer for the here-doc target
341 // targetEndPos points one past the end of the current target
342 static bool haveTargetMatch(int currPos
,
347 if (lengthDoc
- currPos
< targetEndPos
- targetStartPos
) {
351 for (i
= targetStartPos
, j
= currPos
;
352 i
< targetEndPos
&& j
< lengthDoc
;
354 if (styler
[i
] != styler
[j
]) {
361 // We need a check because the form
362 // [identifier] <<[target]
363 // is ambiguous. The Ruby lexer/parser resolves it by
364 // looking to see if [identifier] names a variable or a
// function. If it's a function, it's the start of a here-doc.
366 // If it's a var, it's an operator. This lexer doesn't
367 // maintain a symbol table, so it looks ahead to see what's
368 // going on, in cases where we have
369 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
371 // If there's no occurrence of [target] on a line, assume we don't.
373 // return true == yes, we have no heredocs
375 static bool sureThisIsNotHeredoc(int lt2StartPos
,
378 // Use full document, not just part we're styling
379 int lengthDoc
= styler
.Length();
380 int lineStart
= styler
.GetLine(lt2StartPos
);
381 int lineStartPosn
= styler
.LineStart(lineStart
);
383 const bool definitely_not_a_here_doc
= true;
384 const bool looks_like_a_here_doc
= false;
386 // Find the first word after some whitespace
387 int firstWordPosn
= skipWhitespace(lineStartPosn
, lt2StartPos
, styler
);
388 if (firstWordPosn
>= lt2StartPos
) {
389 return definitely_not_a_here_doc
;
391 prevStyle
= styler
.StyleAt(firstWordPosn
);
392 // If we have '<<' following a keyword, it's not a heredoc
393 if (prevStyle
!= SCE_RB_IDENTIFIER
) {
394 return definitely_not_a_here_doc
;
396 int newStyle
= prevStyle
;
397 // Some compilers incorrectly warn about uninit newStyle
398 for (firstWordPosn
+= 1; firstWordPosn
<= lt2StartPos
; firstWordPosn
+= 1) {
399 // Inner loop looks at the name
400 for (; firstWordPosn
<= lt2StartPos
; firstWordPosn
+= 1) {
401 newStyle
= styler
.StyleAt(firstWordPosn
);
402 if (newStyle
!= prevStyle
) {
406 // Do we have '::' or '.'?
407 if (firstWordPosn
< lt2StartPos
&& newStyle
== SCE_RB_OPERATOR
) {
408 char ch
= styler
[firstWordPosn
];
411 } else if (ch
== ':') {
412 if (styler
.StyleAt(++firstWordPosn
) != SCE_RB_OPERATOR
) {
413 return definitely_not_a_here_doc
;
414 } else if (styler
[firstWordPosn
] != ':') {
415 return definitely_not_a_here_doc
;
424 // Skip next batch of white-space
425 firstWordPosn
= skipWhitespace(firstWordPosn
, lt2StartPos
, styler
);
426 if (firstWordPosn
!= lt2StartPos
) {
427 // Have [[^ws[identifier]ws[*something_else*]ws<<
428 return definitely_not_a_here_doc
;
430 // OK, now 'j' will point to the current spot moving ahead
431 int j
= firstWordPosn
+ 1;
432 if (styler
.StyleAt(j
) != SCE_RB_OPERATOR
|| styler
[j
] != '<') {
433 // This shouldn't happen
434 return definitely_not_a_here_doc
;
436 int nextLineStartPosn
= styler
.LineStart(lineStart
+ 1);
437 if (nextLineStartPosn
>= lengthDoc
) {
438 return definitely_not_a_here_doc
;
440 j
= skipWhitespace(j
+ 1, nextLineStartPosn
, styler
);
441 if (j
>= lengthDoc
) {
442 return definitely_not_a_here_doc
;
445 int target_start
, target_end
;
446 // From this point on no more styling, since we're looking ahead
447 if (styler
[j
] == '-') {
451 allow_indent
= false;
454 // Allow for quoted targets.
455 char target_quote
= 0;
460 target_quote
= styler
[j
];
464 if (isSafeAlnum(styler
[j
])) {
465 // Init target_end because some compilers think it won't
466 // be initialized by the time it's used
467 target_start
= target_end
= j
;
470 return definitely_not_a_here_doc
;
472 for (; j
< lengthDoc
; j
++) {
473 if (!isSafeAlnum(styler
[j
])) {
474 if (target_quote
&& styler
[j
] != target_quote
) {
476 return definitely_not_a_here_doc
;
479 // And for now make sure that it's a newline
480 // don't handle arbitrary expressions yet
484 // Now we can move to the character after the string delimiter.
487 j
= skipWhitespace(j
, lengthDoc
, styler
);
488 if (j
>= lengthDoc
) {
489 return definitely_not_a_here_doc
;
492 if (ch
== '#' || isEOLChar(ch
)) {
493 // This is OK, so break and continue;
496 return definitely_not_a_here_doc
;
502 // Just look at the start of each line
503 int last_line
= styler
.GetLine(lengthDoc
- 1);
504 // But don't go too far
505 if (last_line
> lineStart
+ 50) {
506 last_line
= lineStart
+ 50;
508 for (int line_num
= lineStart
+ 1; line_num
<= last_line
; line_num
++) {
510 j
= skipWhitespace(styler
.LineStart(line_num
), lengthDoc
, styler
);
512 j
= styler
.LineStart(line_num
);
514 // target_end is one past the end
515 if (haveTargetMatch(j
, lengthDoc
, target_start
, target_end
, styler
)) {
517 return looks_like_a_here_doc
;
520 return definitely_not_a_here_doc
;
523 //todo: if we aren't looking at a stdio character,
524 // move to the start of the first line that is not in a
525 // multi-line construct
527 static void synchronizeDocStart(unsigned int& startPos
,
531 bool skipWhiteSpace
=false) {
534 int style
= actual_style(styler
.StyleAt(startPos
));
539 // Don't do anything else with these.
544 // Quick way to characterize each line
546 for (lineStart
= styler
.GetLine(pos
); lineStart
> 0; lineStart
--) {
547 // Now look at the style before the previous line's EOL
548 pos
= styler
.LineStart(lineStart
) - 1;
553 char ch
= styler
.SafeGetCharAt(pos
);
554 char chPrev
= styler
.SafeGetCharAt(pos
- 1);
555 if (ch
== '\n' && chPrev
== '\r') {
558 if (styler
.SafeGetCharAt(pos
- 1) == '\\') {
559 // Continuation line -- keep going
560 } else if (actual_style(styler
.StyleAt(pos
)) != SCE_RB_DEFAULT
) {
561 // Part of multi-line construct -- keep going
562 } else if (currLineContainsHereDelims(pos
, styler
)) {
563 // Keep going, with pos and length now pointing
564 // at the end of the here-doc delimiter
565 } else if (skipWhiteSpace
&& isEmptyLine(pos
, styler
)) {
571 pos
= styler
.LineStart(lineStart
);
572 length
+= (startPos
- pos
);
574 initStyle
= SCE_RB_DEFAULT
;
577 static void ColouriseRbDoc(unsigned int startPos
, int length
, int initStyle
,
578 WordList
*keywordlists
[], Accessor
&styler
) {
580 // Lexer for Ruby often has to backtrack to start of current style to determine
581 // which characters are being used as quotes, how deeply nested is the
582 // start position and what the termination string is for here documents
584 WordList
&keywords
= *keywordlists
[0];
590 // 0: '<<' encountered
591 // 1: collect the delimiter
592 // 1b: text between the end of the delimiter and the EOL
593 // 2: here doc text (lines after the delimiter)
594 char Quote
; // the char after '<<'
595 bool Quoted
; // true if Quote in ('\'','"','`')
596 int DelimiterLength
; // strlen(Delimiter)
597 char Delimiter
[256]; // the Delimiter, limit of 256: from Perl
603 CanBeIndented
= false;
629 int numDots
= 0; // For numbers --
630 // Don't start lexing in the middle of a num
632 synchronizeDocStart(startPos
, length
, initStyle
, styler
, // ref args
635 bool preferRE
= true;
636 int state
= initStyle
;
637 int lengthDoc
= startPos
+ length
;
639 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
644 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
645 char chNext
= styler
.SafeGetCharAt(startPos
);
646 // Ruby uses a different mask because bad indentation is marked by oring with 32
647 styler
.StartAt(startPos
, 127);
648 styler
.StartSegment(startPos
);
650 static int q_states
[] = {SCE_RB_STRING_Q
,
656 static const char* q_chars
= "qQrwWx";
658 for (int i
= startPos
; i
< lengthDoc
; i
++) {
660 chNext
= styler
.SafeGetCharAt(i
+ 1);
661 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
663 if (styler
.IsLeadByte(ch
)) {
670 // skip on DOS/Windows
671 //No, don't, because some things will get tagged on,
672 // so we won't recognize keywords, for example
674 if (ch
== '\r' && chNext
== '\n') {
679 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
680 // Begin of here-doc (the line after the here-doc delimiter):
682 styler
.ColourTo(i
-1, state
);
683 // Don't check for a missing quote, just jump into
684 // the here-doc state
685 state
= SCE_RB_HERE_Q
;
688 // Regular transitions
689 if (state
== SCE_RB_DEFAULT
) {
690 if (isSafeDigit(ch
)) {
691 styler
.ColourTo(i
- 1, state
);
692 state
= SCE_RB_NUMBER
;
694 } else if (isHighBitChar(ch
) || iswordstart(ch
)) {
695 styler
.ColourTo(i
- 1, state
);
697 } else if (ch
== '#') {
698 styler
.ColourTo(i
- 1, state
);
699 state
= SCE_RB_COMMENTLINE
;
700 } else if (ch
== '=') {
701 // =begin indicates the start of a comment (doc) block
702 if (i
== 0 || isEOLChar(chPrev
)
704 && styler
.SafeGetCharAt(i
+ 2) == 'e'
705 && styler
.SafeGetCharAt(i
+ 3) == 'g'
706 && styler
.SafeGetCharAt(i
+ 4) == 'i'
707 && styler
.SafeGetCharAt(i
+ 5) == 'n'
708 && !isSafeWordcharOrHigh(styler
.SafeGetCharAt(i
+ 6))) {
709 styler
.ColourTo(i
- 1, state
);
712 styler
.ColourTo(i
- 1, state
);
713 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
716 } else if (ch
== '"') {
717 styler
.ColourTo(i
- 1, state
);
718 state
= SCE_RB_STRING
;
721 } else if (ch
== '\'') {
722 styler
.ColourTo(i
- 1, state
);
723 state
= SCE_RB_CHARACTER
;
726 } else if (ch
== '`') {
727 styler
.ColourTo(i
- 1, state
);
728 state
= SCE_RB_BACKTICKS
;
731 } else if (ch
== '@') {
732 // Instance or class var
733 styler
.ColourTo(i
- 1, state
);
735 state
= SCE_RB_CLASS_VAR
;
736 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
738 state
= SCE_RB_INSTANCE_VAR
;
740 } else if (ch
== '$') {
741 // Check for a builtin global
742 styler
.ColourTo(i
- 1, state
);
743 // Recognize it bit by bit
744 state
= SCE_RB_GLOBAL
;
745 } else if (ch
== '/' && preferRE
) {
747 styler
.ColourTo(i
- 1, state
);
748 state
= SCE_RB_REGEX
;
751 } else if (ch
== '<' && chNext
== '<' && chNext2
!= '=') {
753 // Recognise the '<<' symbol - either a here document or a binary op
754 styler
.ColourTo(i
- 1, state
);
757 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
759 if (! (strchr("\"\'`_-", chNext2
) || isSafeAlpha(chNext2
))) {
760 // It's definitely not a here-doc,
761 // based on Ruby's lexer/parser in the
762 // heredoc_identifier routine.
763 // Nothing else to do.
764 } else if (preferRE
) {
765 if (sureThisIsHeredoc(i
- 1, styler
, prevWord
)) {
766 state
= SCE_RB_HERE_DELIM
;
769 // else leave it in default state
771 if (sureThisIsNotHeredoc(i
- 1, styler
)) {
772 // leave state as default
773 // We don't have all the heuristics Perl has for indications
774 // of a here-doc, because '<<' is overloadable and used
775 // for so many other classes.
777 state
= SCE_RB_HERE_DELIM
;
781 preferRE
= (state
!= SCE_RB_HERE_DELIM
);
782 } else if (ch
== ':') {
783 styler
.ColourTo(i
- 1, state
);
785 // Mark "::" as an operator, not symbol start
786 styler
.ColourTo(i
+ 1, SCE_RB_OPERATOR
);
787 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
788 state
= SCE_RB_DEFAULT
;
790 } else if (isSafeWordcharOrHigh(chNext
)) {
791 state
= SCE_RB_SYMBOL
;
792 } else if (strchr("[*!~+-*/%=<>&^|", chNext
)) {
793 // Do the operator analysis in-line, looking ahead
794 // Based on the table in pickaxe 2nd ed., page 339
795 bool doColoring
= true;
798 if (chNext2
== ']' ) {
799 char ch_tmp
= styler
.SafeGetCharAt(i
+ 3);
803 chNext
= styler
.SafeGetCharAt(i
+ 1);
815 if (chNext2
== '*') {
818 chNext
= styler
.SafeGetCharAt(i
+ 1);
820 advance_char(i
, ch
, chNext
, chNext2
);
825 if (chNext2
== '=' || chNext2
== '~') {
828 chNext
= styler
.SafeGetCharAt(i
+ 1);
830 advance_char(i
, ch
, chNext
, chNext2
);
835 if (chNext2
== '<') {
838 chNext
= styler
.SafeGetCharAt(i
+ 1);
839 } else if (chNext2
== '=') {
840 char ch_tmp
= styler
.SafeGetCharAt(i
+ 3);
841 if (ch_tmp
== '>') { // <=> operator
844 chNext
= styler
.SafeGetCharAt(i
+ 1);
851 advance_char(i
, ch
, chNext
, chNext2
);
856 // Simple one-character operators
857 advance_char(i
, ch
, chNext
, chNext2
);
861 styler
.ColourTo(i
, SCE_RB_SYMBOL
);
862 state
= SCE_RB_DEFAULT
;
864 } else if (!preferRE
) {
865 // Don't color symbol strings (yet)
866 // Just color the ":" and color rest as string
867 styler
.ColourTo(i
, SCE_RB_SYMBOL
);
868 state
= SCE_RB_DEFAULT
;
870 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
871 state
= SCE_RB_DEFAULT
;
874 } else if (ch
== '%') {
875 styler
.ColourTo(i
- 1, state
);
876 bool have_string
= false;
877 if (strchr(q_chars
, chNext
) && !isSafeWordcharOrHigh(chNext2
)) {
879 const char *hit
= strchr(q_chars
, chNext
);
881 state
= q_states
[hit
- q_chars
];
885 chNext
= styler
.SafeGetCharAt(i
+ 1);
888 } else if (!isSafeWordcharOrHigh(chNext
)) {
889 // Ruby doesn't allow high bit chars here,
890 // but the editor host might
891 state
= SCE_RB_STRING_QQ
;
893 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
897 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
901 } else if (isoperator(ch
) || ch
== '.') {
902 styler
.ColourTo(i
- 1, state
);
903 styler
.ColourTo(i
, SCE_RB_OPERATOR
);
904 // If we're ending an expression or block,
905 // assume it ends an object, and the ambivalent
906 // constructs are binary operators
908 // So if we don't have one of these chars,
909 // we aren't ending an object exp'n, and ops
910 // like : << / are unary operators.
912 preferRE
= (strchr(")}].", ch
) == NULL
);
913 // Stay in default state
914 } else if (isEOLChar(ch
)) {
915 // Make sure it's a true line-end, with no backslash
916 if ((ch
== '\r' || (ch
== '\n' && chPrev
!= '\r'))
918 // Assume we've hit the end of the statement.
922 } else if (state
== SCE_RB_WORD
) {
923 if (ch
== '.' || !isSafeWordcharOrHigh(ch
)) {
924 // Words include x? in all contexts,
925 // and <letters>= after either 'def' or a dot
926 // Move along until a complete word is on our left
928 // Default accessor treats '.' as word-chars,
929 // but we don't for now.
932 && isSafeWordcharOrHigh(chPrev
)
934 || strchr(" \t\n\r", chNext
) != NULL
)
935 && (!strcmp(prevWord
, "def")
936 || followsDot(styler
.GetStartSegment(), styler
))) {
937 // <name>= is a name only when being def'd -- Get it the next time
938 // This means that <name>=<name> is always lexed as
939 // <name>, (op, =), <name>
940 } else if ((ch
== '?' || ch
== '!')
941 && isSafeWordcharOrHigh(chPrev
)
942 && !isSafeWordcharOrHigh(chNext
)) {
943 // <name>? is a name -- Get it the next time
944 // But <name>?<name> is always lexed as
945 // <name>, (op, ?), <name>
946 // Same with <name>! to indicate a method that
947 // modifies its target
948 } else if (isEOLChar(ch
)
949 && isMatch(styler
, lengthDoc
, i
- 7, "__END__")) {
950 styler
.ColourTo(i
, SCE_RB_DATASECTION
);
951 state
= SCE_RB_DATASECTION
;
952 // No need to handle this state -- we'll just move to the end
955 int wordStartPos
= styler
.GetStartSegment();
956 int word_style
= ClassifyWordRb(wordStartPos
, i
- 1, keywords
, styler
, prevWord
);
957 switch (word_style
) {
959 preferRE
= RE_CanFollowKeyword(prevWord
);
962 case SCE_RB_WORD_DEMOTED
:
966 case SCE_RB_IDENTIFIER
:
967 if (isMatch(styler
, lengthDoc
, wordStartPos
, "print")) {
969 } else if (isEOLChar(ch
)) {
979 // We might be redefining an operator-method
982 // And if it's the first
983 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
986 } else if (state
== SCE_RB_NUMBER
) {
987 if (isSafeAlnumOrHigh(ch
) || ch
== '_') {
989 } else if (ch
== '.' && ++numDots
== 1) {
992 styler
.ColourTo(i
- 1, state
);
993 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
996 } else if (state
== SCE_RB_COMMENTLINE
) {
998 styler
.ColourTo(i
- 1, state
);
999 state
= SCE_RB_DEFAULT
;
1000 // Use whatever setting we had going into the comment
1002 } else if (state
== SCE_RB_HERE_DELIM
) {
1003 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1004 // Slightly different: if we find an immediate '-',
1005 // the target can appear indented.
1007 if (HereDoc
.State
== 0) { // '<<' encountered
1009 HereDoc
.DelimiterLength
= 0;
1011 HereDoc
.CanBeIndented
= true;
1012 advance_char(i
, ch
, chNext
, chNext2
); // pass by ref
1014 HereDoc
.CanBeIndented
= false;
1016 if (isEOLChar(ch
)) {
1017 // Bail out of doing a here doc if there's no target
1018 state
= SCE_RB_DEFAULT
;
1023 if (ch
== '\'' || ch
== '"' || ch
== '`') {
1024 HereDoc
.Quoted
= true;
1025 HereDoc
.Delimiter
[0] = '\0';
1027 HereDoc
.Quoted
= false;
1028 HereDoc
.Delimiter
[0] = ch
;
1029 HereDoc
.Delimiter
[1] = '\0';
1030 HereDoc
.DelimiterLength
= 1;
1033 } else if (HereDoc
.State
== 1) { // collect the delimiter
1034 if (isEOLChar(ch
)) {
1035 // End the quote now, and go back for more
1036 styler
.ColourTo(i
- 1, state
);
1037 state
= SCE_RB_DEFAULT
;
1042 } else if (HereDoc
.Quoted
) {
1043 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
1044 styler
.ColourTo(i
, state
);
1045 state
= SCE_RB_DEFAULT
;
1048 if (ch
== '\\' && !isEOLChar(chNext
)) {
1049 advance_char(i
, ch
, chNext
, chNext2
);
1051 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
1052 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
1054 } else { // an unquoted here-doc delimiter
1055 if (isSafeAlnumOrHigh(ch
) || ch
== '_') {
1056 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
1057 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
1059 styler
.ColourTo(i
- 1, state
);
1060 redo_char(i
, ch
, chNext
, chNext2
, state
);
1064 if (HereDoc
.DelimiterLength
>= static_cast<int>(sizeof(HereDoc
.Delimiter
)) - 1) {
1065 styler
.ColourTo(i
- 1, state
);
1066 state
= SCE_RB_ERROR
;
1070 } else if (state
== SCE_RB_HERE_Q
) {
1071 // Not needed: HereDoc.State == 2
1072 // Indentable here docs: look backwards
1073 // Non-indentable: look forwards, like in Perl
1075 // Why: so we can quickly resolve things like <<-" abc"
1077 if (!HereDoc
.CanBeIndented
) {
1078 if (isEOLChar(chPrev
)
1079 && isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
1080 styler
.ColourTo(i
- 1, state
);
1081 i
+= HereDoc
.DelimiterLength
- 1;
1082 chNext
= styler
.SafeGetCharAt(i
+ 1);
1083 if (isEOLChar(chNext
)) {
1084 styler
.ColourTo(i
, SCE_RB_HERE_DELIM
);
1085 state
= SCE_RB_DEFAULT
;
1089 // Otherwise we skipped through the here doc faster.
1091 } else if (isEOLChar(chNext
)
1092 && lookingAtHereDocDelim(styler
,
1093 i
- HereDoc
.DelimiterLength
+ 1,
1095 HereDoc
.Delimiter
)) {
1096 styler
.ColourTo(i
- 1 - HereDoc
.DelimiterLength
, state
);
1097 styler
.ColourTo(i
, SCE_RB_HERE_DELIM
);
1098 state
= SCE_RB_DEFAULT
;
1102 } else if (state
== SCE_RB_CLASS_VAR
1103 || state
== SCE_RB_INSTANCE_VAR
1104 || state
== SCE_RB_SYMBOL
) {
1105 if (!isSafeWordcharOrHigh(ch
)) {
1106 styler
.ColourTo(i
- 1, state
);
1107 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1110 } else if (state
== SCE_RB_GLOBAL
) {
1111 if (!isSafeWordcharOrHigh(ch
)) {
1112 // handle special globals here as well
1113 if (chPrev
== '$') {
1115 // Include the next char, like $-a
1116 advance_char(i
, ch
, chNext
, chNext2
);
1118 styler
.ColourTo(i
, state
);
1119 state
= SCE_RB_DEFAULT
;
1121 styler
.ColourTo(i
- 1, state
);
1122 redo_char(i
, ch
, chNext
, chNext2
, state
); // pass by ref
1126 } else if (state
== SCE_RB_POD
) {
1127 // PODs end with ^=end\s, -- any whitespace can follow =end
1128 if (strchr(" \t\n\r", ch
) != NULL
1130 && isEOLChar(styler
[i
- 5])
1131 && isMatch(styler
, lengthDoc
, i
- 4, "=end")) {
1132 styler
.ColourTo(i
- 1, state
);
1133 state
= SCE_RB_DEFAULT
;
1136 } else if (state
== SCE_RB_REGEX
|| state
== SCE_RB_STRING_QR
) {
1137 if (ch
== '\\' && Quote
.Up
!= '\\') {
1139 advance_char(i
, ch
, chNext
, chNext2
);
1140 } else if (ch
== Quote
.Down
) {
1142 if (Quote
.Count
== 0) {
1143 // Include the options
1144 while (isSafeAlpha(chNext
)) {
1147 chNext
= styler
.SafeGetCharAt(i
+ 1);
1149 styler
.ColourTo(i
, state
);
1150 state
= SCE_RB_DEFAULT
;
1153 } else if (ch
== Quote
.Up
) {
1154 // Only if close quoter != open quoter
1157 } else if (ch
== '#' ) {
1158 //todo: distinguish comments from pound chars
1159 // for now, handle as comment
1160 styler
.ColourTo(i
- 1, state
);
1161 bool inEscape
= false;
1162 while (++i
< lengthDoc
) {
1163 ch
= styler
.SafeGetCharAt(i
);
1166 } else if (isEOLChar(ch
)) {
1167 // Comment inside a regex
1168 styler
.ColourTo(i
- 1, SCE_RB_COMMENTLINE
);
1170 } else if (inEscape
) {
1171 inEscape
= false; // don't look at char
1172 } else if (ch
== Quote
.Down
) {
1173 // Have the regular handler deal with this
1174 // to get trailing modifiers.
1180 chNext
= styler
.SafeGetCharAt(i
+ 1);
1181 chNext2
= styler
.SafeGetCharAt(i
+ 2);
1183 // Quotes of all kinds...
1184 } else if (state
== SCE_RB_STRING_Q
|| state
== SCE_RB_STRING_QQ
||
1185 state
== SCE_RB_STRING_QX
|| state
== SCE_RB_STRING_QW
||
1186 state
== SCE_RB_STRING
|| state
== SCE_RB_CHARACTER
||
1187 state
== SCE_RB_BACKTICKS
) {
1188 if (!Quote
.Down
&& !isspacechar(ch
)) {
1190 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1191 //Riddle me this: Is it safe to skip *every* escaped char?
1192 advance_char(i
, ch
, chNext
, chNext2
);
1193 } else if (ch
== Quote
.Down
) {
1195 if (Quote
.Count
== 0) {
1196 styler
.ColourTo(i
, state
);
1197 state
= SCE_RB_DEFAULT
;
1200 } else if (ch
== Quote
.Up
) {
1205 if (state
== SCE_RB_ERROR
) {
1210 if (state
== SCE_RB_WORD
) {
1211 // We've ended on a word, possibly at EOF, and need to
1213 (void) ClassifyWordRb(styler
.GetStartSegment(), lengthDoc
- 1, keywords
, styler
, prevWord
);
1215 styler
.ColourTo(lengthDoc
- 1, state
);
1219 // Helper functions for folding, disambiguation keywords
1220 // Assert that there are no high-bit chars
1222 static void getPrevWord(int pos
,
1229 for (i
= pos
- 1; i
> 0; i
--) {
1230 if (actual_style(styler
.StyleAt(i
)) != word_state
) {
1235 if (i
< pos
- MAX_KEYWORD_LENGTH
) // overflow
1236 i
= pos
- MAX_KEYWORD_LENGTH
;
1237 char *dst
= prevWord
;
1238 for (; i
<= pos
; i
++) {
// Reports whether prevWord is a keyword that can act either as a block
// opener or as a modifier/noise word: if, do, while, unless, until.
// The word classifier only consults keywordIsModifier() for words that
// pass this test.
//
// NOTE(review): the return statements were missing from the listing this
// was rebuilt from; "true on a match, false otherwise" is reconstructed
// from the function name and its caller -- confirm against upstream.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Order from most likely used to least likely
    // Lots of ways to do a loop in Ruby besides 'while/until'
    return !strcmp(prevWord, "if")
        || !strcmp(prevWord, "do")
        || !strcmp(prevWord, "while")
        || !strcmp(prevWord, "unless")
        || !strcmp(prevWord, "until");
}
1259 // Demote keywords in the following conditions:
1260 // if, while, unless, until modify a statement
1261 // do after a while or until, as a noise word (like then after if)
1263 static bool keywordIsModifier(const char *word
,
1267 if (word
[0] == 'd' && word
[1] == 'o' && !word
[2]) {
1268 return keywordDoStartsLoop(pos
, styler
);
1271 int style
= SCE_RB_DEFAULT
;
1272 int lineStart
= styler
.GetLine(pos
);
1273 int lineStartPosn
= styler
.LineStart(lineStart
);
1275 while (--pos
>= lineStartPosn
) {
1276 style
= actual_style(styler
.StyleAt(pos
));
1277 if (style
== SCE_RB_DEFAULT
) {
1278 if (iswhitespace(ch
= styler
[pos
])) {
1280 } else if (ch
== '\r' || ch
== '\n') {
1281 // Scintilla's LineStart() and GetLine() routines aren't
1282 // platform-independent, so if we have text prepared with
1283 // a different system we can't rely on it.
1290 if (pos
< lineStartPosn
) {
1291 return false; //XXX not quite right if the prev line is a continuation
1293 // First things where the action is unambiguous
1295 case SCE_RB_DEFAULT
:
1296 case SCE_RB_COMMENTLINE
:
1298 case SCE_RB_CLASSNAME
:
1299 case SCE_RB_DEFNAME
:
1300 case SCE_RB_MODULE_NAME
:
1302 case SCE_RB_OPERATOR
:
1305 // Watch out for uses of 'else if'
1306 //XXX: Make a list of other keywords where 'if' isn't a modifier
1307 // and can appear legitimately
1308 // Formulate this to avoid warnings from most compilers
1309 if (strcmp(word
, "if") == 0) {
1310 char prevWord
[MAX_KEYWORD_LENGTH
+ 1];
1311 getPrevWord(pos
, prevWord
, styler
, SCE_RB_WORD
);
1312 return strcmp(prevWord
, "else") != 0;
1318 // Assume that if the keyword follows an operator,
1319 // usually it's a block assignment, like
1320 // a << if x then y else z
1333 #define WHILE_BACKWARDS "elihw"
1334 #define UNTIL_BACKWARDS "litnu"
1336 // Nothing fancy -- look to see if we follow a while/until somewhere
1337 // on the current line
1339 static bool keywordDoStartsLoop(int pos
,
1344 int lineStart
= styler
.GetLine(pos
);
1345 int lineStartPosn
= styler
.LineStart(lineStart
);
1347 while (--pos
>= lineStartPosn
) {
1348 style
= actual_style(styler
.StyleAt(pos
));
1349 if (style
== SCE_RB_DEFAULT
) {
1350 if ((ch
= styler
[pos
]) == '\r' || ch
== '\n') {
1351 // Scintilla's LineStart() and GetLine() routines aren't
1352 // platform-independent, so if we have text prepared with
1353 // a different system we can't rely on it.
1356 } else if (style
== SCE_RB_WORD
) {
1357 // Check for while or until, but write the word in backwards
1358 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
1359 char *dst
= prevWord
;
1362 for (start_word
= pos
;
1363 start_word
>= lineStartPosn
&& actual_style(styler
.StyleAt(start_word
)) == SCE_RB_WORD
;
1365 if (++wordLen
< MAX_KEYWORD_LENGTH
) {
1366 *dst
++ = styler
[start_word
];
1370 // Did we see our keyword?
1371 if (!strcmp(prevWord
, WHILE_BACKWARDS
)
1372 || !strcmp(prevWord
, UNTIL_BACKWARDS
)) {
1375 // We can move pos to the beginning of the keyword, and then
1376 // accept another decrement, as we can never have two contiguous
1380 // <- move to start_word
1382 // <- loop decrement
1383 // ^ # pointing to end of word1 is fine
1393 * The language is quite complex to analyze without a full parse.
1394 * For example, this line shouldn't affect fold level:
1396 * print "hello" if feeling_friendly?
1398 * Neither should this:
1401 * if feeling_friendly?
1406 * if feeling_friendly? #++
1411 * So we cheat, by actually looking at the existing indentation
1412 * levels for each line, and just echoing them back. Like Python.
1413 * Then if we get better at it, we'll take braces into consideration,
1414 * which always affect folding levels.
1416 * How the keywords should work:
1418 * __FILE__ __LINE__ BEGIN END alias and
1419 * defined? false in nil not or self super then
1423 * begin class def do for module when {
1428 * Increment if these start a statement
1429 * if unless until while -- do nothing if they're modifiers
1431 * These end a block if there's no modifier, but don't bother
1432 * break next redo retry return yield
1434 * These temporarily de-indent, but re-indent
1435 * case else elsif ensure rescue
1437 * This means that the folder reflects indentation rather
1438 * than setting it. The language-service updates indentation
1439 * when users type return and finish entering de-denters.
1441 * Later offer to fold POD, here-docs, strings, and blocks of comments
1444 static void FoldRbDoc(unsigned int startPos
, int length
, int initStyle
,
1445 WordList
*[], Accessor
&styler
) {
1446 const bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
1447 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
1449 synchronizeDocStart(startPos
, length
, initStyle
, styler
, // ref args
1451 unsigned int endPos
= startPos
+ length
;
1452 int visibleChars
= 0;
1453 int lineCurrent
= styler
.GetLine(startPos
);
1454 int levelPrev
= startPos
== 0 ? 0 : (styler
.LevelAt(lineCurrent
)
1455 & SC_FOLDLEVELNUMBERMASK
1456 & ~SC_FOLDLEVELBASE
);
1457 int levelCurrent
= levelPrev
;
1458 char chNext
= styler
[startPos
];
1459 int styleNext
= styler
.StyleAt(startPos
);
1460 int stylePrev
= startPos
<= 1 ? SCE_RB_DEFAULT
: styler
.StyleAt(startPos
- 1);
1461 bool buffer_ends_with_eol
= false;
1462 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
1464 chNext
= styler
.SafeGetCharAt(i
+ 1);
1465 int style
= styleNext
;
1466 styleNext
= styler
.StyleAt(i
+ 1);
1467 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
1468 if (style
== SCE_RB_COMMENTLINE
) {
1469 if (foldComment
&& stylePrev
!= SCE_RB_COMMENTLINE
) {
1470 if (chNext
== '{') {
1472 } else if (chNext
== '}') {
1476 } else if (style
== SCE_RB_OPERATOR
) {
1477 if (strchr("[{(", ch
)) {
1479 } else if (strchr(")}]", ch
)) {
1480 // Don't decrement below 0
1481 if (levelCurrent
> 0)
1484 } else if (style
== SCE_RB_WORD
&& styleNext
!= SCE_RB_WORD
) {
1485 // Look at the keyword on the left and decide what to do
1486 char prevWord
[MAX_KEYWORD_LENGTH
+ 1]; // 1 byte for zero
1488 getPrevWord(i
, prevWord
, styler
, SCE_RB_WORD
);
1489 if (!strcmp(prevWord
, "end")) {
1490 // Don't decrement below 0
1491 if (levelCurrent
> 0)
1493 } else if ( !strcmp(prevWord
, "if")
1494 || !strcmp(prevWord
, "def")
1495 || !strcmp(prevWord
, "class")
1496 || !strcmp(prevWord
, "module")
1497 || !strcmp(prevWord
, "begin")
1498 || !strcmp(prevWord
, "case")
1499 || !strcmp(prevWord
, "do")
1500 || !strcmp(prevWord
, "while")
1501 || !strcmp(prevWord
, "unless")
1502 || !strcmp(prevWord
, "until")
1503 || !strcmp(prevWord
, "for")
1509 int lev
= levelPrev
;
1510 if (visibleChars
== 0 && foldCompact
)
1511 lev
|= SC_FOLDLEVELWHITEFLAG
;
1512 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1513 lev
|= SC_FOLDLEVELHEADERFLAG
;
1514 styler
.SetLevel(lineCurrent
, lev
|SC_FOLDLEVELBASE
);
1516 levelPrev
= levelCurrent
;
1518 buffer_ends_with_eol
= true;
1519 } else if (!isspacechar(ch
)) {
1521 buffer_ends_with_eol
= false;
1524 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1525 if (!buffer_ends_with_eol
) {
1527 int new_lev
= levelCurrent
;
1528 if (visibleChars
== 0 && foldCompact
)
1529 new_lev
|= SC_FOLDLEVELWHITEFLAG
;
1530 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1531 new_lev
|= SC_FOLDLEVELHEADERFLAG
;
1532 levelCurrent
= new_lev
;
1534 styler
.SetLevel(lineCurrent
, levelCurrent
|SC_FOLDLEVELBASE
);
1537 static const char * const rubyWordListDesc
[] = {
// The Ruby lexer module object: constructed from the SCLEX_RUBY id, the
// ColouriseRbDoc routine, the name "ruby", the FoldRbDoc routine and the
// rubyWordListDesc table.
1542 LexerModule
lmRuby(SCLEX_RUBY
, ColouriseRbDoc
, "ruby", FoldRbDoc
, rubyWordListDesc
);