simplify code so it always returns the same object
[wxWidgets.git] / src / stc / scintilla / lexers / LexRuby.cxx
CommitLineData
65ec6247
RD
1// Scintilla source code edit control
2/** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <stdlib.h>
9#include <string.h>
65ec6247
RD
10#include <stdio.h>
11#include <stdarg.h>
1dcf666d
RD
12#include <assert.h>
13#include <ctype.h>
65ec6247 14
1dcf666d 15#include "ILexer.h"
65ec6247
RD
16#include "Scintilla.h"
17#include "SciLexer.h"
18
1dcf666d
RD
19#include "WordList.h"
20#include "LexAccessor.h"
21#include "Accessor.h"
22#include "StyleContext.h"
23#include "CharacterSet.h"
24#include "LexerModule.h"
25
1e9bafca
RD
26#ifdef SCI_NAMESPACE
27using namespace Scintilla;
28#endif
29
30//XXX Identical to Perl, put in common area
// Report whether ch is one of the two line-terminator bytes (CR or LF).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
34
// True when ch, converted to unsigned int, lies in the 7-bit range 0..127.
// A negative (signed) char converts to a very large unsigned value and is
// therefore rejected.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128u)
// Exact complement of isSafeASCII; kept as its own name because the
// call sites read better with it.
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128u)
38
39static inline bool isSafeAlpha(char ch) {
40 return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
41}
42
43static inline bool isSafeAlnum(char ch) {
44 return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
45}
46
47static inline bool isSafeAlnumOrHigh(char ch) {
48 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
49}
50
51static inline bool isSafeDigit(char ch) {
52 return isSafeASCII(ch) && isdigit(ch);
53}
54
55static inline bool isSafeWordcharOrHigh(char ch) {
7e0c58e9
RD
56 // Error: scintilla's KeyWords.h includes '.' as a word-char
57 // we want to separate things that can take methods from the
58 // methods.
59 return isHighBitChar(ch) || isalnum(ch) || ch == '_';
1e9bafca
RD
60}
61
// True for a space or a horizontal tab; line terminators do not count.
static inline bool iswhitespace(char ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
65
// Size of the scratch buffer that holds one word while it is classified.
#define MAX_KEYWORD_LENGTH 200

// Mask selecting the low six bits of a style byte.
#define STYLE_MASK 63
// Style value with everything above the low six bits stripped.  The
// argument is parenthesised so that an expression argument such as
// `a | b` is masked as a whole rather than only its last operand.
#define actual_style(style) ((style) & STYLE_MASK)
70
71static bool followsDot(unsigned int pos, Accessor &styler) {
72 styler.Flush();
73 for (; pos >= 1; --pos) {
74 int style = actual_style(styler.StyleAt(pos));
75 char ch;
76 switch (style) {
77 case SCE_RB_DEFAULT:
78 ch = styler[pos];
79 if (ch == ' ' || ch == '\t') {
80 //continue
81 } else {
82 return false;
83 }
84 break;
1dcf666d 85
1e9bafca
RD
86 case SCE_RB_OPERATOR:
87 return styler[pos] == '.';
88
89 default:
90 return false;
91 }
92 }
93 return false;
94}
95
96// Forward declarations
97static bool keywordIsAmbiguous(const char *prevWord);
98static bool keywordDoStartsLoop(int pos,
99 Accessor &styler);
100static bool keywordIsModifier(const char *word,
101 int pos,
102 Accessor &styler);
103
104static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
7e0c58e9 105 char s[MAX_KEYWORD_LENGTH];
1e9bafca
RD
106 unsigned int i, j;
107 unsigned int lim = end - start + 1; // num chars to copy
108 if (lim >= MAX_KEYWORD_LENGTH) {
109 lim = MAX_KEYWORD_LENGTH - 1;
110 }
111 for (i = start, j = 0; j < lim; i++, j++) {
112 s[j] = styler[i];
65ec6247 113 }
1e9bafca
RD
114 s[j] = '\0';
115 int chAttr;
65ec6247 116 if (0 == strcmp(prevWord, "class"))
1e9bafca 117 chAttr = SCE_RB_CLASSNAME;
65ec6247 118 else if (0 == strcmp(prevWord, "module"))
1e9bafca 119 chAttr = SCE_RB_MODULE_NAME;
65ec6247 120 else if (0 == strcmp(prevWord, "def"))
1e9bafca
RD
121 chAttr = SCE_RB_DEFNAME;
122 else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
123 if (keywordIsAmbiguous(s)
124 && keywordIsModifier(s, start, styler)) {
1dcf666d 125
1e9bafca
RD
126 // Demoted keywords are colored as keywords,
127 // but do not affect changes in indentation.
128 //
129 // Consider the word 'if':
130 // 1. <<if test ...>> : normal
131 // 2. <<stmt if test>> : demoted
132 // 3. <<lhs = if ...>> : normal: start a new indent level
133 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
1dcf666d 134
1e9bafca
RD
135 chAttr = SCE_RB_WORD_DEMOTED;
136 } else {
137 chAttr = SCE_RB_WORD;
138 }
139 } else
140 chAttr = SCE_RB_IDENTIFIER;
141 styler.ColourTo(end, chAttr);
142 if (chAttr == SCE_RB_WORD) {
143 strcpy(prevWord, s);
144 } else {
145 prevWord[0] = 0;
146 }
147 return chAttr;
148}
149
150
151//XXX Identical to Perl, put in common area
152static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
153 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
154 return false;
155 }
156 while (*val) {
157 if (*val != styler[pos++]) {
158 return false;
65ec6247 159 }
1e9bafca 160 val++;
65ec6247 161 }
1e9bafca 162 return true;
65ec6247
RD
163}
164
1e9bafca
RD
165// Do Ruby better -- find the end of the line, work back,
166// and then check for leading white space
167
168// Precondition: the here-doc target can be indented
169static bool lookingAtHereDocDelim(Accessor &styler,
170 int pos,
171 int lengthDoc,
172 const char *HereDocDelim)
173{
174 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
175 return false;
176 }
177 while (--pos > 0) {
178 char ch = styler[pos];
179 if (isEOLChar(ch)) {
180 return true;
181 } else if (ch != ' ' && ch != '\t') {
182 return false;
183 }
184 }
185 return false;
65ec6247
RD
186}
187
1e9bafca
RD
188//XXX Identical to Perl, put in common area
// Map an opening bracket to its closing partner: ( [ { < become ) ] } >.
// Any other character is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
200
201// Null transitions when we see we've reached the end
202// and need to relex the curr char.
65ec6247 203
1e9bafca
RD
204static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
205 int &state) {
206 i--;
207 chNext2 = chNext;
208 chNext = ch;
209 state = SCE_RB_DEFAULT;
65ec6247
RD
210}
211
1e9bafca
RD
// Consume one character: the index moves forward by one and the
// look-ahead characters shift one place to the left (ch takes chNext,
// chNext takes chNext2).  chNext2 is not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
	++i;
	// Order matters: ch must take the old chNext before chNext changes.
	ch = chNext;
	chNext = chNext2;
}
217
1e9bafca
RD
218// precondition: startPos points to one after the EOL char
219static bool currLineContainsHereDelims(int& startPos,
220 Accessor &styler) {
221 if (startPos <= 1)
222 return false;
65ec6247 223
1e9bafca
RD
224 int pos;
225 for (pos = startPos - 1; pos > 0; pos--) {
226 char ch = styler.SafeGetCharAt(pos);
227 if (isEOLChar(ch)) {
228 // Leave the pointers where they are -- there are no
229 // here doc delims on the current line, even if
230 // the EOL isn't default style
1dcf666d 231
1e9bafca
RD
232 return false;
233 } else {
234 styler.Flush();
235 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
236 break;
237 }
238 }
239 }
240 if (pos == 0) {
241 return false;
242 }
243 // Update the pointers so we don't have to re-analyze the string
244 startPos = pos;
245 return true;
246}
65ec6247 247
7e0c58e9
RD
248// This class is used by the enter and exit methods, so it needs
249// to be hoisted out of the function.
250
251class QuoteCls {
252 public:
253 int Count;
254 char Up;
255 char Down;
256 QuoteCls() {
257 this->New();
258 }
259 void New() {
260 Count = 0;
261 Up = '\0';
262 Down = '\0';
263 }
264 void Open(char u) {
265 Count++;
266 Up = u;
267 Down = opposite(Up);
268 }
269 QuoteCls(const QuoteCls& q) {
270 // copy constructor -- use this for copying in
271 Count = q.Count;
272 Up = q.Up;
273 Down = q.Down;
274 }
275 QuoteCls& operator=(const QuoteCls& q) { // assignment constructor
276 if (this != &q) {
277 Count = q.Count;
278 Up = q.Up;
279 Down = q.Down;
280 }
281 return *this;
282 }
1dcf666d 283
7e0c58e9
RD
284};
285
286
287static void enterInnerExpression(int *p_inner_string_types,
288 int *p_inner_expn_brace_counts,
289 QuoteCls *p_inner_quotes,
290 int& inner_string_count,
291 int& state,
292 int& brace_counts,
293 QuoteCls curr_quote
294 ) {
295 p_inner_string_types[inner_string_count] = state;
296 state = SCE_RB_DEFAULT;
297 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
298 brace_counts = 0;
299 p_inner_quotes[inner_string_count] = curr_quote;
300 ++inner_string_count;
301}
302
303static void exitInnerExpression(int *p_inner_string_types,
304 int *p_inner_expn_brace_counts,
305 QuoteCls *p_inner_quotes,
306 int& inner_string_count,
307 int& state,
308 int& brace_counts,
309 QuoteCls& curr_quote
310 ) {
311 --inner_string_count;
312 state = p_inner_string_types[inner_string_count];
313 brace_counts = p_inner_expn_brace_counts[inner_string_count];
314 curr_quote = p_inner_quotes[inner_string_count];
315}
65ec6247 316
1e9bafca
RD
317static bool isEmptyLine(int pos,
318 Accessor &styler) {
319 int spaceFlags = 0;
320 int lineCurrent = styler.GetLine(pos);
321 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
322 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
323}
8e54aaed 324
1e9bafca
RD
// Report whether keyword is one of the fixed set of words after which
// this lexer allows a regular expression to start.  The comparison is
// exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
	static const char *const reStarters[] = {
		"and", "begin", "break", "case", "do",
		"else", "elsif", "if", "next", "return",
		"when", "unless", "until", "not", "or"
	};
	const size_t count = sizeof(reStarters) / sizeof(reStarters[0]);
	for (size_t k = 0; k < count; k++) {
		if (strcmp(keyword, reStarters[k]) == 0) {
			return true;
		}
	}
	return false;
}
65ec6247 345
1e9bafca
RD
346// Look at chars up to but not including endPos
347// Don't look at styles in case we're looking forward
65ec6247 348
1e9bafca
RD
349static int skipWhitespace(int startPos,
350 int endPos,
351 Accessor &styler) {
352 for (int i = startPos; i < endPos; i++) {
353 if (!iswhitespace(styler[i])) {
354 return i;
355 }
356 }
357 return endPos;
358}
1dcf666d 359
1e9bafca
RD
360// This routine looks for false positives like
361// undef foo, <<
362// There aren't too many.
363//
364// iPrev points to the start of <<
365
7e0c58e9 366static bool sureThisIsHeredoc(int iPrev,
1e9bafca
RD
367 Accessor &styler,
368 char *prevWord) {
1dcf666d 369
1e9bafca
RD
370 // Not so fast, since Ruby's so dynamic. Check the context
371 // to make sure we're OK.
372 int prevStyle;
373 int lineStart = styler.GetLine(iPrev);
374 int lineStartPosn = styler.LineStart(lineStart);
375 styler.Flush();
376
377 // Find the first word after some whitespace
378 int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
379 if (firstWordPosn >= iPrev) {
380 // Have something like {^ <<}
381 //XXX Look at the first previous non-comment non-white line
382 // to establish the context. Not too likely though.
383 return true;
384 } else {
385 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
386 case SCE_RB_WORD:
387 case SCE_RB_WORD_DEMOTED:
388 case SCE_RB_IDENTIFIER:
389 break;
390 default:
391 return true;
392 }
393 }
394 int firstWordEndPosn = firstWordPosn;
395 char *dst = prevWord;
396 for (;;) {
397 if (firstWordEndPosn >= iPrev ||
398 styler.StyleAt(firstWordEndPosn) != prevStyle) {
399 *dst = 0;
400 break;
401 }
402 *dst++ = styler[firstWordEndPosn];
403 firstWordEndPosn += 1;
404 }
405 //XXX Write a style-aware thing to regex scintilla buffer objects
406 if (!strcmp(prevWord, "undef")
407 || !strcmp(prevWord, "def")
408 || !strcmp(prevWord, "alias")) {
409 // These keywords are what we were looking for
410 return false;
411 }
412 return true;
413}
414
415// Routine that saves us from allocating a buffer for the here-doc target
416// targetEndPos points one past the end of the current target
417static bool haveTargetMatch(int currPos,
418 int lengthDoc,
419 int targetStartPos,
420 int targetEndPos,
421 Accessor &styler) {
422 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
423 return false;
424 }
425 int i, j;
426 for (i = targetStartPos, j = currPos;
427 i < targetEndPos && j < lengthDoc;
428 i++, j++) {
429 if (styler[i] != styler[j]) {
430 return false;
431 }
432 }
433 return true;
434}
435
436// We need a check because the form
437// [identifier] <<[target]
438// is ambiguous. The Ruby lexer/parser resolves it by
439// looking to see if [identifier] names a variable or a
440// function. If it's the first, it's the start of a here-doc.
441// If it's a var, it's an operator. This lexer doesn't
442// maintain a symbol table, so it looks ahead to see what's
443// going on, in cases where we have
444// ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
445//
446// If there's no occurrence of [target] on a line, assume we don't.
447
448// return true == yes, we have no heredocs
449
450static bool sureThisIsNotHeredoc(int lt2StartPos,
451 Accessor &styler) {
452 int prevStyle;
453 // Use full document, not just part we're styling
454 int lengthDoc = styler.Length();
455 int lineStart = styler.GetLine(lt2StartPos);
456 int lineStartPosn = styler.LineStart(lineStart);
457 styler.Flush();
458 const bool definitely_not_a_here_doc = true;
459 const bool looks_like_a_here_doc = false;
1dcf666d 460
1e9bafca
RD
461 // Find the first word after some whitespace
462 int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
463 if (firstWordPosn >= lt2StartPos) {
464 return definitely_not_a_here_doc;
465 }
466 prevStyle = styler.StyleAt(firstWordPosn);
467 // If we have '<<' following a keyword, it's not a heredoc
468 if (prevStyle != SCE_RB_IDENTIFIER) {
469 return definitely_not_a_here_doc;
470 }
471 int newStyle = prevStyle;
472 // Some compilers incorrectly warn about uninit newStyle
473 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
474 // Inner loop looks at the name
475 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
476 newStyle = styler.StyleAt(firstWordPosn);
477 if (newStyle != prevStyle) {
478 break;
479 }
480 }
481 // Do we have '::' or '.'?
482 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
483 char ch = styler[firstWordPosn];
484 if (ch == '.') {
485 // yes
486 } else if (ch == ':') {
487 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
488 return definitely_not_a_here_doc;
489 } else if (styler[firstWordPosn] != ':') {
490 return definitely_not_a_here_doc;
491 }
492 } else {
493 break;
494 }
495 } else {
496 break;
497 }
498 }
499 // Skip next batch of white-space
500 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
501 if (firstWordPosn != lt2StartPos) {
502 // Have [[^ws[identifier]ws[*something_else*]ws<<
503 return definitely_not_a_here_doc;
504 }
505 // OK, now 'j' will point to the current spot moving ahead
506 int j = firstWordPosn + 1;
507 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
508 // This shouldn't happen
509 return definitely_not_a_here_doc;
510 }
511 int nextLineStartPosn = styler.LineStart(lineStart + 1);
512 if (nextLineStartPosn >= lengthDoc) {
513 return definitely_not_a_here_doc;
514 }
515 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
516 if (j >= lengthDoc) {
517 return definitely_not_a_here_doc;
518 }
519 bool allow_indent;
520 int target_start, target_end;
521 // From this point on no more styling, since we're looking ahead
522 if (styler[j] == '-') {
523 allow_indent = true;
524 j++;
525 } else {
526 allow_indent = false;
527 }
528
529 // Allow for quoted targets.
530 char target_quote = 0;
531 switch (styler[j]) {
532 case '\'':
533 case '"':
534 case '`':
535 target_quote = styler[j];
536 j += 1;
537 }
1dcf666d 538
1e9bafca
RD
539 if (isSafeAlnum(styler[j])) {
540 // Init target_end because some compilers think it won't
541 // be initialized by the time it's used
542 target_start = target_end = j;
543 j++;
544 } else {
545 return definitely_not_a_here_doc;
546 }
547 for (; j < lengthDoc; j++) {
548 if (!isSafeAlnum(styler[j])) {
549 if (target_quote && styler[j] != target_quote) {
550 // unquoted end
551 return definitely_not_a_here_doc;
552 }
553
554 // And for now make sure that it's a newline
555 // don't handle arbitrary expressions yet
1dcf666d 556
1e9bafca
RD
557 target_end = j;
558 if (target_quote) {
559 // Now we can move to the character after the string delimiter.
560 j += 1;
561 }
562 j = skipWhitespace(j, lengthDoc, styler);
563 if (j >= lengthDoc) {
564 return definitely_not_a_here_doc;
565 } else {
566 char ch = styler[j];
567 if (ch == '#' || isEOLChar(ch)) {
568 // This is OK, so break and continue;
569 break;
570 } else {
571 return definitely_not_a_here_doc;
572 }
573 }
574 }
575 }
576
577 // Just look at the start of each line
578 int last_line = styler.GetLine(lengthDoc - 1);
579 // But don't go too far
580 if (last_line > lineStart + 50) {
581 last_line = lineStart + 50;
582 }
583 for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
584 if (allow_indent) {
585 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
586 } else {
587 j = styler.LineStart(line_num);
588 }
589 // target_end is one past the end
590 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
591 // We got it
592 return looks_like_a_here_doc;
593 }
594 }
595 return definitely_not_a_here_doc;
596}
597
598//todo: if we aren't looking at a stdio character,
1dcf666d 599// move to the start of the first line that is not in a
1e9bafca
RD
600// multi-line construct
601
602static void synchronizeDocStart(unsigned int& startPos,
603 int &length,
604 int &initStyle,
605 Accessor &styler,
606 bool skipWhiteSpace=false) {
607
608 styler.Flush();
609 int style = actual_style(styler.StyleAt(startPos));
610 switch (style) {
611 case SCE_RB_STDIN:
612 case SCE_RB_STDOUT:
613 case SCE_RB_STDERR:
614 // Don't do anything else with these.
615 return;
616 }
1dcf666d 617
1e9bafca
RD
618 int pos = startPos;
619 // Quick way to characterize each line
620 int lineStart;
621 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
622 // Now look at the style before the previous line's EOL
623 pos = styler.LineStart(lineStart) - 1;
624 if (pos <= 10) {
625 lineStart = 0;
626 break;
627 }
628 char ch = styler.SafeGetCharAt(pos);
629 char chPrev = styler.SafeGetCharAt(pos - 1);
630 if (ch == '\n' && chPrev == '\r') {
631 pos--;
632 }
633 if (styler.SafeGetCharAt(pos - 1) == '\\') {
634 // Continuation line -- keep going
635 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
636 // Part of multi-line construct -- keep going
637 } else if (currLineContainsHereDelims(pos, styler)) {
638 // Keep going, with pos and length now pointing
639 // at the end of the here-doc delimiter
640 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
641 // Keep going
642 } else {
643 break;
644 }
645 }
646 pos = styler.LineStart(lineStart);
647 length += (startPos - pos);
648 startPos = pos;
649 initStyle = SCE_RB_DEFAULT;
65ec6247
RD
650}
651
652static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
653 WordList *keywordlists[], Accessor &styler) {
654
1e9bafca
RD
655 // Lexer for Ruby often has to backtrack to start of current style to determine
656 // which characters are being used as quotes, how deeply nested is the
657 // start position and what the termination string is for here documents
1dcf666d 658
1e9bafca 659 WordList &keywords = *keywordlists[0];
65ec6247 660
1e9bafca
RD
661 class HereDocCls {
662 public:
663 int State;
664 // States
665 // 0: '<<' encountered
666 // 1: collect the delimiter
667 // 1b: text between the end of the delimiter and the EOL
668 // 2: here doc text (lines after the delimiter)
669 char Quote; // the char after '<<'
670 bool Quoted; // true if Quote in ('\'','"','`')
671 int DelimiterLength; // strlen(Delimiter)
672 char Delimiter[256]; // the Delimiter, limit of 256: from Perl
673 bool CanBeIndented;
674 HereDocCls() {
675 State = 0;
676 DelimiterLength = 0;
677 Delimiter[0] = '\0';
678 CanBeIndented = false;
65ec6247 679 }
1e9bafca 680 };
1dcf666d 681 HereDocCls HereDoc;
65ec6247 682
1e9bafca 683 QuoteCls Quote;
65ec6247 684
1e9bafca
RD
685 int numDots = 0; // For numbers --
686 // Don't start lexing in the middle of a num
687
688 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
689 false);
65ec6247 690
1e9bafca
RD
691 bool preferRE = true;
692 int state = initStyle;
693 int lengthDoc = startPos + length;
694
695 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
65ec6247
RD
696 prevWord[0] = '\0';
697 if (length == 0)
1e9bafca 698 return;
65ec6247 699
1e9bafca
RD
700 char chPrev = styler.SafeGetCharAt(startPos - 1);
701 char chNext = styler.SafeGetCharAt(startPos);
7e0c58e9 702 bool is_real_number = true; // Differentiate between constants and ?-sequences.
1e9bafca
RD
703 // Ruby uses a different mask because bad indentation is marked by oring with 32
704 styler.StartAt(startPos, 127);
65ec6247 705 styler.StartSegment(startPos);
65ec6247 706
1e9bafca
RD
707 static int q_states[] = {SCE_RB_STRING_Q,
708 SCE_RB_STRING_QQ,
709 SCE_RB_STRING_QR,
710 SCE_RB_STRING_QW,
711 SCE_RB_STRING_QW,
712 SCE_RB_STRING_QX};
713 static const char* q_chars = "qQrwWx";
7e0c58e9
RD
714
715 // In most cases a value of 2 should be ample for the code in the
716 // Ruby library, and the code the user is likely to enter.
717 // For example,
718 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
719 // if options[:verbose]
720 // from fileutils.rb nests to a level of 2
721 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
722 // string (including regex'es, %Q, %<sym>, %w, and other strings
723 // that interpolate), it will stay as a string. The problem with this
724 // is that quotes might flip, a 7th '#{' will look like a comment,
725 // and code-folding might be wrong.
726
727 // If anyone runs into this problem, I recommend raising this
728 // value slightly higher or replacing the fixed array with a linked
729 // list. Keep in mind this code will be called every time the lexer
730 // is invoked.
731
732#define INNER_STRINGS_MAX_COUNT 5
733 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
734 int inner_string_types[INNER_STRINGS_MAX_COUNT];
735 // Track # braces when we push a new #{ thing
736 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT];
737 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
738 int inner_string_count = 0;
739 int brace_counts = 0; // Number of #{ ... } things within an expression
740
741 int i;
742 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
743 inner_string_types[i] = 0;
744 inner_expn_brace_counts[i] = 0;
745 }
746 for (i = startPos; i < lengthDoc; i++) {
65ec6247
RD
747 char ch = chNext;
748 chNext = styler.SafeGetCharAt(i + 1);
749 char chNext2 = styler.SafeGetCharAt(i + 2);
750
1e9bafca
RD
751 if (styler.IsLeadByte(ch)) {
752 chNext = chNext2;
65ec6247 753 chPrev = ' ';
65ec6247
RD
754 i += 1;
755 continue;
756 }
1dcf666d 757
1e9bafca
RD
758 // skip on DOS/Windows
759 //No, don't, because some things will get tagged on,
760 // so we won't recognize keywords, for example
761#if 0
762 if (ch == '\r' && chNext == '\n') {
763 continue;
764 }
765#endif
1dcf666d 766
1e9bafca
RD
767 if (HereDoc.State == 1 && isEOLChar(ch)) {
768 // Begin of here-doc (the line after the here-doc delimiter):
769 HereDoc.State = 2;
770 styler.ColourTo(i-1, state);
771 // Don't check for a missing quote, just jump into
772 // the here-doc state
773 state = SCE_RB_HERE_Q;
774 }
65ec6247 775
1e9bafca
RD
776 // Regular transitions
777 if (state == SCE_RB_DEFAULT) {
778 if (isSafeDigit(ch)) {
779 styler.ColourTo(i - 1, state);
780 state = SCE_RB_NUMBER;
7e0c58e9 781 is_real_number = true;
1e9bafca
RD
782 numDots = 0;
783 } else if (isHighBitChar(ch) || iswordstart(ch)) {
784 styler.ColourTo(i - 1, state);
785 state = SCE_RB_WORD;
65ec6247
RD
786 } else if (ch == '#') {
787 styler.ColourTo(i - 1, state);
1e9bafca
RD
788 state = SCE_RB_COMMENTLINE;
789 } else if (ch == '=') {
65ec6247 790 // =begin indicates the start of a comment (doc) block
1dcf666d 791 if ((i == 0 || isEOLChar(chPrev))
1e9bafca
RD
792 && chNext == 'b'
793 && styler.SafeGetCharAt(i + 2) == 'e'
794 && styler.SafeGetCharAt(i + 3) == 'g'
795 && styler.SafeGetCharAt(i + 4) == 'i'
796 && styler.SafeGetCharAt(i + 5) == 'n'
1dcf666d 797 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
1e9bafca
RD
798 styler.ColourTo(i - 1, state);
799 state = SCE_RB_POD;
800 } else {
65ec6247 801 styler.ColourTo(i - 1, state);
1e9bafca
RD
802 styler.ColourTo(i, SCE_RB_OPERATOR);
803 preferRE = true;
65ec6247 804 }
1e9bafca 805 } else if (ch == '"') {
65ec6247 806 styler.ColourTo(i - 1, state);
1e9bafca
RD
807 state = SCE_RB_STRING;
808 Quote.New();
809 Quote.Open(ch);
810 } else if (ch == '\'') {
811 styler.ColourTo(i - 1, state);
812 state = SCE_RB_CHARACTER;
813 Quote.New();
814 Quote.Open(ch);
815 } else if (ch == '`') {
65ec6247 816 styler.ColourTo(i - 1, state);
1e9bafca
RD
817 state = SCE_RB_BACKTICKS;
818 Quote.New();
819 Quote.Open(ch);
820 } else if (ch == '@') {
821 // Instance or class var
822 styler.ColourTo(i - 1, state);
823 if (chNext == '@') {
824 state = SCE_RB_CLASS_VAR;
825 advance_char(i, ch, chNext, chNext2); // pass by ref
826 } else {
827 state = SCE_RB_INSTANCE_VAR;
828 }
829 } else if (ch == '$') {
830 // Check for a builtin global
831 styler.ColourTo(i - 1, state);
832 // Recognize it bit by bit
833 state = SCE_RB_GLOBAL;
834 } else if (ch == '/' && preferRE) {
835 // Ambigous operator
836 styler.ColourTo(i - 1, state);
837 state = SCE_RB_REGEX;
838 Quote.New();
839 Quote.Open(ch);
840 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
841
842 // Recognise the '<<' symbol - either a here document or a binary op
843 styler.ColourTo(i - 1, state);
844 i++;
845 chNext = chNext2;
846 styler.ColourTo(i, SCE_RB_OPERATOR);
847
848 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
849 // It's definitely not a here-doc,
850 // based on Ruby's lexer/parser in the
851 // heredoc_identifier routine.
852 // Nothing else to do.
853 } else if (preferRE) {
854 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
855 state = SCE_RB_HERE_DELIM;
856 HereDoc.State = 0;
857 }
858 // else leave it in default state
859 } else {
860 if (sureThisIsNotHeredoc(i - 1, styler)) {
861 // leave state as default
862 // We don't have all the heuristics Perl has for indications
863 // of a here-doc, because '<<' is overloadable and used
864 // for so many other classes.
865 } else {
866 state = SCE_RB_HERE_DELIM;
867 HereDoc.State = 0;
868 }
869 }
870 preferRE = (state != SCE_RB_HERE_DELIM);
871 } else if (ch == ':') {
872 styler.ColourTo(i - 1, state);
873 if (chNext == ':') {
874 // Mark "::" as an operator, not symbol start
875 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
876 advance_char(i, ch, chNext, chNext2); // pass by ref
877 state = SCE_RB_DEFAULT;
878 preferRE = false;
879 } else if (isSafeWordcharOrHigh(chNext)) {
880 state = SCE_RB_SYMBOL;
881 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
882 // Do the operator analysis in-line, looking ahead
883 // Based on the table in pickaxe 2nd ed., page 339
884 bool doColoring = true;
885 switch (chNext) {
886 case '[':
887 if (chNext2 == ']' ) {
888 char ch_tmp = styler.SafeGetCharAt(i + 3);
889 if (ch_tmp == '=') {
890 i += 3;
891 ch = ch_tmp;
892 chNext = styler.SafeGetCharAt(i + 1);
893 } else {
894 i += 2;
895 ch = chNext2;
896 chNext = ch_tmp;
897 }
898 } else {
899 doColoring = false;
900 }
901 break;
902
903 case '*':
904 if (chNext2 == '*') {
905 i += 2;
906 ch = chNext2;
907 chNext = styler.SafeGetCharAt(i + 1);
908 } else {
909 advance_char(i, ch, chNext, chNext2);
910 }
911 break;
912
913 case '!':
914 if (chNext2 == '=' || chNext2 == '~') {
915 i += 2;
916 ch = chNext2;
917 chNext = styler.SafeGetCharAt(i + 1);
918 } else {
919 advance_char(i, ch, chNext, chNext2);
920 }
921 break;
922
923 case '<':
924 if (chNext2 == '<') {
925 i += 2;
926 ch = chNext2;
927 chNext = styler.SafeGetCharAt(i + 1);
928 } else if (chNext2 == '=') {
929 char ch_tmp = styler.SafeGetCharAt(i + 3);
930 if (ch_tmp == '>') { // <=> operator
931 i += 3;
932 ch = ch_tmp;
933 chNext = styler.SafeGetCharAt(i + 1);
934 } else {
935 i += 2;
936 ch = chNext2;
937 chNext = ch_tmp;
938 }
939 } else {
940 advance_char(i, ch, chNext, chNext2);
941 }
942 break;
943
944 default:
945 // Simple one-character operators
946 advance_char(i, ch, chNext, chNext2);
947 break;
948 }
949 if (doColoring) {
950 styler.ColourTo(i, SCE_RB_SYMBOL);
951 state = SCE_RB_DEFAULT;
952 }
953 } else if (!preferRE) {
954 // Don't color symbol strings (yet)
955 // Just color the ":" and color rest as string
956 styler.ColourTo(i, SCE_RB_SYMBOL);
957 state = SCE_RB_DEFAULT;
958 } else {
959 styler.ColourTo(i, SCE_RB_OPERATOR);
960 state = SCE_RB_DEFAULT;
961 preferRE = true;
962 }
963 } else if (ch == '%') {
964 styler.ColourTo(i - 1, state);
965 bool have_string = false;
966 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
967 Quote.New();
968 const char *hit = strchr(q_chars, chNext);
969 if (hit != NULL) {
970 state = q_states[hit - q_chars];
971 Quote.Open(chNext2);
972 i += 2;
973 ch = chNext2;
65ec6247 974 chNext = styler.SafeGetCharAt(i + 1);
1e9bafca
RD
975 have_string = true;
976 }
7e0c58e9 977 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1e9bafca
RD
978 // Ruby doesn't allow high bit chars here,
979 // but the editor host might
1dcf666d
RD
980 Quote.New();
981 state = SCE_RB_STRING_QQ;
982 Quote.Open(chNext);
983 advance_char(i, ch, chNext, chNext2); // pass by ref
984 have_string = true;
985 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
986 // Ruby doesn't allow high bit chars here,
987 // but the editor host might
988 Quote.New();
1e9bafca
RD
989 state = SCE_RB_STRING_QQ;
990 Quote.Open(chNext);
991 advance_char(i, ch, chNext, chNext2); // pass by ref
992 have_string = true;
993 }
994 if (!have_string) {
995 styler.ColourTo(i, SCE_RB_OPERATOR);
996 // stay in default
997 preferRE = true;
998 }
7e0c58e9
RD
999 } else if (ch == '?') {
1000 styler.ColourTo(i - 1, state);
1001 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1002 styler.ColourTo(i, SCE_RB_OPERATOR);
1003 } else {
1004 // It's the start of a character code escape sequence
1005 // Color it as a number.
1006 state = SCE_RB_NUMBER;
1007 is_real_number = false;
1008 }
1e9bafca
RD
1009 } else if (isoperator(ch) || ch == '.') {
1010 styler.ColourTo(i - 1, state);
1011 styler.ColourTo(i, SCE_RB_OPERATOR);
1012 // If we're ending an expression or block,
1013 // assume it ends an object, and the ambivalent
1014 // constructs are binary operators
1015 //
1016 // So if we don't have one of these chars,
1017 // we aren't ending an object exp'n, and ops
1018 // like : << / are unary operators.
1dcf666d 1019
7e0c58e9
RD
1020 if (ch == '{') {
1021 ++brace_counts;
1022 preferRE = true;
1023 } else if (ch == '}' && --brace_counts < 0
1024 && inner_string_count > 0) {
1025 styler.ColourTo(i, SCE_RB_OPERATOR);
1026 exitInnerExpression(inner_string_types,
1027 inner_expn_brace_counts,
1028 inner_quotes,
1029 inner_string_count,
1030 state, brace_counts, Quote);
1031 } else {
1032 preferRE = (strchr(")}].", ch) == NULL);
1033 }
1e9bafca
RD
1034 // Stay in default state
1035 } else if (isEOLChar(ch)) {
1036 // Make sure it's a true line-end, with no backslash
1037 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1038 && chPrev != '\\') {
1039 // Assume we've hit the end of the statement.
1040 preferRE = true;
1041 }
1042 }
1043 } else if (state == SCE_RB_WORD) {
1044 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1045 // Words include x? in all contexts,
1046 // and <letters>= after either 'def' or a dot
1047 // Move along until a complete word is on our left
1048
1049 // Default accessor treats '.' as word-chars,
1050 // but we don't for now.
1dcf666d 1051
1e9bafca
RD
1052 if (ch == '='
1053 && isSafeWordcharOrHigh(chPrev)
1054 && (chNext == '('
1055 || strchr(" \t\n\r", chNext) != NULL)
1056 && (!strcmp(prevWord, "def")
1057 || followsDot(styler.GetStartSegment(), styler))) {
1058 // <name>= is a name only when being def'd -- Get it the next time
1059 // This means that <name>=<name> is always lexed as
1060 // <name>, (op, =), <name>
1061 } else if ((ch == '?' || ch == '!')
1062 && isSafeWordcharOrHigh(chPrev)
1063 && !isSafeWordcharOrHigh(chNext)) {
1064 // <name>? is a name -- Get it the next time
1065 // But <name>?<name> is always lexed as
1066 // <name>, (op, ?), <name>
1067 // Same with <name>! to indicate a method that
1068 // modifies its target
1069 } else if (isEOLChar(ch)
1070 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1071 styler.ColourTo(i, SCE_RB_DATASECTION);
1072 state = SCE_RB_DATASECTION;
1073 // No need to handle this state -- we'll just move to the end
1074 preferRE = false;
1075 } else {
1076 int wordStartPos = styler.GetStartSegment();
1077 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1078 switch (word_style) {
1079 case SCE_RB_WORD:
1080 preferRE = RE_CanFollowKeyword(prevWord);
1081 break;
1dcf666d 1082
1e9bafca
RD
1083 case SCE_RB_WORD_DEMOTED:
1084 preferRE = true;
1085 break;
1dcf666d 1086
1e9bafca
RD
1087 case SCE_RB_IDENTIFIER:
1088 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1089 preferRE = true;
1090 } else if (isEOLChar(ch)) {
1091 preferRE = true;
1092 } else {
1093 preferRE = false;
1094 }
1095 break;
1096 default:
1097 preferRE = false;
1098 }
1099 if (ch == '.') {
1100 // We might be redefining an operator-method
1101 preferRE = false;
1102 }
1dcf666d 1103 // And if it's the first
1e9bafca
RD
1104 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1105 }
1106 }
1107 } else if (state == SCE_RB_NUMBER) {
7e0c58e9
RD
1108 if (!is_real_number) {
1109 if (ch != '\\') {
1110 styler.ColourTo(i, state);
1111 state = SCE_RB_DEFAULT;
1112 preferRE = false;
1113 } else if (strchr("\\ntrfvaebs", chNext)) {
1114 // Terminal escape sequence -- handle it next time
1115 // Nothing more to do this time through the loop
1116 } else if (chNext == 'C' || chNext == 'M') {
1117 if (chNext2 != '-') {
1118 // \C or \M ends the sequence -- handle it next time
1119 } else {
1120 // Move from abc?\C-x
1121 // ^
1122 // to
1123 // ^
1124 i += 2;
1125 ch = chNext2;
1126 chNext = styler.SafeGetCharAt(i + 1);
1127 }
1128 } else if (chNext == 'c') {
1129 // Stay here, \c is a combining sequence
1130 advance_char(i, ch, chNext, chNext2); // pass by ref
1131 } else {
1132 // ?\x, including ?\\ is final.
1133 styler.ColourTo(i + 1, state);
1134 state = SCE_RB_DEFAULT;
1135 preferRE = false;
1136 advance_char(i, ch, chNext, chNext2);
1137 }
1138 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1e9bafca 1139 // Keep going
1dcf666d
RD
1140 } else if (ch == '.' && chNext == '.') {
1141 ++numDots;
1142 styler.ColourTo(i - 1, state);
1143 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1e9bafca
RD
1144 } else if (ch == '.' && ++numDots == 1) {
1145 // Keep going
1146 } else {
1147 styler.ColourTo(i - 1, state);
1148 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1149 preferRE = false;
1150 }
1151 } else if (state == SCE_RB_COMMENTLINE) {
1152 if (isEOLChar(ch)) {
1153 styler.ColourTo(i - 1, state);
1154 state = SCE_RB_DEFAULT;
1155 // Use whatever setting we had going into the comment
1156 }
1157 } else if (state == SCE_RB_HERE_DELIM) {
1158 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1159 // Slightly different: if we find an immediate '-',
1160 // the target can appear indented.
1dcf666d 1161
1e9bafca
RD
1162 if (HereDoc.State == 0) { // '<<' encountered
1163 HereDoc.State = 1;
1164 HereDoc.DelimiterLength = 0;
1165 if (ch == '-') {
1166 HereDoc.CanBeIndented = true;
1167 advance_char(i, ch, chNext, chNext2); // pass by ref
1168 } else {
1169 HereDoc.CanBeIndented = false;
1170 }
1171 if (isEOLChar(ch)) {
1172 // Bail out of doing a here doc if there's no target
1173 state = SCE_RB_DEFAULT;
1174 preferRE = false;
1175 } else {
1176 HereDoc.Quote = ch;
1dcf666d 1177
1e9bafca
RD
1178 if (ch == '\'' || ch == '"' || ch == '`') {
1179 HereDoc.Quoted = true;
1180 HereDoc.Delimiter[0] = '\0';
1181 } else {
1182 HereDoc.Quoted = false;
1183 HereDoc.Delimiter[0] = ch;
1184 HereDoc.Delimiter[1] = '\0';
1185 HereDoc.DelimiterLength = 1;
1186 }
1187 }
1188 } else if (HereDoc.State == 1) { // collect the delimiter
1189 if (isEOLChar(ch)) {
1190 // End the quote now, and go back for more
1191 styler.ColourTo(i - 1, state);
1192 state = SCE_RB_DEFAULT;
1193 i--;
1194 chNext = ch;
1e9bafca
RD
1195 preferRE = false;
1196 } else if (HereDoc.Quoted) {
1197 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1198 styler.ColourTo(i, state);
1199 state = SCE_RB_DEFAULT;
1200 preferRE = false;
1201 } else {
1202 if (ch == '\\' && !isEOLChar(chNext)) {
1203 advance_char(i, ch, chNext, chNext2);
1204 }
1205 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1206 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1207 }
1208 } else { // an unquoted here-doc delimiter
1209 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1210 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1211 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1212 } else {
1213 styler.ColourTo(i - 1, state);
1214 redo_char(i, ch, chNext, chNext2, state);
1215 preferRE = false;
65ec6247 1216 }
1e9bafca
RD
1217 }
1218 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
65ec6247 1219 styler.ColourTo(i - 1, state);
1e9bafca
RD
1220 state = SCE_RB_ERROR;
1221 preferRE = false;
65ec6247 1222 }
1e9bafca
RD
1223 }
1224 } else if (state == SCE_RB_HERE_Q) {
1225 // Not needed: HereDoc.State == 2
1226 // Indentable here docs: look backwards
1227 // Non-indentable: look forwards, like in Perl
1228 //
1229 // Why: so we can quickly resolve things like <<-" abc"
1230
1231 if (!HereDoc.CanBeIndented) {
1232 if (isEOLChar(chPrev)
1233 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1234 styler.ColourTo(i - 1, state);
1235 i += HereDoc.DelimiterLength - 1;
1236 chNext = styler.SafeGetCharAt(i + 1);
1237 if (isEOLChar(chNext)) {
1238 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1239 state = SCE_RB_DEFAULT;
1240 HereDoc.State = 0;
1241 preferRE = false;
1242 }
1243 // Otherwise we skipped through the here doc faster.
1244 }
1245 } else if (isEOLChar(chNext)
1246 && lookingAtHereDocDelim(styler,
1247 i - HereDoc.DelimiterLength + 1,
1248 lengthDoc,
1249 HereDoc.Delimiter)) {
1250 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1251 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1252 state = SCE_RB_DEFAULT;
1253 preferRE = false;
1254 HereDoc.State = 0;
1255 }
1256 } else if (state == SCE_RB_CLASS_VAR
1257 || state == SCE_RB_INSTANCE_VAR
1258 || state == SCE_RB_SYMBOL) {
1259 if (!isSafeWordcharOrHigh(ch)) {
1260 styler.ColourTo(i - 1, state);
1261 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1262 preferRE = false;
1263 }
1264 } else if (state == SCE_RB_GLOBAL) {
1265 if (!isSafeWordcharOrHigh(ch)) {
1266 // handle special globals here as well
1267 if (chPrev == '$') {
1268 if (ch == '-') {
1269 // Include the next char, like $-a
1270 advance_char(i, ch, chNext, chNext2);
1271 }
1272 styler.ColourTo(i, state);
1273 state = SCE_RB_DEFAULT;
1274 } else {
1275 styler.ColourTo(i - 1, state);
1276 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1277 }
1278 preferRE = false;
1279 }
1280 } else if (state == SCE_RB_POD) {
1281 // PODs end with ^=end\s, -- any whitespace can follow =end
1282 if (strchr(" \t\n\r", ch) != NULL
1283 && i > 5
1284 && isEOLChar(styler[i - 5])
1285 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1286 styler.ColourTo(i - 1, state);
1287 state = SCE_RB_DEFAULT;
1288 preferRE = false;
1289 }
1290 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1291 if (ch == '\\' && Quote.Up != '\\') {
1292 // Skip one
1293 advance_char(i, ch, chNext, chNext2);
1294 } else if (ch == Quote.Down) {
1295 Quote.Count--;
1296 if (Quote.Count == 0) {
1297 // Include the options
1298 while (isSafeAlpha(chNext)) {
1299 i++;
65ec6247 1300 ch = chNext;
1e9bafca
RD
1301 chNext = styler.SafeGetCharAt(i + 1);
1302 }
1303 styler.ColourTo(i, state);
1304 state = SCE_RB_DEFAULT;
1305 preferRE = false;
1306 }
1307 } else if (ch == Quote.Up) {
1308 // Only if close quoter != open quoter
1309 Quote.Count++;
1dcf666d 1310
1e9bafca 1311 } else if (ch == '#' ) {
7e0c58e9
RD
1312 if (chNext == '{'
1313 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1314 // process #{ ... }
1315 styler.ColourTo(i - 1, state);
1316 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1317 enterInnerExpression(inner_string_types,
1318 inner_expn_brace_counts,
1319 inner_quotes,
1320 inner_string_count,
1321 state,
1322 brace_counts,
1323 Quote);
1324 preferRE = true;
1325 // Skip one
1326 advance_char(i, ch, chNext, chNext2);
1327 } else {
1328 //todo: distinguish comments from pound chars
1329 // for now, handle as comment
1330 styler.ColourTo(i - 1, state);
1331 bool inEscape = false;
1332 while (++i < lengthDoc) {
1333 ch = styler.SafeGetCharAt(i);
1334 if (ch == '\\') {
1335 inEscape = true;
1336 } else if (isEOLChar(ch)) {
1337 // Comment inside a regex
1338 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1339 break;
1340 } else if (inEscape) {
1341 inEscape = false; // don't look at char
1342 } else if (ch == Quote.Down) {
1343 // Have the regular handler deal with this
1344 // to get trailing modifiers.
1345 i--;
1346 ch = styler[i];
1347 break;
1348 }
1e9bafca 1349 }
7e0c58e9 1350 chNext = styler.SafeGetCharAt(i + 1);
1e9bafca 1351 }
1e9bafca
RD
1352 }
1353 // Quotes of all kinds...
1dcf666d 1354 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1e9bafca
RD
1355 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1356 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1357 state == SCE_RB_BACKTICKS) {
1358 if (!Quote.Down && !isspacechar(ch)) {
1359 Quote.Open(ch);
1360 } else if (ch == '\\' && Quote.Up != '\\') {
1361 //Riddle me this: Is it safe to skip *every* escaped char?
1362 advance_char(i, ch, chNext, chNext2);
1363 } else if (ch == Quote.Down) {
1364 Quote.Count--;
1365 if (Quote.Count == 0) {
1366 styler.ColourTo(i, state);
1367 state = SCE_RB_DEFAULT;
1368 preferRE = false;
1369 }
1370 } else if (ch == Quote.Up) {
1371 Quote.Count++;
7e0c58e9
RD
1372 } else if (ch == '#' && chNext == '{'
1373 && inner_string_count < INNER_STRINGS_MAX_COUNT
1374 && state != SCE_RB_CHARACTER
1375 && state != SCE_RB_STRING_Q) {
1376 // process #{ ... }
1377 styler.ColourTo(i - 1, state);
1378 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1379 enterInnerExpression(inner_string_types,
1380 inner_expn_brace_counts,
1381 inner_quotes,
1382 inner_string_count,
1383 state,
1384 brace_counts,
1385 Quote);
1386 preferRE = true;
1387 // Skip one
1388 advance_char(i, ch, chNext, chNext2);
1e9bafca
RD
1389 }
1390 }
1dcf666d 1391
1e9bafca
RD
1392 if (state == SCE_RB_ERROR) {
1393 break;
1394 }
1395 chPrev = ch;
1396 }
1397 if (state == SCE_RB_WORD) {
1398 // We've ended on a word, possibly at EOF, and need to
1399 // classify it.
1400 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1401 } else {
1402 styler.ColourTo(lengthDoc - 1, state);
1403 }
1404}
1405
1406// Helper functions for folding, disambiguation keywords
1dcf666d 1407// Assert that there are no high-bit chars
1e9bafca
RD
1408
1409static void getPrevWord(int pos,
1410 char *prevWord,
1411 Accessor &styler,
1412 int word_state)
1413{
1414 int i;
1415 styler.Flush();
1416 for (i = pos - 1; i > 0; i--) {
1417 if (actual_style(styler.StyleAt(i)) != word_state) {
1418 i++;
1419 break;
1420 }
1421 }
1422 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1423 i = pos - MAX_KEYWORD_LENGTH;
1424 char *dst = prevWord;
1425 for (; i <= pos; i++) {
1426 *dst++ = styler[i];
1427 }
1428 *dst = 0;
1429}
1430
// Report whether `prevWord` is one of the keywords that can act either as
// a block opener or as a statement modifier / noise word:
// "if", "do", "while", "unless", "until".
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most to least commonly used.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);

    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1445
1446// Demote keywords in the following conditions:
1447// if, while, unless, until modify a statement
1dcf666d 1448// do after a while or until, as a noise word (like then after if)
1e9bafca
RD
1449
1450static bool keywordIsModifier(const char *word,
1451 int pos,
1452 Accessor &styler)
1453{
1454 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1455 return keywordDoStartsLoop(pos, styler);
1456 }
1dcf666d 1457 char ch, chPrev, chPrev2;
1e9bafca
RD
1458 int style = SCE_RB_DEFAULT;
1459 int lineStart = styler.GetLine(pos);
1460 int lineStartPosn = styler.LineStart(lineStart);
1dcf666d
RD
1461 // We want to step backwards until we don't care about the current
1462 // position. But first move lineStartPosn back behind any
1463 // continuations immediately above word.
1464 while (lineStartPosn > 0) {
1465 ch = styler[lineStartPosn-1];
1466 if (ch == '\n' || ch == '\r') {
1467 chPrev = styler.SafeGetCharAt(lineStartPosn-2);
1468 chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1469 lineStart = styler.GetLine(lineStartPosn-1);
1470 // If we find a continuation line, include it in our analysis.
1471 if (chPrev == '\\') {
1472 lineStartPosn = styler.LineStart(lineStart);
1473 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1474 lineStartPosn = styler.LineStart(lineStart);
1475 } else {
1476 break;
1477 }
1478 } else {
1479 break;
1480 }
1481 }
1482
1e9bafca
RD
1483 styler.Flush();
1484 while (--pos >= lineStartPosn) {
1485 style = actual_style(styler.StyleAt(pos));
1486 if (style == SCE_RB_DEFAULT) {
1487 if (iswhitespace(ch = styler[pos])) {
1488 //continue
1489 } else if (ch == '\r' || ch == '\n') {
1490 // Scintilla's LineStart() and GetLine() routines aren't
1491 // platform-independent, so if we have text prepared with
1492 // a different system we can't rely on it.
1dcf666d
RD
1493
1494 // Also, lineStartPosn may have been moved to more than one
1495 // line above word's line while pushing past continuations.
1496 chPrev = styler.SafeGetCharAt(pos - 1);
1497 chPrev2 = styler.SafeGetCharAt(pos - 2);
1498 if (chPrev == '\\') {
1499 pos-=1; // gloss over the "\\"
1500 //continue
1501 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1502 pos-=2; // gloss over the "\\\r"
1503 //continue
1504 } else {
1505 return false;
1506 }
65ec6247 1507 }
1e9bafca
RD
1508 } else {
1509 break;
65ec6247 1510 }
1e9bafca
RD
1511 }
1512 if (pos < lineStartPosn) {
1dcf666d 1513 return false;
1e9bafca
RD
1514 }
1515 // First things where the action is unambiguous
1516 switch (style) {
1517 case SCE_RB_DEFAULT:
1518 case SCE_RB_COMMENTLINE:
1519 case SCE_RB_POD:
1520 case SCE_RB_CLASSNAME:
1521 case SCE_RB_DEFNAME:
1522 case SCE_RB_MODULE_NAME:
1523 return false;
1524 case SCE_RB_OPERATOR:
1525 break;
1526 case SCE_RB_WORD:
1527 // Watch out for uses of 'else if'
1528 //XXX: Make a list of other keywords where 'if' isn't a modifier
1529 // and can appear legitimately
1530 // Formulate this to avoid warnings from most compilers
1531 if (strcmp(word, "if") == 0) {
1532 char prevWord[MAX_KEYWORD_LENGTH + 1];
1533 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1534 return strcmp(prevWord, "else") != 0;
1535 }
1536 return true;
1537 default:
1538 return true;
1539 }
1540 // Assume that if the keyword follows an operator,
1541 // usually it's a block assignment, like
1542 // a << if x then y else z
1dcf666d 1543
1e9bafca
RD
1544 ch = styler[pos];
1545 switch (ch) {
1546 case ')':
1547 case ']':
1548 case '}':
1549 return true;
1550 default:
1551 return false;
1552 }
65ec6247
RD
1553}
1554
1e9bafca
RD
1555#define WHILE_BACKWARDS "elihw"
1556#define UNTIL_BACKWARDS "litnu"
1557
1558// Nothing fancy -- look to see if we follow a while/until somewhere
1559// on the current line
1560
1561static bool keywordDoStartsLoop(int pos,
1562 Accessor &styler)
1563{
1564 char ch;
1565 int style;
1566 int lineStart = styler.GetLine(pos);
1567 int lineStartPosn = styler.LineStart(lineStart);
1568 styler.Flush();
1569 while (--pos >= lineStartPosn) {
1570 style = actual_style(styler.StyleAt(pos));
1571 if (style == SCE_RB_DEFAULT) {
1572 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1573 // Scintilla's LineStart() and GetLine() routines aren't
1574 // platform-independent, so if we have text prepared with
1575 // a different system we can't rely on it.
1576 return false;
1577 }
1578 } else if (style == SCE_RB_WORD) {
1579 // Check for while or until, but write the word in backwards
1580 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1581 char *dst = prevWord;
1582 int wordLen = 0;
1583 int start_word;
1584 for (start_word = pos;
1585 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1586 start_word--) {
1587 if (++wordLen < MAX_KEYWORD_LENGTH) {
1588 *dst++ = styler[start_word];
1589 }
1590 }
1591 *dst = 0;
1592 // Did we see our keyword?
1593 if (!strcmp(prevWord, WHILE_BACKWARDS)
1594 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1595 return true;
1596 }
1597 // We can move pos to the beginning of the keyword, and then
1598 // accept another decrement, as we can never have two contiguous
1599 // keywords:
1600 // word1 word2
1601 // ^
1602 // <- move to start_word
1603 // ^
1604 // <- loop decrement
1605 // ^ # pointing to end of word1 is fine
1606 pos = start_word;
1607 }
1608 }
1609 return false;
1610}
1611
1612/*
1613 * Folding Ruby
1dcf666d 1614 *
1e9bafca
RD
1615 * The language is quite complex to analyze without a full parse.
1616 * For example, this line shouldn't affect fold level:
1dcf666d 1617 *
1e9bafca 1618 * print "hello" if feeling_friendly?
1dcf666d 1619 *
1e9bafca 1620 * Neither should this:
1dcf666d 1621 *
1e9bafca
RD
1622 * print "hello" \
1623 * if feeling_friendly?
1dcf666d
RD
1624 *
1625 *
1e9bafca 1626 * But this should:
1dcf666d 1627 *
1e9bafca
RD
1628 * if feeling_friendly? #++
1629 * print "hello" \
1630 * print "goodbye"
1631 * end #--
1dcf666d 1632 *
1e9bafca
RD
1633 * So we cheat, by actually looking at the existing indentation
1634 * levels for each line, and just echoing it back. Like Python.
1635 * Then if we get better at it, we'll take braces into consideration,
1636 * which always affect folding levels.
1637
1638 * How the keywords should work:
1639 * No effect:
1dcf666d 1640 * __FILE__ __LINE__ BEGIN END alias and
1e9bafca
RD
1641 * defined? false in nil not or self super then
1642 * true undef
1643
1644 * Always increment:
1645 * begin class def do for module when {
1dcf666d 1646 *
1e9bafca
RD
1647 * Always decrement:
1648 * end }
1dcf666d 1649 *
1e9bafca
RD
1650 * Increment if these start a statement
1651 * if unless until while -- do nothing if they're modifiers
65ec6247 1652
1e9bafca
RD
1653 * These end a block if there's no modifier, but don't bother
1654 * break next redo retry return yield
1dcf666d 1655 *
1e9bafca
RD
1656 * These temporarily de-indent, but re-indent
1657 * case else elsif ensure rescue
1dcf666d 1658 *
1e9bafca
RD
1659 * This means that the folder reflects indentation rather
1660 * than setting it. The language-service updates indentation
1661 * when users type return and finishes entering de-denters.
1dcf666d 1662 *
1e9bafca
RD
1663 * Later offer to fold POD, here-docs, strings, and blocks of comments
1664 */
1665
1666static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1667 WordList *[], Accessor &styler) {
1668 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1669 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1dcf666d 1670
1e9bafca
RD
1671 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1672 false);
1673 unsigned int endPos = startPos + length;
1674 int visibleChars = 0;
65ec6247 1675 int lineCurrent = styler.GetLine(startPos);
1e9bafca
RD
1676 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1677 & SC_FOLDLEVELNUMBERMASK
1678 & ~SC_FOLDLEVELBASE);
1679 int levelCurrent = levelPrev;
65ec6247 1680 char chNext = styler[startPos];
1e9bafca
RD
1681 int styleNext = styler.StyleAt(startPos);
1682 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1683 bool buffer_ends_with_eol = false;
1684 for (unsigned int i = startPos; i < endPos; i++) {
65ec6247
RD
1685 char ch = chNext;
1686 chNext = styler.SafeGetCharAt(i + 1);
1e9bafca
RD
1687 int style = styleNext;
1688 styleNext = styler.StyleAt(i + 1);
1689 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1690 if (style == SCE_RB_COMMENTLINE) {
1691 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1692 if (chNext == '{') {
1693 levelCurrent++;
9e96e16f 1694 } else if (chNext == '}' && levelCurrent > 0) {
1e9bafca 1695 levelCurrent--;
65ec6247 1696 }
1e9bafca
RD
1697 }
1698 } else if (style == SCE_RB_OPERATOR) {
1699 if (strchr("[{(", ch)) {
1700 levelCurrent++;
1701 } else if (strchr(")}]", ch)) {
1702 // Don't decrement below 0
1703 if (levelCurrent > 0)
1704 levelCurrent--;
65ec6247 1705 }
1e9bafca
RD
1706 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1707 // Look at the keyword on the left and decide what to do
1708 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1709 prevWord[0] = 0;
1710 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1711 if (!strcmp(prevWord, "end")) {
1712 // Don't decrement below 0
1713 if (levelCurrent > 0)
1714 levelCurrent--;
1715 } else if ( !strcmp(prevWord, "if")
1716 || !strcmp(prevWord, "def")
1717 || !strcmp(prevWord, "class")
1718 || !strcmp(prevWord, "module")
1719 || !strcmp(prevWord, "begin")
1720 || !strcmp(prevWord, "case")
1721 || !strcmp(prevWord, "do")
1722 || !strcmp(prevWord, "while")
1723 || !strcmp(prevWord, "unless")
1724 || !strcmp(prevWord, "until")
1725 || !strcmp(prevWord, "for")
1726 ) {
1727 levelCurrent++;
1728 }
1dcf666d
RD
1729 } else if (style == SCE_RB_HERE_DELIM) {
1730 if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1731 levelCurrent++;
1732 } else if (styleNext == SCE_RB_DEFAULT) {
1733 levelCurrent--;
1734 }
1735 }
1e9bafca
RD
1736 if (atEOL) {
1737 int lev = levelPrev;
1738 if (visibleChars == 0 && foldCompact)
1739 lev |= SC_FOLDLEVELWHITEFLAG;
1740 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1741 lev |= SC_FOLDLEVELHEADERFLAG;
1742 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
65ec6247 1743 lineCurrent++;
1e9bafca
RD
1744 levelPrev = levelCurrent;
1745 visibleChars = 0;
1746 buffer_ends_with_eol = true;
1747 } else if (!isspacechar(ch)) {
1748 visibleChars++;
1749 buffer_ends_with_eol = false;
1750 }
9e96e16f 1751 stylePrev = style;
1e9bafca
RD
1752 }
1753 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1754 if (!buffer_ends_with_eol) {
1755 lineCurrent++;
1756 int new_lev = levelCurrent;
1757 if (visibleChars == 0 && foldCompact)
1758 new_lev |= SC_FOLDLEVELWHITEFLAG;
1759 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1760 new_lev |= SC_FOLDLEVELHEADERFLAG;
1761 levelCurrent = new_lev;
1762 }
1763 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
65ec6247 1764}
9e730a78
RD
1765
1766static const char * const rubyWordListDesc[] = {
1767 "Keywords",
1768 0
1769};
1770
1dcf666d 1771LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc, 6);