Commit | Line | Data |
---|---|---|
65ec6247 RD |
1 | // Scintilla source code edit control |
2 | /** @file LexRuby.cxx | |
3 | ** Lexer for Ruby. | |
4 | **/ | |
5 | // Copyright 2001- by Clemens Wyss <wys@helbling.ch> | |
6 | // The License.txt file describes the conditions under which this software may be distributed. | |
7 | ||
8 | #include <stdlib.h> | |
9 | #include <string.h> | |
65ec6247 RD |
10 | #include <stdio.h> |
11 | #include <stdarg.h> | |
1dcf666d RD |
12 | #include <assert.h> |
13 | #include <ctype.h> | |
65ec6247 | 14 | |
1dcf666d | 15 | #include "ILexer.h" |
65ec6247 RD |
16 | #include "Scintilla.h" |
17 | #include "SciLexer.h" | |
18 | ||
1dcf666d RD |
19 | #include "WordList.h" |
20 | #include "LexAccessor.h" | |
21 | #include "Accessor.h" | |
22 | #include "StyleContext.h" | |
23 | #include "CharacterSet.h" | |
24 | #include "LexerModule.h" | |
25 | ||
1e9bafca RD |
26 | #ifdef SCI_NAMESPACE |
27 | using namespace Scintilla; | |
28 | #endif | |
29 | ||
30 | //XXX Identical to Perl, put in common area | |
// True when ch is one of the two end-of-line characters (CR or LF).
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
34 | ||
// Range tests on a character value converted to unsigned int, so a
// negative (signed) char lands above 127 rather than below zero.
#define isSafeASCII(ch) (((unsigned int)(ch)) < 128u)
// Strictly the complement of isSafeASCII; kept as its own name because
// it reads better at the call sites.
#define isHighBitChar(ch) (((unsigned int)(ch)) >= 128u)
38 | ||
39 | static inline bool isSafeAlpha(char ch) { | |
40 | return (isSafeASCII(ch) && isalpha(ch)) || ch == '_'; | |
41 | } | |
42 | ||
43 | static inline bool isSafeAlnum(char ch) { | |
44 | return (isSafeASCII(ch) && isalnum(ch)) || ch == '_'; | |
45 | } | |
46 | ||
47 | static inline bool isSafeAlnumOrHigh(char ch) { | |
48 | return isHighBitChar(ch) || isalnum(ch) || ch == '_'; | |
49 | } | |
50 | ||
51 | static inline bool isSafeDigit(char ch) { | |
52 | return isSafeASCII(ch) && isdigit(ch); | |
53 | } | |
54 | ||
55 | static inline bool isSafeWordcharOrHigh(char ch) { | |
7e0c58e9 RD |
56 | // Error: scintilla's KeyWords.h includes '.' as a word-char |
57 | // we want to separate things that can take methods from the | |
58 | // methods. | |
59 | return isHighBitChar(ch) || isalnum(ch) || ch == '_'; | |
1e9bafca RD |
60 | } |
61 | ||
// True for in-line blanks only (space or tab); end-of-line characters
// do not count as whitespace here.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
65 | ||
// Size of the scratch buffer used when copying a word out of the document.
#define MAX_KEYWORD_LENGTH 200

// Low six bits of a style byte carry the lexical style proper.
#define STYLE_MASK 63
// Strip any flag bits from a style value. The argument is parenthesized
// so that compound expressions (e.g. `a | b`) are masked as a whole.
#define actual_style(style) ((style) & STYLE_MASK)
70 | ||
71 | static bool followsDot(unsigned int pos, Accessor &styler) { | |
72 | styler.Flush(); | |
73 | for (; pos >= 1; --pos) { | |
74 | int style = actual_style(styler.StyleAt(pos)); | |
75 | char ch; | |
76 | switch (style) { | |
77 | case SCE_RB_DEFAULT: | |
78 | ch = styler[pos]; | |
79 | if (ch == ' ' || ch == '\t') { | |
80 | //continue | |
81 | } else { | |
82 | return false; | |
83 | } | |
84 | break; | |
1dcf666d | 85 | |
1e9bafca RD |
86 | case SCE_RB_OPERATOR: |
87 | return styler[pos] == '.'; | |
88 | ||
89 | default: | |
90 | return false; | |
91 | } | |
92 | } | |
93 | return false; | |
94 | } | |
95 | ||
96 | // Forward declarations | |
97 | static bool keywordIsAmbiguous(const char *prevWord); | |
98 | static bool keywordDoStartsLoop(int pos, | |
99 | Accessor &styler); | |
100 | static bool keywordIsModifier(const char *word, | |
101 | int pos, | |
102 | Accessor &styler); | |
103 | ||
104 | static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) { | |
7e0c58e9 | 105 | char s[MAX_KEYWORD_LENGTH]; |
1e9bafca RD |
106 | unsigned int i, j; |
107 | unsigned int lim = end - start + 1; // num chars to copy | |
108 | if (lim >= MAX_KEYWORD_LENGTH) { | |
109 | lim = MAX_KEYWORD_LENGTH - 1; | |
110 | } | |
111 | for (i = start, j = 0; j < lim; i++, j++) { | |
112 | s[j] = styler[i]; | |
65ec6247 | 113 | } |
1e9bafca RD |
114 | s[j] = '\0'; |
115 | int chAttr; | |
65ec6247 | 116 | if (0 == strcmp(prevWord, "class")) |
1e9bafca | 117 | chAttr = SCE_RB_CLASSNAME; |
65ec6247 | 118 | else if (0 == strcmp(prevWord, "module")) |
1e9bafca | 119 | chAttr = SCE_RB_MODULE_NAME; |
65ec6247 | 120 | else if (0 == strcmp(prevWord, "def")) |
1e9bafca RD |
121 | chAttr = SCE_RB_DEFNAME; |
122 | else if (keywords.InList(s) && !followsDot(start - 1, styler)) { | |
123 | if (keywordIsAmbiguous(s) | |
124 | && keywordIsModifier(s, start, styler)) { | |
1dcf666d | 125 | |
1e9bafca RD |
126 | // Demoted keywords are colored as keywords, |
127 | // but do not affect changes in indentation. | |
128 | // | |
129 | // Consider the word 'if': | |
130 | // 1. <<if test ...>> : normal | |
131 | // 2. <<stmt if test>> : demoted | |
132 | // 3. <<lhs = if ...>> : normal: start a new indent level | |
133 | // 4. <<obj.if = 10>> : color as identifer, since it follows '.' | |
1dcf666d | 134 | |
1e9bafca RD |
135 | chAttr = SCE_RB_WORD_DEMOTED; |
136 | } else { | |
137 | chAttr = SCE_RB_WORD; | |
138 | } | |
139 | } else | |
140 | chAttr = SCE_RB_IDENTIFIER; | |
141 | styler.ColourTo(end, chAttr); | |
142 | if (chAttr == SCE_RB_WORD) { | |
143 | strcpy(prevWord, s); | |
144 | } else { | |
145 | prevWord[0] = 0; | |
146 | } | |
147 | return chAttr; | |
148 | } | |
149 | ||
150 | ||
151 | //XXX Identical to Perl, put in common area | |
152 | static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { | |
153 | if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { | |
154 | return false; | |
155 | } | |
156 | while (*val) { | |
157 | if (*val != styler[pos++]) { | |
158 | return false; | |
65ec6247 | 159 | } |
1e9bafca | 160 | val++; |
65ec6247 | 161 | } |
1e9bafca | 162 | return true; |
65ec6247 RD |
163 | } |
164 | ||
1e9bafca RD |
165 | // Do Ruby better -- find the end of the line, work back, |
166 | // and then check for leading white space | |
167 | ||
168 | // Precondition: the here-doc target can be indented | |
169 | static bool lookingAtHereDocDelim(Accessor &styler, | |
170 | int pos, | |
171 | int lengthDoc, | |
172 | const char *HereDocDelim) | |
173 | { | |
174 | if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) { | |
175 | return false; | |
176 | } | |
177 | while (--pos > 0) { | |
178 | char ch = styler[pos]; | |
179 | if (isEOLChar(ch)) { | |
180 | return true; | |
181 | } else if (ch != ' ' && ch != '\t') { | |
182 | return false; | |
183 | } | |
184 | } | |
185 | return false; | |
65ec6247 RD |
186 | } |
187 | ||
1e9bafca RD |
188 | //XXX Identical to Perl, put in common area |
// Map an opening bracket to its closing partner; every other character
// is its own partner.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
200 | ||
201 | // Null transitions when we see we've reached the end | |
202 | // and need to relex the curr char. | |
65ec6247 | 203 | |
1e9bafca RD |
204 | static void redo_char(int &i, char &ch, char &chNext, char &chNext2, |
205 | int &state) { | |
206 | i--; | |
207 | chNext2 = chNext; | |
208 | chNext = ch; | |
209 | state = SCE_RB_DEFAULT; | |
65ec6247 RD |
210 | } |
211 | ||
1e9bafca RD |
// Move the scan forward by one character: the index is bumped and the
// look-ahead characters shift down one slot. chNext2 is not refreshed.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    ++i;
}
217 | ||
1e9bafca RD |
218 | // precondition: startPos points to one after the EOL char |
219 | static bool currLineContainsHereDelims(int& startPos, | |
220 | Accessor &styler) { | |
221 | if (startPos <= 1) | |
222 | return false; | |
65ec6247 | 223 | |
1e9bafca RD |
224 | int pos; |
225 | for (pos = startPos - 1; pos > 0; pos--) { | |
226 | char ch = styler.SafeGetCharAt(pos); | |
227 | if (isEOLChar(ch)) { | |
228 | // Leave the pointers where they are -- there are no | |
229 | // here doc delims on the current line, even if | |
230 | // the EOL isn't default style | |
1dcf666d | 231 | |
1e9bafca RD |
232 | return false; |
233 | } else { | |
234 | styler.Flush(); | |
235 | if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) { | |
236 | break; | |
237 | } | |
238 | } | |
239 | } | |
240 | if (pos == 0) { | |
241 | return false; | |
242 | } | |
243 | // Update the pointers so we don't have to re-analyze the string | |
244 | startPos = pos; | |
245 | return true; | |
246 | } | |
65ec6247 | 247 | |
7e0c58e9 RD |
248 | // This class is used by the enter and exit methods, so it needs |
249 | // to be hoisted out of the function. | |
250 | ||
251 | class QuoteCls { | |
252 | public: | |
253 | int Count; | |
254 | char Up; | |
255 | char Down; | |
256 | QuoteCls() { | |
257 | this->New(); | |
258 | } | |
259 | void New() { | |
260 | Count = 0; | |
261 | Up = '\0'; | |
262 | Down = '\0'; | |
263 | } | |
264 | void Open(char u) { | |
265 | Count++; | |
266 | Up = u; | |
267 | Down = opposite(Up); | |
268 | } | |
269 | QuoteCls(const QuoteCls& q) { | |
270 | // copy constructor -- use this for copying in | |
271 | Count = q.Count; | |
272 | Up = q.Up; | |
273 | Down = q.Down; | |
274 | } | |
275 | QuoteCls& operator=(const QuoteCls& q) { // assignment constructor | |
276 | if (this != &q) { | |
277 | Count = q.Count; | |
278 | Up = q.Up; | |
279 | Down = q.Down; | |
280 | } | |
281 | return *this; | |
282 | } | |
1dcf666d | 283 | |
7e0c58e9 RD |
284 | }; |
285 | ||
286 | ||
287 | static void enterInnerExpression(int *p_inner_string_types, | |
288 | int *p_inner_expn_brace_counts, | |
289 | QuoteCls *p_inner_quotes, | |
290 | int& inner_string_count, | |
291 | int& state, | |
292 | int& brace_counts, | |
293 | QuoteCls curr_quote | |
294 | ) { | |
295 | p_inner_string_types[inner_string_count] = state; | |
296 | state = SCE_RB_DEFAULT; | |
297 | p_inner_expn_brace_counts[inner_string_count] = brace_counts; | |
298 | brace_counts = 0; | |
299 | p_inner_quotes[inner_string_count] = curr_quote; | |
300 | ++inner_string_count; | |
301 | } | |
302 | ||
303 | static void exitInnerExpression(int *p_inner_string_types, | |
304 | int *p_inner_expn_brace_counts, | |
305 | QuoteCls *p_inner_quotes, | |
306 | int& inner_string_count, | |
307 | int& state, | |
308 | int& brace_counts, | |
309 | QuoteCls& curr_quote | |
310 | ) { | |
311 | --inner_string_count; | |
312 | state = p_inner_string_types[inner_string_count]; | |
313 | brace_counts = p_inner_expn_brace_counts[inner_string_count]; | |
314 | curr_quote = p_inner_quotes[inner_string_count]; | |
315 | } | |
65ec6247 | 316 | |
1e9bafca RD |
317 | static bool isEmptyLine(int pos, |
318 | Accessor &styler) { | |
319 | int spaceFlags = 0; | |
320 | int lineCurrent = styler.GetLine(pos); | |
321 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL); | |
322 | return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0; | |
323 | } | |
8e54aaed | 324 | |
1e9bafca RD |
// True when keyword is one of the words after which a '/' may start a
// regular expression.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const re_leaders[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const unsigned int count = sizeof(re_leaders) / sizeof(re_leaders[0]);
    for (unsigned int k = 0; k < count; k++) {
        if (strcmp(keyword, re_leaders[k]) == 0) {
            return true;
        }
    }
    return false;
}
65ec6247 | 345 | |
1e9bafca RD |
346 | // Look at chars up to but not including endPos |
347 | // Don't look at styles in case we're looking forward | |
65ec6247 | 348 | |
1e9bafca RD |
349 | static int skipWhitespace(int startPos, |
350 | int endPos, | |
351 | Accessor &styler) { | |
352 | for (int i = startPos; i < endPos; i++) { | |
353 | if (!iswhitespace(styler[i])) { | |
354 | return i; | |
355 | } | |
356 | } | |
357 | return endPos; | |
358 | } | |
1dcf666d | 359 | |
1e9bafca RD |
360 | // This routine looks for false positives like |
361 | // undef foo, << | |
362 | // There aren't too many. | |
363 | // | |
364 | // iPrev points to the start of << | |
365 | ||
7e0c58e9 | 366 | static bool sureThisIsHeredoc(int iPrev, |
1e9bafca RD |
367 | Accessor &styler, |
368 | char *prevWord) { | |
1dcf666d | 369 | |
1e9bafca RD |
370 | // Not so fast, since Ruby's so dynamic. Check the context |
371 | // to make sure we're OK. | |
372 | int prevStyle; | |
373 | int lineStart = styler.GetLine(iPrev); | |
374 | int lineStartPosn = styler.LineStart(lineStart); | |
375 | styler.Flush(); | |
376 | ||
377 | // Find the first word after some whitespace | |
378 | int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler); | |
379 | if (firstWordPosn >= iPrev) { | |
380 | // Have something like {^ <<} | |
381 | //XXX Look at the first previous non-comment non-white line | |
382 | // to establish the context. Not too likely though. | |
383 | return true; | |
384 | } else { | |
385 | switch (prevStyle = styler.StyleAt(firstWordPosn)) { | |
386 | case SCE_RB_WORD: | |
387 | case SCE_RB_WORD_DEMOTED: | |
388 | case SCE_RB_IDENTIFIER: | |
389 | break; | |
390 | default: | |
391 | return true; | |
392 | } | |
393 | } | |
394 | int firstWordEndPosn = firstWordPosn; | |
395 | char *dst = prevWord; | |
396 | for (;;) { | |
397 | if (firstWordEndPosn >= iPrev || | |
398 | styler.StyleAt(firstWordEndPosn) != prevStyle) { | |
399 | *dst = 0; | |
400 | break; | |
401 | } | |
402 | *dst++ = styler[firstWordEndPosn]; | |
403 | firstWordEndPosn += 1; | |
404 | } | |
405 | //XXX Write a style-aware thing to regex scintilla buffer objects | |
406 | if (!strcmp(prevWord, "undef") | |
407 | || !strcmp(prevWord, "def") | |
408 | || !strcmp(prevWord, "alias")) { | |
409 | // These keywords are what we were looking for | |
410 | return false; | |
411 | } | |
412 | return true; | |
413 | } | |
414 | ||
415 | // Routine that saves us from allocating a buffer for the here-doc target | |
416 | // targetEndPos points one past the end of the current target | |
417 | static bool haveTargetMatch(int currPos, | |
418 | int lengthDoc, | |
419 | int targetStartPos, | |
420 | int targetEndPos, | |
421 | Accessor &styler) { | |
422 | if (lengthDoc - currPos < targetEndPos - targetStartPos) { | |
423 | return false; | |
424 | } | |
425 | int i, j; | |
426 | for (i = targetStartPos, j = currPos; | |
427 | i < targetEndPos && j < lengthDoc; | |
428 | i++, j++) { | |
429 | if (styler[i] != styler[j]) { | |
430 | return false; | |
431 | } | |
432 | } | |
433 | return true; | |
434 | } | |
435 | ||
436 | // We need a check because the form | |
437 | // [identifier] <<[target] | |
438 | // is ambiguous. The Ruby lexer/parser resolves it by | |
439 | // looking to see if [identifier] names a variable or a | |
440 | // function. If it's a function, it's the start of a here-doc. | |
441 | // If it's a var, it's an operator. This lexer doesn't | |
442 | // maintain a symbol table, so it looks ahead to see what's | |
443 | // going on, in cases where we have | |
444 | // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target] | |
445 | // | |
446 | // If there's no occurrence of [target] on a line, assume we don't. | |
447 | ||
448 | // return true == yes, we have no heredocs | |
449 | ||
450 | static bool sureThisIsNotHeredoc(int lt2StartPos, | |
451 | Accessor &styler) { | |
452 | int prevStyle; | |
453 | // Use full document, not just part we're styling | |
454 | int lengthDoc = styler.Length(); | |
455 | int lineStart = styler.GetLine(lt2StartPos); | |
456 | int lineStartPosn = styler.LineStart(lineStart); | |
457 | styler.Flush(); | |
458 | const bool definitely_not_a_here_doc = true; | |
459 | const bool looks_like_a_here_doc = false; | |
1dcf666d | 460 | |
1e9bafca RD |
461 | // Find the first word after some whitespace |
462 | int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler); | |
463 | if (firstWordPosn >= lt2StartPos) { | |
464 | return definitely_not_a_here_doc; | |
465 | } | |
466 | prevStyle = styler.StyleAt(firstWordPosn); | |
467 | // If we have '<<' following a keyword, it's not a heredoc | |
468 | if (prevStyle != SCE_RB_IDENTIFIER) { | |
469 | return definitely_not_a_here_doc; | |
470 | } | |
471 | int newStyle = prevStyle; | |
472 | // Some compilers incorrectly warn about uninit newStyle | |
473 | for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) { | |
474 | // Inner loop looks at the name | |
475 | for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) { | |
476 | newStyle = styler.StyleAt(firstWordPosn); | |
477 | if (newStyle != prevStyle) { | |
478 | break; | |
479 | } | |
480 | } | |
481 | // Do we have '::' or '.'? | |
482 | if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) { | |
483 | char ch = styler[firstWordPosn]; | |
484 | if (ch == '.') { | |
485 | // yes | |
486 | } else if (ch == ':') { | |
487 | if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) { | |
488 | return definitely_not_a_here_doc; | |
489 | } else if (styler[firstWordPosn] != ':') { | |
490 | return definitely_not_a_here_doc; | |
491 | } | |
492 | } else { | |
493 | break; | |
494 | } | |
495 | } else { | |
496 | break; | |
497 | } | |
498 | } | |
499 | // Skip next batch of white-space | |
500 | firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler); | |
501 | if (firstWordPosn != lt2StartPos) { | |
502 | // Have [[^ws[identifier]ws[*something_else*]ws<< | |
503 | return definitely_not_a_here_doc; | |
504 | } | |
505 | // OK, now 'j' will point to the current spot moving ahead | |
506 | int j = firstWordPosn + 1; | |
507 | if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') { | |
508 | // This shouldn't happen | |
509 | return definitely_not_a_here_doc; | |
510 | } | |
511 | int nextLineStartPosn = styler.LineStart(lineStart + 1); | |
512 | if (nextLineStartPosn >= lengthDoc) { | |
513 | return definitely_not_a_here_doc; | |
514 | } | |
515 | j = skipWhitespace(j + 1, nextLineStartPosn, styler); | |
516 | if (j >= lengthDoc) { | |
517 | return definitely_not_a_here_doc; | |
518 | } | |
519 | bool allow_indent; | |
520 | int target_start, target_end; | |
521 | // From this point on no more styling, since we're looking ahead | |
522 | if (styler[j] == '-') { | |
523 | allow_indent = true; | |
524 | j++; | |
525 | } else { | |
526 | allow_indent = false; | |
527 | } | |
528 | ||
529 | // Allow for quoted targets. | |
530 | char target_quote = 0; | |
531 | switch (styler[j]) { | |
532 | case '\'': | |
533 | case '"': | |
534 | case '`': | |
535 | target_quote = styler[j]; | |
536 | j += 1; | |
537 | } | |
1dcf666d | 538 | |
1e9bafca RD |
539 | if (isSafeAlnum(styler[j])) { |
540 | // Init target_end because some compilers think it won't | |
541 | // be initialized by the time it's used | |
542 | target_start = target_end = j; | |
543 | j++; | |
544 | } else { | |
545 | return definitely_not_a_here_doc; | |
546 | } | |
547 | for (; j < lengthDoc; j++) { | |
548 | if (!isSafeAlnum(styler[j])) { | |
549 | if (target_quote && styler[j] != target_quote) { | |
550 | // unquoted end | |
551 | return definitely_not_a_here_doc; | |
552 | } | |
553 | ||
554 | // And for now make sure that it's a newline | |
555 | // don't handle arbitrary expressions yet | |
1dcf666d | 556 | |
1e9bafca RD |
557 | target_end = j; |
558 | if (target_quote) { | |
559 | // Now we can move to the character after the string delimiter. | |
560 | j += 1; | |
561 | } | |
562 | j = skipWhitespace(j, lengthDoc, styler); | |
563 | if (j >= lengthDoc) { | |
564 | return definitely_not_a_here_doc; | |
565 | } else { | |
566 | char ch = styler[j]; | |
567 | if (ch == '#' || isEOLChar(ch)) { | |
568 | // This is OK, so break and continue; | |
569 | break; | |
570 | } else { | |
571 | return definitely_not_a_here_doc; | |
572 | } | |
573 | } | |
574 | } | |
575 | } | |
576 | ||
577 | // Just look at the start of each line | |
578 | int last_line = styler.GetLine(lengthDoc - 1); | |
579 | // But don't go too far | |
580 | if (last_line > lineStart + 50) { | |
581 | last_line = lineStart + 50; | |
582 | } | |
583 | for (int line_num = lineStart + 1; line_num <= last_line; line_num++) { | |
584 | if (allow_indent) { | |
585 | j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler); | |
586 | } else { | |
587 | j = styler.LineStart(line_num); | |
588 | } | |
589 | // target_end is one past the end | |
590 | if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) { | |
591 | // We got it | |
592 | return looks_like_a_here_doc; | |
593 | } | |
594 | } | |
595 | return definitely_not_a_here_doc; | |
596 | } | |
597 | ||
598 | //todo: if we aren't looking at a stdio character, | |
1dcf666d | 599 | // move to the start of the first line that is not in a |
1e9bafca RD |
600 | // multi-line construct |
601 | ||
602 | static void synchronizeDocStart(unsigned int& startPos, | |
603 | int &length, | |
604 | int &initStyle, | |
605 | Accessor &styler, | |
606 | bool skipWhiteSpace=false) { | |
607 | ||
608 | styler.Flush(); | |
609 | int style = actual_style(styler.StyleAt(startPos)); | |
610 | switch (style) { | |
611 | case SCE_RB_STDIN: | |
612 | case SCE_RB_STDOUT: | |
613 | case SCE_RB_STDERR: | |
614 | // Don't do anything else with these. | |
615 | return; | |
616 | } | |
1dcf666d | 617 | |
1e9bafca RD |
618 | int pos = startPos; |
619 | // Quick way to characterize each line | |
620 | int lineStart; | |
621 | for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) { | |
622 | // Now look at the style before the previous line's EOL | |
623 | pos = styler.LineStart(lineStart) - 1; | |
624 | if (pos <= 10) { | |
625 | lineStart = 0; | |
626 | break; | |
627 | } | |
628 | char ch = styler.SafeGetCharAt(pos); | |
629 | char chPrev = styler.SafeGetCharAt(pos - 1); | |
630 | if (ch == '\n' && chPrev == '\r') { | |
631 | pos--; | |
632 | } | |
633 | if (styler.SafeGetCharAt(pos - 1) == '\\') { | |
634 | // Continuation line -- keep going | |
635 | } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) { | |
636 | // Part of multi-line construct -- keep going | |
637 | } else if (currLineContainsHereDelims(pos, styler)) { | |
638 | // Keep going, with pos and length now pointing | |
639 | // at the end of the here-doc delimiter | |
640 | } else if (skipWhiteSpace && isEmptyLine(pos, styler)) { | |
641 | // Keep going | |
642 | } else { | |
643 | break; | |
644 | } | |
645 | } | |
646 | pos = styler.LineStart(lineStart); | |
647 | length += (startPos - pos); | |
648 | startPos = pos; | |
649 | initStyle = SCE_RB_DEFAULT; | |
65ec6247 RD |
650 | } |
651 | ||
652 | static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle, | |
653 | WordList *keywordlists[], Accessor &styler) { | |
654 | ||
1e9bafca RD |
655 | // Lexer for Ruby often has to backtrack to start of current style to determine |
656 | // which characters are being used as quotes, how deeply nested is the | |
657 | // start position and what the termination string is for here documents | |
1dcf666d | 658 | |
1e9bafca | 659 | WordList &keywords = *keywordlists[0]; |
65ec6247 | 660 | |
1e9bafca RD |
661 | class HereDocCls { |
662 | public: | |
663 | int State; | |
664 | // States | |
665 | // 0: '<<' encountered | |
666 | // 1: collect the delimiter | |
667 | // 1b: text between the end of the delimiter and the EOL | |
668 | // 2: here doc text (lines after the delimiter) | |
669 | char Quote; // the char after '<<' | |
670 | bool Quoted; // true if Quote in ('\'','"','`') | |
671 | int DelimiterLength; // strlen(Delimiter) | |
672 | char Delimiter[256]; // the Delimiter, limit of 256: from Perl | |
673 | bool CanBeIndented; | |
674 | HereDocCls() { | |
675 | State = 0; | |
676 | DelimiterLength = 0; | |
677 | Delimiter[0] = '\0'; | |
678 | CanBeIndented = false; | |
65ec6247 | 679 | } |
1e9bafca | 680 | }; |
1dcf666d | 681 | HereDocCls HereDoc; |
65ec6247 | 682 | |
1e9bafca | 683 | QuoteCls Quote; |
65ec6247 | 684 | |
1e9bafca RD |
685 | int numDots = 0; // For numbers -- |
686 | // Don't start lexing in the middle of a num | |
687 | ||
688 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args | |
689 | false); | |
65ec6247 | 690 | |
1e9bafca RD |
691 | bool preferRE = true; |
692 | int state = initStyle; | |
693 | int lengthDoc = startPos + length; | |
694 | ||
695 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
65ec6247 RD |
696 | prevWord[0] = '\0'; |
697 | if (length == 0) | |
1e9bafca | 698 | return; |
65ec6247 | 699 | |
1e9bafca RD |
700 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
701 | char chNext = styler.SafeGetCharAt(startPos); | |
7e0c58e9 | 702 | bool is_real_number = true; // Differentiate between constants and ?-sequences. |
1e9bafca RD |
703 | // Ruby uses a different mask because bad indentation is marked by oring with 32 |
704 | styler.StartAt(startPos, 127); | |
65ec6247 | 705 | styler.StartSegment(startPos); |
65ec6247 | 706 | |
1e9bafca RD |
707 | static int q_states[] = {SCE_RB_STRING_Q, |
708 | SCE_RB_STRING_QQ, | |
709 | SCE_RB_STRING_QR, | |
710 | SCE_RB_STRING_QW, | |
711 | SCE_RB_STRING_QW, | |
712 | SCE_RB_STRING_QX}; | |
713 | static const char* q_chars = "qQrwWx"; | |
7e0c58e9 RD |
714 | |
715 | // In most cases a value of 2 should be ample for the code in the | |
716 | // Ruby library, and the code the user is likely to enter. | |
717 | // For example, | |
718 | // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}" | |
719 | // if options[:verbose] | |
720 | // from fileutils.rb nests to a level of 2 | |
721 | // If the user actually hits a 6th occurrence of '#{' in a double-quoted | |
722 | // string (including regex'es, %Q, %<sym>, %w, and other strings | |
723 | // that interpolate), it will stay as a string. The problem with this | |
724 | // is that quotes might flip, a 7th '#{' will look like a comment, | |
725 | // and code-folding might be wrong. | |
726 | ||
727 | // If anyone runs into this problem, I recommend raising this | |
728 | // value slightly higher or replacing the fixed array with a linked | |
729 | // list. Keep in mind this code will be called every time the lexer | |
730 | // is invoked. | |
731 | ||
732 | #define INNER_STRINGS_MAX_COUNT 5 | |
733 | // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..." | |
734 | int inner_string_types[INNER_STRINGS_MAX_COUNT]; | |
735 | // Track # braces when we push a new #{ thing | |
736 | int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT]; | |
737 | QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT]; | |
738 | int inner_string_count = 0; | |
739 | int brace_counts = 0; // Number of #{ ... } things within an expression | |
740 | ||
741 | int i; | |
742 | for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) { | |
743 | inner_string_types[i] = 0; | |
744 | inner_expn_brace_counts[i] = 0; | |
745 | } | |
746 | for (i = startPos; i < lengthDoc; i++) { | |
65ec6247 RD |
747 | char ch = chNext; |
748 | chNext = styler.SafeGetCharAt(i + 1); | |
749 | char chNext2 = styler.SafeGetCharAt(i + 2); | |
750 | ||
1e9bafca RD |
751 | if (styler.IsLeadByte(ch)) { |
752 | chNext = chNext2; | |
65ec6247 | 753 | chPrev = ' '; |
65ec6247 RD |
754 | i += 1; |
755 | continue; | |
756 | } | |
1dcf666d | 757 | |
1e9bafca RD |
758 | // skip on DOS/Windows |
759 | //No, don't, because some things will get tagged on, | |
760 | // so we won't recognize keywords, for example | |
761 | #if 0 | |
762 | if (ch == '\r' && chNext == '\n') { | |
763 | continue; | |
764 | } | |
765 | #endif | |
1dcf666d | 766 | |
1e9bafca RD |
767 | if (HereDoc.State == 1 && isEOLChar(ch)) { |
768 | // Begin of here-doc (the line after the here-doc delimiter): | |
769 | HereDoc.State = 2; | |
770 | styler.ColourTo(i-1, state); | |
771 | // Don't check for a missing quote, just jump into | |
772 | // the here-doc state | |
773 | state = SCE_RB_HERE_Q; | |
774 | } | |
65ec6247 | 775 | |
1e9bafca RD |
776 | // Regular transitions |
777 | if (state == SCE_RB_DEFAULT) { | |
778 | if (isSafeDigit(ch)) { | |
779 | styler.ColourTo(i - 1, state); | |
780 | state = SCE_RB_NUMBER; | |
7e0c58e9 | 781 | is_real_number = true; |
1e9bafca RD |
782 | numDots = 0; |
783 | } else if (isHighBitChar(ch) || iswordstart(ch)) { | |
784 | styler.ColourTo(i - 1, state); | |
785 | state = SCE_RB_WORD; | |
65ec6247 RD |
786 | } else if (ch == '#') { |
787 | styler.ColourTo(i - 1, state); | |
1e9bafca RD |
788 | state = SCE_RB_COMMENTLINE; |
789 | } else if (ch == '=') { | |
65ec6247 | 790 | // =begin indicates the start of a comment (doc) block |
1dcf666d | 791 | if ((i == 0 || isEOLChar(chPrev)) |
1e9bafca RD |
792 | && chNext == 'b' |
793 | && styler.SafeGetCharAt(i + 2) == 'e' | |
794 | && styler.SafeGetCharAt(i + 3) == 'g' | |
795 | && styler.SafeGetCharAt(i + 4) == 'i' | |
796 | && styler.SafeGetCharAt(i + 5) == 'n' | |
1dcf666d | 797 | && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) { |
1e9bafca RD |
798 | styler.ColourTo(i - 1, state); |
799 | state = SCE_RB_POD; | |
800 | } else { | |
65ec6247 | 801 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
802 | styler.ColourTo(i, SCE_RB_OPERATOR); |
803 | preferRE = true; | |
65ec6247 | 804 | } |
1e9bafca | 805 | } else if (ch == '"') { |
65ec6247 | 806 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
807 | state = SCE_RB_STRING; |
808 | Quote.New(); | |
809 | Quote.Open(ch); | |
810 | } else if (ch == '\'') { | |
811 | styler.ColourTo(i - 1, state); | |
812 | state = SCE_RB_CHARACTER; | |
813 | Quote.New(); | |
814 | Quote.Open(ch); | |
815 | } else if (ch == '`') { | |
65ec6247 | 816 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
817 | state = SCE_RB_BACKTICKS; |
818 | Quote.New(); | |
819 | Quote.Open(ch); | |
820 | } else if (ch == '@') { | |
821 | // Instance or class var | |
822 | styler.ColourTo(i - 1, state); | |
823 | if (chNext == '@') { | |
824 | state = SCE_RB_CLASS_VAR; | |
825 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
826 | } else { | |
827 | state = SCE_RB_INSTANCE_VAR; | |
828 | } | |
829 | } else if (ch == '$') { | |
830 | // Check for a builtin global | |
831 | styler.ColourTo(i - 1, state); | |
832 | // Recognize it bit by bit | |
833 | state = SCE_RB_GLOBAL; | |
834 | } else if (ch == '/' && preferRE) { | |
835 | // Ambigous operator | |
836 | styler.ColourTo(i - 1, state); | |
837 | state = SCE_RB_REGEX; | |
838 | Quote.New(); | |
839 | Quote.Open(ch); | |
840 | } else if (ch == '<' && chNext == '<' && chNext2 != '=') { | |
841 | ||
842 | // Recognise the '<<' symbol - either a here document or a binary op | |
843 | styler.ColourTo(i - 1, state); | |
844 | i++; | |
845 | chNext = chNext2; | |
846 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
847 | ||
848 | if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) { | |
849 | // It's definitely not a here-doc, | |
850 | // based on Ruby's lexer/parser in the | |
851 | // heredoc_identifier routine. | |
852 | // Nothing else to do. | |
853 | } else if (preferRE) { | |
854 | if (sureThisIsHeredoc(i - 1, styler, prevWord)) { | |
855 | state = SCE_RB_HERE_DELIM; | |
856 | HereDoc.State = 0; | |
857 | } | |
858 | // else leave it in default state | |
859 | } else { | |
860 | if (sureThisIsNotHeredoc(i - 1, styler)) { | |
861 | // leave state as default | |
862 | // We don't have all the heuristics Perl has for indications | |
863 | // of a here-doc, because '<<' is overloadable and used | |
864 | // for so many other classes. | |
865 | } else { | |
866 | state = SCE_RB_HERE_DELIM; | |
867 | HereDoc.State = 0; | |
868 | } | |
869 | } | |
870 | preferRE = (state != SCE_RB_HERE_DELIM); | |
871 | } else if (ch == ':') { | |
872 | styler.ColourTo(i - 1, state); | |
873 | if (chNext == ':') { | |
874 | // Mark "::" as an operator, not symbol start | |
875 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); | |
876 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
877 | state = SCE_RB_DEFAULT; | |
878 | preferRE = false; | |
879 | } else if (isSafeWordcharOrHigh(chNext)) { | |
880 | state = SCE_RB_SYMBOL; | |
881 | } else if (strchr("[*!~+-*/%=<>&^|", chNext)) { | |
882 | // Do the operator analysis in-line, looking ahead | |
883 | // Based on the table in pickaxe 2nd ed., page 339 | |
884 | bool doColoring = true; | |
885 | switch (chNext) { | |
886 | case '[': | |
887 | if (chNext2 == ']' ) { | |
888 | char ch_tmp = styler.SafeGetCharAt(i + 3); | |
889 | if (ch_tmp == '=') { | |
890 | i += 3; | |
891 | ch = ch_tmp; | |
892 | chNext = styler.SafeGetCharAt(i + 1); | |
893 | } else { | |
894 | i += 2; | |
895 | ch = chNext2; | |
896 | chNext = ch_tmp; | |
897 | } | |
898 | } else { | |
899 | doColoring = false; | |
900 | } | |
901 | break; | |
902 | ||
903 | case '*': | |
904 | if (chNext2 == '*') { | |
905 | i += 2; | |
906 | ch = chNext2; | |
907 | chNext = styler.SafeGetCharAt(i + 1); | |
908 | } else { | |
909 | advance_char(i, ch, chNext, chNext2); | |
910 | } | |
911 | break; | |
912 | ||
913 | case '!': | |
914 | if (chNext2 == '=' || chNext2 == '~') { | |
915 | i += 2; | |
916 | ch = chNext2; | |
917 | chNext = styler.SafeGetCharAt(i + 1); | |
918 | } else { | |
919 | advance_char(i, ch, chNext, chNext2); | |
920 | } | |
921 | break; | |
922 | ||
923 | case '<': | |
924 | if (chNext2 == '<') { | |
925 | i += 2; | |
926 | ch = chNext2; | |
927 | chNext = styler.SafeGetCharAt(i + 1); | |
928 | } else if (chNext2 == '=') { | |
929 | char ch_tmp = styler.SafeGetCharAt(i + 3); | |
930 | if (ch_tmp == '>') { // <=> operator | |
931 | i += 3; | |
932 | ch = ch_tmp; | |
933 | chNext = styler.SafeGetCharAt(i + 1); | |
934 | } else { | |
935 | i += 2; | |
936 | ch = chNext2; | |
937 | chNext = ch_tmp; | |
938 | } | |
939 | } else { | |
940 | advance_char(i, ch, chNext, chNext2); | |
941 | } | |
942 | break; | |
943 | ||
944 | default: | |
945 | // Simple one-character operators | |
946 | advance_char(i, ch, chNext, chNext2); | |
947 | break; | |
948 | } | |
949 | if (doColoring) { | |
950 | styler.ColourTo(i, SCE_RB_SYMBOL); | |
951 | state = SCE_RB_DEFAULT; | |
952 | } | |
953 | } else if (!preferRE) { | |
954 | // Don't color symbol strings (yet) | |
955 | // Just color the ":" and color rest as string | |
956 | styler.ColourTo(i, SCE_RB_SYMBOL); | |
957 | state = SCE_RB_DEFAULT; | |
958 | } else { | |
959 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
960 | state = SCE_RB_DEFAULT; | |
961 | preferRE = true; | |
962 | } | |
963 | } else if (ch == '%') { | |
964 | styler.ColourTo(i - 1, state); | |
965 | bool have_string = false; | |
966 | if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) { | |
967 | Quote.New(); | |
968 | const char *hit = strchr(q_chars, chNext); | |
969 | if (hit != NULL) { | |
970 | state = q_states[hit - q_chars]; | |
971 | Quote.Open(chNext2); | |
972 | i += 2; | |
973 | ch = chNext2; | |
65ec6247 | 974 | chNext = styler.SafeGetCharAt(i + 1); |
1e9bafca RD |
975 | have_string = true; |
976 | } | |
7e0c58e9 | 977 | } else if (preferRE && !isSafeWordcharOrHigh(chNext)) { |
1e9bafca RD |
978 | // Ruby doesn't allow high bit chars here, |
979 | // but the editor host might | |
1dcf666d RD |
980 | Quote.New(); |
981 | state = SCE_RB_STRING_QQ; | |
982 | Quote.Open(chNext); | |
983 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
984 | have_string = true; | |
985 | } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) { | |
986 | // Ruby doesn't allow high bit chars here, | |
987 | // but the editor host might | |
988 | Quote.New(); | |
1e9bafca RD |
989 | state = SCE_RB_STRING_QQ; |
990 | Quote.Open(chNext); | |
991 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
992 | have_string = true; | |
993 | } | |
994 | if (!have_string) { | |
995 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
996 | // stay in default | |
997 | preferRE = true; | |
998 | } | |
7e0c58e9 RD |
999 | } else if (ch == '?') { |
1000 | styler.ColourTo(i - 1, state); | |
1001 | if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') { | |
1002 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
1003 | } else { | |
1004 | // It's the start of a character code escape sequence | |
1005 | // Color it as a number. | |
1006 | state = SCE_RB_NUMBER; | |
1007 | is_real_number = false; | |
1008 | } | |
1e9bafca RD |
1009 | } else if (isoperator(ch) || ch == '.') { |
1010 | styler.ColourTo(i - 1, state); | |
1011 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
1012 | // If we're ending an expression or block, | |
1013 | // assume it ends an object, and the ambivalent | |
1014 | // constructs are binary operators | |
1015 | // | |
1016 | // So if we don't have one of these chars, | |
1017 | // we aren't ending an object exp'n, and ops | |
1018 | // like : << / are unary operators. | |
1dcf666d | 1019 | |
7e0c58e9 RD |
1020 | if (ch == '{') { |
1021 | ++brace_counts; | |
1022 | preferRE = true; | |
1023 | } else if (ch == '}' && --brace_counts < 0 | |
1024 | && inner_string_count > 0) { | |
1025 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
1026 | exitInnerExpression(inner_string_types, | |
1027 | inner_expn_brace_counts, | |
1028 | inner_quotes, | |
1029 | inner_string_count, | |
1030 | state, brace_counts, Quote); | |
1031 | } else { | |
1032 | preferRE = (strchr(")}].", ch) == NULL); | |
1033 | } | |
1e9bafca RD |
1034 | // Stay in default state |
1035 | } else if (isEOLChar(ch)) { | |
1036 | // Make sure it's a true line-end, with no backslash | |
1037 | if ((ch == '\r' || (ch == '\n' && chPrev != '\r')) | |
1038 | && chPrev != '\\') { | |
1039 | // Assume we've hit the end of the statement. | |
1040 | preferRE = true; | |
1041 | } | |
1042 | } | |
1043 | } else if (state == SCE_RB_WORD) { | |
1044 | if (ch == '.' || !isSafeWordcharOrHigh(ch)) { | |
1045 | // Words include x? in all contexts, | |
1046 | // and <letters>= after either 'def' or a dot | |
1047 | // Move along until a complete word is on our left | |
1048 | ||
1049 | // Default accessor treats '.' as word-chars, | |
1050 | // but we don't for now. | |
1dcf666d | 1051 | |
1e9bafca RD |
1052 | if (ch == '=' |
1053 | && isSafeWordcharOrHigh(chPrev) | |
1054 | && (chNext == '(' | |
1055 | || strchr(" \t\n\r", chNext) != NULL) | |
1056 | && (!strcmp(prevWord, "def") | |
1057 | || followsDot(styler.GetStartSegment(), styler))) { | |
1058 | // <name>= is a name only when being def'd -- Get it the next time | |
1059 | // This means that <name>=<name> is always lexed as | |
1060 | // <name>, (op, =), <name> | |
1061 | } else if ((ch == '?' || ch == '!') | |
1062 | && isSafeWordcharOrHigh(chPrev) | |
1063 | && !isSafeWordcharOrHigh(chNext)) { | |
1064 | // <name>? is a name -- Get it the next time | |
1065 | // But <name>?<name> is always lexed as | |
1066 | // <name>, (op, ?), <name> | |
1067 | // Same with <name>! to indicate a method that | |
1068 | // modifies its target | |
1069 | } else if (isEOLChar(ch) | |
1070 | && isMatch(styler, lengthDoc, i - 7, "__END__")) { | |
1071 | styler.ColourTo(i, SCE_RB_DATASECTION); | |
1072 | state = SCE_RB_DATASECTION; | |
1073 | // No need to handle this state -- we'll just move to the end | |
1074 | preferRE = false; | |
1075 | } else { | |
1076 | int wordStartPos = styler.GetStartSegment(); | |
1077 | int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord); | |
1078 | switch (word_style) { | |
1079 | case SCE_RB_WORD: | |
1080 | preferRE = RE_CanFollowKeyword(prevWord); | |
1081 | break; | |
1dcf666d | 1082 | |
1e9bafca RD |
1083 | case SCE_RB_WORD_DEMOTED: |
1084 | preferRE = true; | |
1085 | break; | |
1dcf666d | 1086 | |
1e9bafca RD |
1087 | case SCE_RB_IDENTIFIER: |
1088 | if (isMatch(styler, lengthDoc, wordStartPos, "print")) { | |
1089 | preferRE = true; | |
1090 | } else if (isEOLChar(ch)) { | |
1091 | preferRE = true; | |
1092 | } else { | |
1093 | preferRE = false; | |
1094 | } | |
1095 | break; | |
1096 | default: | |
1097 | preferRE = false; | |
1098 | } | |
1099 | if (ch == '.') { | |
1100 | // We might be redefining an operator-method | |
1101 | preferRE = false; | |
1102 | } | |
1dcf666d | 1103 | // And if it's the first |
1e9bafca RD |
1104 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
1105 | } | |
1106 | } | |
1107 | } else if (state == SCE_RB_NUMBER) { | |
7e0c58e9 RD |
1108 | if (!is_real_number) { |
1109 | if (ch != '\\') { | |
1110 | styler.ColourTo(i, state); | |
1111 | state = SCE_RB_DEFAULT; | |
1112 | preferRE = false; | |
1113 | } else if (strchr("\\ntrfvaebs", chNext)) { | |
1114 | // Terminal escape sequence -- handle it next time | |
1115 | // Nothing more to do this time through the loop | |
1116 | } else if (chNext == 'C' || chNext == 'M') { | |
1117 | if (chNext2 != '-') { | |
1118 | // \C or \M ends the sequence -- handle it next time | |
1119 | } else { | |
1120 | // Move from abc?\C-x | |
1121 | // ^ | |
1122 | // to | |
1123 | // ^ | |
1124 | i += 2; | |
1125 | ch = chNext2; | |
1126 | chNext = styler.SafeGetCharAt(i + 1); | |
1127 | } | |
1128 | } else if (chNext == 'c') { | |
1129 | // Stay here, \c is a combining sequence | |
1130 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
1131 | } else { | |
1132 | // ?\x, including ?\\ is final. | |
1133 | styler.ColourTo(i + 1, state); | |
1134 | state = SCE_RB_DEFAULT; | |
1135 | preferRE = false; | |
1136 | advance_char(i, ch, chNext, chNext2); | |
1137 | } | |
1138 | } else if (isSafeAlnumOrHigh(ch) || ch == '_') { | |
1e9bafca | 1139 | // Keep going |
1dcf666d RD |
1140 | } else if (ch == '.' && chNext == '.') { |
1141 | ++numDots; | |
1142 | styler.ColourTo(i - 1, state); | |
1143 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1e9bafca RD |
1144 | } else if (ch == '.' && ++numDots == 1) { |
1145 | // Keep going | |
1146 | } else { | |
1147 | styler.ColourTo(i - 1, state); | |
1148 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1149 | preferRE = false; | |
1150 | } | |
1151 | } else if (state == SCE_RB_COMMENTLINE) { | |
1152 | if (isEOLChar(ch)) { | |
1153 | styler.ColourTo(i - 1, state); | |
1154 | state = SCE_RB_DEFAULT; | |
1155 | // Use whatever setting we had going into the comment | |
1156 | } | |
1157 | } else if (state == SCE_RB_HERE_DELIM) { | |
1158 | // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx | |
1159 | // Slightly different: if we find an immediate '-', | |
1160 | // the target can appear indented. | |
1dcf666d | 1161 | |
1e9bafca RD |
1162 | if (HereDoc.State == 0) { // '<<' encountered |
1163 | HereDoc.State = 1; | |
1164 | HereDoc.DelimiterLength = 0; | |
1165 | if (ch == '-') { | |
1166 | HereDoc.CanBeIndented = true; | |
1167 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
1168 | } else { | |
1169 | HereDoc.CanBeIndented = false; | |
1170 | } | |
1171 | if (isEOLChar(ch)) { | |
1172 | // Bail out of doing a here doc if there's no target | |
1173 | state = SCE_RB_DEFAULT; | |
1174 | preferRE = false; | |
1175 | } else { | |
1176 | HereDoc.Quote = ch; | |
1dcf666d | 1177 | |
1e9bafca RD |
1178 | if (ch == '\'' || ch == '"' || ch == '`') { |
1179 | HereDoc.Quoted = true; | |
1180 | HereDoc.Delimiter[0] = '\0'; | |
1181 | } else { | |
1182 | HereDoc.Quoted = false; | |
1183 | HereDoc.Delimiter[0] = ch; | |
1184 | HereDoc.Delimiter[1] = '\0'; | |
1185 | HereDoc.DelimiterLength = 1; | |
1186 | } | |
1187 | } | |
1188 | } else if (HereDoc.State == 1) { // collect the delimiter | |
1189 | if (isEOLChar(ch)) { | |
1190 | // End the quote now, and go back for more | |
1191 | styler.ColourTo(i - 1, state); | |
1192 | state = SCE_RB_DEFAULT; | |
1193 | i--; | |
1194 | chNext = ch; | |
1e9bafca RD |
1195 | preferRE = false; |
1196 | } else if (HereDoc.Quoted) { | |
1197 | if (ch == HereDoc.Quote) { // closing quote => end of delimiter | |
1198 | styler.ColourTo(i, state); | |
1199 | state = SCE_RB_DEFAULT; | |
1200 | preferRE = false; | |
1201 | } else { | |
1202 | if (ch == '\\' && !isEOLChar(chNext)) { | |
1203 | advance_char(i, ch, chNext, chNext2); | |
1204 | } | |
1205 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
1206 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
1207 | } | |
1208 | } else { // an unquoted here-doc delimiter | |
1209 | if (isSafeAlnumOrHigh(ch) || ch == '_') { | |
1210 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
1211 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
1212 | } else { | |
1213 | styler.ColourTo(i - 1, state); | |
1214 | redo_char(i, ch, chNext, chNext2, state); | |
1215 | preferRE = false; | |
65ec6247 | 1216 | } |
1e9bafca RD |
1217 | } |
1218 | if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) { | |
65ec6247 | 1219 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
1220 | state = SCE_RB_ERROR; |
1221 | preferRE = false; | |
65ec6247 | 1222 | } |
1e9bafca RD |
1223 | } |
1224 | } else if (state == SCE_RB_HERE_Q) { | |
1225 | // Not needed: HereDoc.State == 2 | |
1226 | // Indentable here docs: look backwards | |
1227 | // Non-indentable: look forwards, like in Perl | |
1228 | // | |
1229 | // Why: so we can quickly resolve things like <<-" abc" | |
1230 | ||
1231 | if (!HereDoc.CanBeIndented) { | |
1232 | if (isEOLChar(chPrev) | |
1233 | && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { | |
1234 | styler.ColourTo(i - 1, state); | |
1235 | i += HereDoc.DelimiterLength - 1; | |
1236 | chNext = styler.SafeGetCharAt(i + 1); | |
1237 | if (isEOLChar(chNext)) { | |
1238 | styler.ColourTo(i, SCE_RB_HERE_DELIM); | |
1239 | state = SCE_RB_DEFAULT; | |
1240 | HereDoc.State = 0; | |
1241 | preferRE = false; | |
1242 | } | |
1243 | // Otherwise we skipped through the here doc faster. | |
1244 | } | |
1245 | } else if (isEOLChar(chNext) | |
1246 | && lookingAtHereDocDelim(styler, | |
1247 | i - HereDoc.DelimiterLength + 1, | |
1248 | lengthDoc, | |
1249 | HereDoc.Delimiter)) { | |
1250 | styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state); | |
1251 | styler.ColourTo(i, SCE_RB_HERE_DELIM); | |
1252 | state = SCE_RB_DEFAULT; | |
1253 | preferRE = false; | |
1254 | HereDoc.State = 0; | |
1255 | } | |
1256 | } else if (state == SCE_RB_CLASS_VAR | |
1257 | || state == SCE_RB_INSTANCE_VAR | |
1258 | || state == SCE_RB_SYMBOL) { | |
1259 | if (!isSafeWordcharOrHigh(ch)) { | |
1260 | styler.ColourTo(i - 1, state); | |
1261 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1262 | preferRE = false; | |
1263 | } | |
1264 | } else if (state == SCE_RB_GLOBAL) { | |
1265 | if (!isSafeWordcharOrHigh(ch)) { | |
1266 | // handle special globals here as well | |
1267 | if (chPrev == '$') { | |
1268 | if (ch == '-') { | |
1269 | // Include the next char, like $-a | |
1270 | advance_char(i, ch, chNext, chNext2); | |
1271 | } | |
1272 | styler.ColourTo(i, state); | |
1273 | state = SCE_RB_DEFAULT; | |
1274 | } else { | |
1275 | styler.ColourTo(i - 1, state); | |
1276 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1277 | } | |
1278 | preferRE = false; | |
1279 | } | |
1280 | } else if (state == SCE_RB_POD) { | |
1281 | // PODs end with ^=end\s, -- any whitespace can follow =end | |
1282 | if (strchr(" \t\n\r", ch) != NULL | |
1283 | && i > 5 | |
1284 | && isEOLChar(styler[i - 5]) | |
1285 | && isMatch(styler, lengthDoc, i - 4, "=end")) { | |
1286 | styler.ColourTo(i - 1, state); | |
1287 | state = SCE_RB_DEFAULT; | |
1288 | preferRE = false; | |
1289 | } | |
1290 | } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) { | |
1291 | if (ch == '\\' && Quote.Up != '\\') { | |
1292 | // Skip one | |
1293 | advance_char(i, ch, chNext, chNext2); | |
1294 | } else if (ch == Quote.Down) { | |
1295 | Quote.Count--; | |
1296 | if (Quote.Count == 0) { | |
1297 | // Include the options | |
1298 | while (isSafeAlpha(chNext)) { | |
1299 | i++; | |
65ec6247 | 1300 | ch = chNext; |
1e9bafca RD |
1301 | chNext = styler.SafeGetCharAt(i + 1); |
1302 | } | |
1303 | styler.ColourTo(i, state); | |
1304 | state = SCE_RB_DEFAULT; | |
1305 | preferRE = false; | |
1306 | } | |
1307 | } else if (ch == Quote.Up) { | |
1308 | // Only if close quoter != open quoter | |
1309 | Quote.Count++; | |
1dcf666d | 1310 | |
1e9bafca | 1311 | } else if (ch == '#' ) { |
7e0c58e9 RD |
1312 | if (chNext == '{' |
1313 | && inner_string_count < INNER_STRINGS_MAX_COUNT) { | |
1314 | // process #{ ... } | |
1315 | styler.ColourTo(i - 1, state); | |
1316 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); | |
1317 | enterInnerExpression(inner_string_types, | |
1318 | inner_expn_brace_counts, | |
1319 | inner_quotes, | |
1320 | inner_string_count, | |
1321 | state, | |
1322 | brace_counts, | |
1323 | Quote); | |
1324 | preferRE = true; | |
1325 | // Skip one | |
1326 | advance_char(i, ch, chNext, chNext2); | |
1327 | } else { | |
1328 | //todo: distinguish comments from pound chars | |
1329 | // for now, handle as comment | |
1330 | styler.ColourTo(i - 1, state); | |
1331 | bool inEscape = false; | |
1332 | while (++i < lengthDoc) { | |
1333 | ch = styler.SafeGetCharAt(i); | |
1334 | if (ch == '\\') { | |
1335 | inEscape = true; | |
1336 | } else if (isEOLChar(ch)) { | |
1337 | // Comment inside a regex | |
1338 | styler.ColourTo(i - 1, SCE_RB_COMMENTLINE); | |
1339 | break; | |
1340 | } else if (inEscape) { | |
1341 | inEscape = false; // don't look at char | |
1342 | } else if (ch == Quote.Down) { | |
1343 | // Have the regular handler deal with this | |
1344 | // to get trailing modifiers. | |
1345 | i--; | |
1346 | ch = styler[i]; | |
1347 | break; | |
1348 | } | |
1e9bafca | 1349 | } |
7e0c58e9 | 1350 | chNext = styler.SafeGetCharAt(i + 1); |
1e9bafca | 1351 | } |
1e9bafca RD |
1352 | } |
1353 | // Quotes of all kinds... | |
1dcf666d | 1354 | } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ || |
1e9bafca RD |
1355 | state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW || |
1356 | state == SCE_RB_STRING || state == SCE_RB_CHARACTER || | |
1357 | state == SCE_RB_BACKTICKS) { | |
1358 | if (!Quote.Down && !isspacechar(ch)) { | |
1359 | Quote.Open(ch); | |
1360 | } else if (ch == '\\' && Quote.Up != '\\') { | |
1361 | //Riddle me this: Is it safe to skip *every* escaped char? | |
1362 | advance_char(i, ch, chNext, chNext2); | |
1363 | } else if (ch == Quote.Down) { | |
1364 | Quote.Count--; | |
1365 | if (Quote.Count == 0) { | |
1366 | styler.ColourTo(i, state); | |
1367 | state = SCE_RB_DEFAULT; | |
1368 | preferRE = false; | |
1369 | } | |
1370 | } else if (ch == Quote.Up) { | |
1371 | Quote.Count++; | |
7e0c58e9 RD |
1372 | } else if (ch == '#' && chNext == '{' |
1373 | && inner_string_count < INNER_STRINGS_MAX_COUNT | |
1374 | && state != SCE_RB_CHARACTER | |
1375 | && state != SCE_RB_STRING_Q) { | |
1376 | // process #{ ... } | |
1377 | styler.ColourTo(i - 1, state); | |
1378 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); | |
1379 | enterInnerExpression(inner_string_types, | |
1380 | inner_expn_brace_counts, | |
1381 | inner_quotes, | |
1382 | inner_string_count, | |
1383 | state, | |
1384 | brace_counts, | |
1385 | Quote); | |
1386 | preferRE = true; | |
1387 | // Skip one | |
1388 | advance_char(i, ch, chNext, chNext2); | |
1e9bafca RD |
1389 | } |
1390 | } | |
1dcf666d | 1391 | |
1e9bafca RD |
1392 | if (state == SCE_RB_ERROR) { |
1393 | break; | |
1394 | } | |
1395 | chPrev = ch; | |
1396 | } | |
1397 | if (state == SCE_RB_WORD) { | |
1398 | // We've ended on a word, possibly at EOF, and need to | |
1399 | // classify it. | |
1400 | (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord); | |
1401 | } else { | |
1402 | styler.ColourTo(lengthDoc - 1, state); | |
1403 | } | |
1404 | } | |
1405 | ||
1406 | // Helper functions for folding, disambiguation keywords | |
1dcf666d | 1407 | // Assert that there are no high-bit chars |
1e9bafca RD |
1408 | |
1409 | static void getPrevWord(int pos, | |
1410 | char *prevWord, | |
1411 | Accessor &styler, | |
1412 | int word_state) | |
1413 | { | |
1414 | int i; | |
1415 | styler.Flush(); | |
1416 | for (i = pos - 1; i > 0; i--) { | |
1417 | if (actual_style(styler.StyleAt(i)) != word_state) { | |
1418 | i++; | |
1419 | break; | |
1420 | } | |
1421 | } | |
1422 | if (i < pos - MAX_KEYWORD_LENGTH) // overflow | |
1423 | i = pos - MAX_KEYWORD_LENGTH; | |
1424 | char *dst = prevWord; | |
1425 | for (; i <= pos; i++) { | |
1426 | *dst++ = styler[i]; | |
1427 | } | |
1428 | *dst = 0; | |
1429 | } | |
1430 | ||
/**
 * Report whether `prevWord` is one of the keywords that can either open a
 * block or act as a statement modifier / noise word: "if", "do", "while",
 * "unless" or "until".
 *
 * @param prevWord NUL-terminated word to test (compared case-sensitively).
 * @return true when the word is in the list above, false otherwise.
 */
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Listed from most to least commonly used, so that the usual case
    // needs the fewest comparisons.
    static const char *const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);

    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}
1445 | ||
1446 | // Demote keywords in the following conditions: | |
1447 | // if, while, unless, until modify a statement | |
1dcf666d | 1448 | // do after a while or until, as a noise word (like then after if) |
1e9bafca RD |
1449 | |
1450 | static bool keywordIsModifier(const char *word, | |
1451 | int pos, | |
1452 | Accessor &styler) | |
1453 | { | |
1454 | if (word[0] == 'd' && word[1] == 'o' && !word[2]) { | |
1455 | return keywordDoStartsLoop(pos, styler); | |
1456 | } | |
1dcf666d | 1457 | char ch, chPrev, chPrev2; |
1e9bafca RD |
1458 | int style = SCE_RB_DEFAULT; |
1459 | int lineStart = styler.GetLine(pos); | |
1460 | int lineStartPosn = styler.LineStart(lineStart); | |
1dcf666d RD |
1461 | // We want to step backwards until we don't care about the current |
1462 | // position. But first move lineStartPosn back behind any | |
1463 | // continuations immediately above word. | |
1464 | while (lineStartPosn > 0) { | |
1465 | ch = styler[lineStartPosn-1]; | |
1466 | if (ch == '\n' || ch == '\r') { | |
1467 | chPrev = styler.SafeGetCharAt(lineStartPosn-2); | |
1468 | chPrev2 = styler.SafeGetCharAt(lineStartPosn-3); | |
1469 | lineStart = styler.GetLine(lineStartPosn-1); | |
1470 | // If we find a continuation line, include it in our analysis. | |
1471 | if (chPrev == '\\') { | |
1472 | lineStartPosn = styler.LineStart(lineStart); | |
1473 | } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') { | |
1474 | lineStartPosn = styler.LineStart(lineStart); | |
1475 | } else { | |
1476 | break; | |
1477 | } | |
1478 | } else { | |
1479 | break; | |
1480 | } | |
1481 | } | |
1482 | ||
1e9bafca RD |
1483 | styler.Flush(); |
1484 | while (--pos >= lineStartPosn) { | |
1485 | style = actual_style(styler.StyleAt(pos)); | |
1486 | if (style == SCE_RB_DEFAULT) { | |
1487 | if (iswhitespace(ch = styler[pos])) { | |
1488 | //continue | |
1489 | } else if (ch == '\r' || ch == '\n') { | |
1490 | // Scintilla's LineStart() and GetLine() routines aren't | |
1491 | // platform-independent, so if we have text prepared with | |
1492 | // a different system we can't rely on it. | |
1dcf666d RD |
1493 | |
1494 | // Also, lineStartPosn may have been moved to more than one | |
1495 | // line above word's line while pushing past continuations. | |
1496 | chPrev = styler.SafeGetCharAt(pos - 1); | |
1497 | chPrev2 = styler.SafeGetCharAt(pos - 2); | |
1498 | if (chPrev == '\\') { | |
1499 | pos-=1; // gloss over the "\\" | |
1500 | //continue | |
1501 | } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') { | |
1502 | pos-=2; // gloss over the "\\\r" | |
1503 | //continue | |
1504 | } else { | |
1505 | return false; | |
1506 | } | |
65ec6247 | 1507 | } |
1e9bafca RD |
1508 | } else { |
1509 | break; | |
65ec6247 | 1510 | } |
1e9bafca RD |
1511 | } |
1512 | if (pos < lineStartPosn) { | |
1dcf666d | 1513 | return false; |
1e9bafca RD |
1514 | } |
1515 | // First things where the action is unambiguous | |
1516 | switch (style) { | |
1517 | case SCE_RB_DEFAULT: | |
1518 | case SCE_RB_COMMENTLINE: | |
1519 | case SCE_RB_POD: | |
1520 | case SCE_RB_CLASSNAME: | |
1521 | case SCE_RB_DEFNAME: | |
1522 | case SCE_RB_MODULE_NAME: | |
1523 | return false; | |
1524 | case SCE_RB_OPERATOR: | |
1525 | break; | |
1526 | case SCE_RB_WORD: | |
1527 | // Watch out for uses of 'else if' | |
1528 | //XXX: Make a list of other keywords where 'if' isn't a modifier | |
1529 | // and can appear legitimately | |
1530 | // Formulate this to avoid warnings from most compilers | |
1531 | if (strcmp(word, "if") == 0) { | |
1532 | char prevWord[MAX_KEYWORD_LENGTH + 1]; | |
1533 | getPrevWord(pos, prevWord, styler, SCE_RB_WORD); | |
1534 | return strcmp(prevWord, "else") != 0; | |
1535 | } | |
1536 | return true; | |
1537 | default: | |
1538 | return true; | |
1539 | } | |
1540 | // Assume that if the keyword follows an operator, | |
1541 | // usually it's a block assignment, like | |
1542 | // a << if x then y else z | |
1dcf666d | 1543 | |
1e9bafca RD |
1544 | ch = styler[pos]; |
1545 | switch (ch) { | |
1546 | case ')': | |
1547 | case ']': | |
1548 | case '}': | |
1549 | return true; | |
1550 | default: | |
1551 | return false; | |
1552 | } | |
65ec6247 RD |
1553 | } |
1554 | ||
1e9bafca RD |
1555 | #define WHILE_BACKWARDS "elihw" |
1556 | #define UNTIL_BACKWARDS "litnu" | |
1557 | ||
1558 | // Nothing fancy -- look to see if we follow a while/until somewhere | |
1559 | // on the current line | |
1560 | ||
1561 | static bool keywordDoStartsLoop(int pos, | |
1562 | Accessor &styler) | |
1563 | { | |
1564 | char ch; | |
1565 | int style; | |
1566 | int lineStart = styler.GetLine(pos); | |
1567 | int lineStartPosn = styler.LineStart(lineStart); | |
1568 | styler.Flush(); | |
1569 | while (--pos >= lineStartPosn) { | |
1570 | style = actual_style(styler.StyleAt(pos)); | |
1571 | if (style == SCE_RB_DEFAULT) { | |
1572 | if ((ch = styler[pos]) == '\r' || ch == '\n') { | |
1573 | // Scintilla's LineStart() and GetLine() routines aren't | |
1574 | // platform-independent, so if we have text prepared with | |
1575 | // a different system we can't rely on it. | |
1576 | return false; | |
1577 | } | |
1578 | } else if (style == SCE_RB_WORD) { | |
1579 | // Check for while or until, but write the word in backwards | |
1580 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
1581 | char *dst = prevWord; | |
1582 | int wordLen = 0; | |
1583 | int start_word; | |
1584 | for (start_word = pos; | |
1585 | start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD; | |
1586 | start_word--) { | |
1587 | if (++wordLen < MAX_KEYWORD_LENGTH) { | |
1588 | *dst++ = styler[start_word]; | |
1589 | } | |
1590 | } | |
1591 | *dst = 0; | |
1592 | // Did we see our keyword? | |
1593 | if (!strcmp(prevWord, WHILE_BACKWARDS) | |
1594 | || !strcmp(prevWord, UNTIL_BACKWARDS)) { | |
1595 | return true; | |
1596 | } | |
1597 | // We can move pos to the beginning of the keyword, and then | |
1598 | // accept another decrement, as we can never have two contiguous | |
1599 | // keywords: | |
1600 | // word1 word2 | |
1601 | // ^ | |
1602 | // <- move to start_word | |
1603 | // ^ | |
1604 | // <- loop decrement | |
1605 | // ^ # pointing to end of word1 is fine | |
1606 | pos = start_word; | |
1607 | } | |
1608 | } | |
1609 | return false; | |
1610 | } | |
1611 | ||
1612 | /* | |
1613 | * Folding Ruby | |
1dcf666d | 1614 | * |
1e9bafca RD |
1615 | * The language is quite complex to analyze without a full parse. |
1616 | * For example, this line shouldn't affect fold level: | |
1dcf666d | 1617 | * |
1e9bafca | 1618 | * print "hello" if feeling_friendly? |
1dcf666d | 1619 | * |
1e9bafca | 1620 | * Neither should this: |
1dcf666d | 1621 | * |
1e9bafca RD |
1622 | * print "hello" \ |
1623 | * if feeling_friendly? | |
1dcf666d RD |
1624 | * |
1625 | * | |
1e9bafca | 1626 | * But this should: |
1dcf666d | 1627 | * |
1e9bafca RD |
1628 | * if feeling_friendly? #++ |
1629 | * print "hello" \ | |
1630 | * print "goodbye" | |
1631 | * end #-- | |
1dcf666d | 1632 | * |
1e9bafca RD |
1633 | * So we cheat, by actually looking at the existing indentation |
1634 | * levels for each line, and just echoing it back. Like Python. | |
1635 | * Then if we get better at it, we'll take braces into consideration, | |
1636 | * which always affect folding levels. | |
1637 | ||
1638 | * How the keywords should work: | |
1639 | * No effect: | |
1dcf666d | 1640 | * __FILE__ __LINE__ BEGIN END alias and |
1e9bafca RD |
1641 | * defined? false in nil not or self super then |
1642 | * true undef | |
1643 | ||
1644 | * Always increment: | |
1645 | * begin class def do for module when { | |
1dcf666d | 1646 | * |
1e9bafca RD |
1647 | * Always decrement: |
1648 | * end } | |
1dcf666d | 1649 | * |
1e9bafca RD |
1650 | * Increment if these start a statement |
1651 | * if unless until while -- do nothing if they're modifiers | |
65ec6247 | 1652 | |
1e9bafca RD |
1653 | * These end a block if there's no modifier, but don't bother |
1654 | * break next redo retry return yield | |
1dcf666d | 1655 | * |
1e9bafca RD |
1656 | * These temporarily de-indent, but re-indent |
1657 | * case else elsif ensure rescue | |
1dcf666d | 1658 | * |
1e9bafca RD |
1659 | * This means that the folder reflects indentation rather |
1660 | * than setting it. The language-service updates indentation | |
1661 | * when users type return and finish entering de-denters. | |
1dcf666d | 1662 | * |
1e9bafca RD |
1663 | * Later offer to fold POD, here-docs, strings, and blocks of comments |
1664 | */ | |
1665 | ||
1666 | static void FoldRbDoc(unsigned int startPos, int length, int initStyle, | |
1667 | WordList *[], Accessor &styler) { | |
1668 | const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; | |
1669 | bool foldComment = styler.GetPropertyInt("fold.comment") != 0; | |
1dcf666d | 1670 | |
1e9bafca RD |
1671 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args |
1672 | false); | |
1673 | unsigned int endPos = startPos + length; | |
1674 | int visibleChars = 0; | |
65ec6247 | 1675 | int lineCurrent = styler.GetLine(startPos); |
1e9bafca RD |
1676 | int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent) |
1677 | & SC_FOLDLEVELNUMBERMASK | |
1678 | & ~SC_FOLDLEVELBASE); | |
1679 | int levelCurrent = levelPrev; | |
65ec6247 | 1680 | char chNext = styler[startPos]; |
1e9bafca RD |
1681 | int styleNext = styler.StyleAt(startPos); |
1682 | int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1); | |
1683 | bool buffer_ends_with_eol = false; | |
1684 | for (unsigned int i = startPos; i < endPos; i++) { | |
65ec6247 RD |
1685 | char ch = chNext; |
1686 | chNext = styler.SafeGetCharAt(i + 1); | |
1e9bafca RD |
1687 | int style = styleNext; |
1688 | styleNext = styler.StyleAt(i + 1); | |
1689 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); | |
1690 | if (style == SCE_RB_COMMENTLINE) { | |
1691 | if (foldComment && stylePrev != SCE_RB_COMMENTLINE) { | |
1692 | if (chNext == '{') { | |
1693 | levelCurrent++; | |
9e96e16f | 1694 | } else if (chNext == '}' && levelCurrent > 0) { |
1e9bafca | 1695 | levelCurrent--; |
65ec6247 | 1696 | } |
1e9bafca RD |
1697 | } |
1698 | } else if (style == SCE_RB_OPERATOR) { | |
1699 | if (strchr("[{(", ch)) { | |
1700 | levelCurrent++; | |
1701 | } else if (strchr(")}]", ch)) { | |
1702 | // Don't decrement below 0 | |
1703 | if (levelCurrent > 0) | |
1704 | levelCurrent--; | |
65ec6247 | 1705 | } |
1e9bafca RD |
1706 | } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) { |
1707 | // Look at the keyword on the left and decide what to do | |
1708 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
1709 | prevWord[0] = 0; | |
1710 | getPrevWord(i, prevWord, styler, SCE_RB_WORD); | |
1711 | if (!strcmp(prevWord, "end")) { | |
1712 | // Don't decrement below 0 | |
1713 | if (levelCurrent > 0) | |
1714 | levelCurrent--; | |
1715 | } else if ( !strcmp(prevWord, "if") | |
1716 | || !strcmp(prevWord, "def") | |
1717 | || !strcmp(prevWord, "class") | |
1718 | || !strcmp(prevWord, "module") | |
1719 | || !strcmp(prevWord, "begin") | |
1720 | || !strcmp(prevWord, "case") | |
1721 | || !strcmp(prevWord, "do") | |
1722 | || !strcmp(prevWord, "while") | |
1723 | || !strcmp(prevWord, "unless") | |
1724 | || !strcmp(prevWord, "until") | |
1725 | || !strcmp(prevWord, "for") | |
1726 | ) { | |
1727 | levelCurrent++; | |
1728 | } | |
1dcf666d RD |
1729 | } else if (style == SCE_RB_HERE_DELIM) { |
1730 | if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') { | |
1731 | levelCurrent++; | |
1732 | } else if (styleNext == SCE_RB_DEFAULT) { | |
1733 | levelCurrent--; | |
1734 | } | |
1735 | } | |
1e9bafca RD |
1736 | if (atEOL) { |
1737 | int lev = levelPrev; | |
1738 | if (visibleChars == 0 && foldCompact) | |
1739 | lev |= SC_FOLDLEVELWHITEFLAG; | |
1740 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1741 | lev |= SC_FOLDLEVELHEADERFLAG; | |
1742 | styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE); | |
65ec6247 | 1743 | lineCurrent++; |
1e9bafca RD |
1744 | levelPrev = levelCurrent; |
1745 | visibleChars = 0; | |
1746 | buffer_ends_with_eol = true; | |
1747 | } else if (!isspacechar(ch)) { | |
1748 | visibleChars++; | |
1749 | buffer_ends_with_eol = false; | |
1750 | } | |
9e96e16f | 1751 | stylePrev = style; |
1e9bafca RD |
1752 | } |
1753 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
1754 | if (!buffer_ends_with_eol) { | |
1755 | lineCurrent++; | |
1756 | int new_lev = levelCurrent; | |
1757 | if (visibleChars == 0 && foldCompact) | |
1758 | new_lev |= SC_FOLDLEVELWHITEFLAG; | |
1759 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1760 | new_lev |= SC_FOLDLEVELHEADERFLAG; | |
1761 | levelCurrent = new_lev; | |
1762 | } | |
1763 | styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE); | |
65ec6247 | 1764 | } |
9e730a78 RD |
1765 | |
// Descriptions of the word lists used by the Ruby lexer. The array holds a
// single entry, "Keywords", followed by a 0 terminator, and is handed to the
// lmRuby LexerModule definition below.
1766 | static const char * const rubyWordListDesc[] = { | |
1767 | "Keywords", | |
1768 | 0 | |
1769 | }; | |
1770 | ||
// Global LexerModule object for Ruby. It is constructed with the SCLEX_RUBY
// identifier, ColouriseRbDoc, the language name "ruby", FoldRbDoc, and the
// rubyWordListDesc table.
// NOTE(review): the trailing 6 is presumably the number of style bits the
// lexer uses -- confirm against the LexerModule constructor declaration.
1dcf666d | 1771 | LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc, 6); |