]>
Commit | Line | Data |
---|---|---|
65ec6247 RD |
1 | // Scintilla source code edit control |
2 | /** @file LexRuby.cxx | |
3 | ** Lexer for Ruby. | |
4 | **/ | |
5 | // Copyright 2001- by Clemens Wyss <wys@helbling.ch> | |
6 | // The License.txt file describes the conditions under which this software may be distributed. | |
7 | ||
8 | #include <stdlib.h> | |
9 | #include <string.h> | |
10 | #include <ctype.h> | |
11 | #include <stdio.h> | |
12 | #include <stdarg.h> | |
13 | ||
14 | #include "Platform.h" | |
15 | ||
16 | #include "PropSet.h" | |
17 | #include "Accessor.h" | |
18 | #include "KeyWords.h" | |
19 | #include "Scintilla.h" | |
20 | #include "SciLexer.h" | |
21 | ||
1e9bafca RD |
22 | #ifdef SCI_NAMESPACE |
23 | using namespace Scintilla; | |
24 | #endif | |
25 | ||
26 | //XXX Identical to Perl, put in common area | |
// Returns true when ch is one of the two end-of-line characters
// (carriage return or line feed).
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
30 | ||
// True when ch, converted to unsigned int, lies in the range 0..127.
// A negative char converts to a very large unsigned value and so fails.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128U)
// Exact complement of isSafeASCII; redundant, but call sites read better.
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128U)
34 | ||
35 | static inline bool isSafeAlpha(char ch) { | |
36 | return (isSafeASCII(ch) && isalpha(ch)) || ch == '_'; | |
37 | } | |
38 | ||
39 | static inline bool isSafeAlnum(char ch) { | |
40 | return (isSafeASCII(ch) && isalnum(ch)) || ch == '_'; | |
41 | } | |
42 | ||
43 | static inline bool isSafeAlnumOrHigh(char ch) { | |
44 | return isHighBitChar(ch) || isalnum(ch) || ch == '_'; | |
45 | } | |
46 | ||
47 | static inline bool isSafeDigit(char ch) { | |
48 | return isSafeASCII(ch) && isdigit(ch); | |
49 | } | |
50 | ||
51 | static inline bool isSafeWordcharOrHigh(char ch) { | |
52 | return isHighBitChar(ch) || iswordchar(ch); | |
53 | } | |
54 | ||
// Returns true for a blank or a horizontal tab.  End-of-line characters
// are deliberately not counted as whitespace here.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
58 | ||
// Size used for keyword/word buffers in this lexer.
#define MAX_KEYWORD_LENGTH 200

// Bits of a style byte that hold the lexical style proper.
#define STYLE_MASK 63
// Strips everything outside STYLE_MASK from a style value.  The argument
// is parenthesized so that compound expressions such as
// actual_style(a | b) mask the whole expression, not only its last operand.
#define actual_style(style) ((style) & STYLE_MASK)
63 | ||
64 | static bool followsDot(unsigned int pos, Accessor &styler) { | |
65 | styler.Flush(); | |
66 | for (; pos >= 1; --pos) { | |
67 | int style = actual_style(styler.StyleAt(pos)); | |
68 | char ch; | |
69 | switch (style) { | |
70 | case SCE_RB_DEFAULT: | |
71 | ch = styler[pos]; | |
72 | if (ch == ' ' || ch == '\t') { | |
73 | //continue | |
74 | } else { | |
75 | return false; | |
76 | } | |
77 | break; | |
78 | ||
79 | case SCE_RB_OPERATOR: | |
80 | return styler[pos] == '.'; | |
81 | ||
82 | default: | |
83 | return false; | |
84 | } | |
85 | } | |
86 | return false; | |
87 | } | |
88 | ||
89 | // Forward declarations | |
90 | static bool keywordIsAmbiguous(const char *prevWord); | |
91 | static bool keywordDoStartsLoop(int pos, | |
92 | Accessor &styler); | |
93 | static bool keywordIsModifier(const char *word, | |
94 | int pos, | |
95 | Accessor &styler); | |
96 | ||
97 | static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) { | |
65ec6247 | 98 | char s[100]; |
1e9bafca RD |
99 | unsigned int i, j; |
100 | unsigned int lim = end - start + 1; // num chars to copy | |
101 | if (lim >= MAX_KEYWORD_LENGTH) { | |
102 | lim = MAX_KEYWORD_LENGTH - 1; | |
103 | } | |
104 | for (i = start, j = 0; j < lim; i++, j++) { | |
105 | s[j] = styler[i]; | |
65ec6247 | 106 | } |
1e9bafca RD |
107 | s[j] = '\0'; |
108 | int chAttr; | |
65ec6247 | 109 | if (0 == strcmp(prevWord, "class")) |
1e9bafca | 110 | chAttr = SCE_RB_CLASSNAME; |
65ec6247 | 111 | else if (0 == strcmp(prevWord, "module")) |
1e9bafca | 112 | chAttr = SCE_RB_MODULE_NAME; |
65ec6247 | 113 | else if (0 == strcmp(prevWord, "def")) |
1e9bafca RD |
114 | chAttr = SCE_RB_DEFNAME; |
115 | else if (keywords.InList(s) && !followsDot(start - 1, styler)) { | |
116 | if (keywordIsAmbiguous(s) | |
117 | && keywordIsModifier(s, start, styler)) { | |
118 | ||
119 | // Demoted keywords are colored as keywords, | |
120 | // but do not affect changes in indentation. | |
121 | // | |
122 | // Consider the word 'if': | |
123 | // 1. <<if test ...>> : normal | |
124 | // 2. <<stmt if test>> : demoted | |
125 | // 3. <<lhs = if ...>> : normal: start a new indent level | |
126 | // 4. <<obj.if = 10>> : color as identifer, since it follows '.' | |
127 | ||
128 | chAttr = SCE_RB_WORD_DEMOTED; | |
129 | } else { | |
130 | chAttr = SCE_RB_WORD; | |
131 | } | |
132 | } else | |
133 | chAttr = SCE_RB_IDENTIFIER; | |
134 | styler.ColourTo(end, chAttr); | |
135 | if (chAttr == SCE_RB_WORD) { | |
136 | strcpy(prevWord, s); | |
137 | } else { | |
138 | prevWord[0] = 0; | |
139 | } | |
140 | return chAttr; | |
141 | } | |
142 | ||
143 | ||
144 | //XXX Identical to Perl, put in common area | |
145 | static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { | |
146 | if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { | |
147 | return false; | |
148 | } | |
149 | while (*val) { | |
150 | if (*val != styler[pos++]) { | |
151 | return false; | |
65ec6247 | 152 | } |
1e9bafca | 153 | val++; |
65ec6247 | 154 | } |
1e9bafca | 155 | return true; |
65ec6247 RD |
156 | } |
157 | ||
1e9bafca RD |
158 | // Do Ruby better -- find the end of the line, work back, |
159 | // and then check for leading white space | |
160 | ||
161 | // Precondition: the here-doc target can be indented | |
162 | static bool lookingAtHereDocDelim(Accessor &styler, | |
163 | int pos, | |
164 | int lengthDoc, | |
165 | const char *HereDocDelim) | |
166 | { | |
167 | if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) { | |
168 | return false; | |
169 | } | |
170 | while (--pos > 0) { | |
171 | char ch = styler[pos]; | |
172 | if (isEOLChar(ch)) { | |
173 | return true; | |
174 | } else if (ch != ' ' && ch != '\t') { | |
175 | return false; | |
176 | } | |
177 | } | |
178 | return false; | |
65ec6247 RD |
179 | } |
180 | ||
1e9bafca RD |
181 | //XXX Identical to Perl, put in common area |
// Maps an opening bracket to its closing partner: ( -> ), [ -> ],
// { -> }, < -> >.  Any other character is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
193 | ||
194 | // Null transitions when we see we've reached the end | |
195 | // and need to relex the curr char. | |
65ec6247 | 196 | |
1e9bafca RD |
197 | static void redo_char(int &i, char &ch, char &chNext, char &chNext2, |
198 | int &state) { | |
199 | i--; | |
200 | chNext2 = chNext; | |
201 | chNext = ch; | |
202 | state = SCE_RB_DEFAULT; | |
65ec6247 RD |
203 | } |
204 | ||
1e9bafca RD |
// Moves the scanning state forward by one character: the index is
// incremented and the look-ahead characters are shifted left (ch takes
// chNext, chNext takes chNext2).  chNext2 is left as it was.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    i += 1;
}
210 | ||
1e9bafca RD |
211 | // precondition: startPos points to one after the EOL char |
212 | static bool currLineContainsHereDelims(int& startPos, | |
213 | Accessor &styler) { | |
214 | if (startPos <= 1) | |
215 | return false; | |
65ec6247 | 216 | |
1e9bafca RD |
217 | int pos; |
218 | for (pos = startPos - 1; pos > 0; pos--) { | |
219 | char ch = styler.SafeGetCharAt(pos); | |
220 | if (isEOLChar(ch)) { | |
221 | // Leave the pointers where they are -- there are no | |
222 | // here doc delims on the current line, even if | |
223 | // the EOL isn't default style | |
224 | ||
225 | return false; | |
226 | } else { | |
227 | styler.Flush(); | |
228 | if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) { | |
229 | break; | |
230 | } | |
231 | } | |
232 | } | |
233 | if (pos == 0) { | |
234 | return false; | |
235 | } | |
236 | // Update the pointers so we don't have to re-analyze the string | |
237 | startPos = pos; | |
238 | return true; | |
239 | } | |
65ec6247 | 240 | |
65ec6247 | 241 | |
1e9bafca RD |
242 | static bool isEmptyLine(int pos, |
243 | Accessor &styler) { | |
244 | int spaceFlags = 0; | |
245 | int lineCurrent = styler.GetLine(pos); | |
246 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL); | |
247 | return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0; | |
248 | } | |
8e54aaed | 249 | |
1e9bafca RD |
// Returns true when `keyword` is one of the keywords in the table below.
// The caller stores the result in its preferRE flag.  The comparison is
// exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const regexFriendly[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(regexFriendly) / sizeof(regexFriendly[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, regexFriendly[k]) == 0) {
            return true;
        }
    }
    return false;
}
65ec6247 | 270 | |
1e9bafca RD |
271 | // Look at chars up to but not including endPos |
272 | // Don't look at styles in case we're looking forward | |
65ec6247 | 273 | |
1e9bafca RD |
274 | static int skipWhitespace(int startPos, |
275 | int endPos, | |
276 | Accessor &styler) { | |
277 | for (int i = startPos; i < endPos; i++) { | |
278 | if (!iswhitespace(styler[i])) { | |
279 | return i; | |
280 | } | |
281 | } | |
282 | return endPos; | |
283 | } | |
284 | ||
285 | // This routine looks for false positives like | |
286 | // undef foo, << | |
287 | // There aren't too many. | |
288 | // | |
289 | // iPrev points to the start of << | |
290 | ||
291 | static bool sureThisIsHeredoc(int iPrev, | |
292 | Accessor &styler, | |
293 | char *prevWord) { | |
294 | ||
295 | // Not so fast, since Ruby's so dynamic. Check the context | |
296 | // to make sure we're OK. | |
297 | int prevStyle; | |
298 | int lineStart = styler.GetLine(iPrev); | |
299 | int lineStartPosn = styler.LineStart(lineStart); | |
300 | styler.Flush(); | |
301 | ||
302 | // Find the first word after some whitespace | |
303 | int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler); | |
304 | if (firstWordPosn >= iPrev) { | |
305 | // Have something like {^ <<} | |
306 | //XXX Look at the first previous non-comment non-white line | |
307 | // to establish the context. Not too likely though. | |
308 | return true; | |
309 | } else { | |
310 | switch (prevStyle = styler.StyleAt(firstWordPosn)) { | |
311 | case SCE_RB_WORD: | |
312 | case SCE_RB_WORD_DEMOTED: | |
313 | case SCE_RB_IDENTIFIER: | |
314 | break; | |
315 | default: | |
316 | return true; | |
317 | } | |
318 | } | |
319 | int firstWordEndPosn = firstWordPosn; | |
320 | char *dst = prevWord; | |
321 | for (;;) { | |
322 | if (firstWordEndPosn >= iPrev || | |
323 | styler.StyleAt(firstWordEndPosn) != prevStyle) { | |
324 | *dst = 0; | |
325 | break; | |
326 | } | |
327 | *dst++ = styler[firstWordEndPosn]; | |
328 | firstWordEndPosn += 1; | |
329 | } | |
330 | //XXX Write a style-aware thing to regex scintilla buffer objects | |
331 | if (!strcmp(prevWord, "undef") | |
332 | || !strcmp(prevWord, "def") | |
333 | || !strcmp(prevWord, "alias")) { | |
334 | // These keywords are what we were looking for | |
335 | return false; | |
336 | } | |
337 | return true; | |
338 | } | |
339 | ||
340 | // Routine that saves us from allocating a buffer for the here-doc target | |
341 | // targetEndPos points one past the end of the current target | |
342 | static bool haveTargetMatch(int currPos, | |
343 | int lengthDoc, | |
344 | int targetStartPos, | |
345 | int targetEndPos, | |
346 | Accessor &styler) { | |
347 | if (lengthDoc - currPos < targetEndPos - targetStartPos) { | |
348 | return false; | |
349 | } | |
350 | int i, j; | |
351 | for (i = targetStartPos, j = currPos; | |
352 | i < targetEndPos && j < lengthDoc; | |
353 | i++, j++) { | |
354 | if (styler[i] != styler[j]) { | |
355 | return false; | |
356 | } | |
357 | } | |
358 | return true; | |
359 | } | |
360 | ||
361 | // We need a check because the form | |
362 | // [identifier] <<[target] | |
363 | // is ambiguous. The Ruby lexer/parser resolves it by | |
364 | // looking to see if [identifier] names a variable or a | |
365 | // function. If it's a function, it's the start of a here-doc. | |
366 | // If it's a var, it's an operator. This lexer doesn't | |
367 | // maintain a symbol table, so it looks ahead to see what's | |
368 | // going on, in cases where we have | |
369 | // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target] | |
370 | // | |
371 | // If there's no occurrence of [target] on a line, assume we don't. | |
372 | ||
373 | // return true == yes, we have no heredocs | |
374 | ||
375 | static bool sureThisIsNotHeredoc(int lt2StartPos, | |
376 | Accessor &styler) { | |
377 | int prevStyle; | |
378 | // Use full document, not just part we're styling | |
379 | int lengthDoc = styler.Length(); | |
380 | int lineStart = styler.GetLine(lt2StartPos); | |
381 | int lineStartPosn = styler.LineStart(lineStart); | |
382 | styler.Flush(); | |
383 | const bool definitely_not_a_here_doc = true; | |
384 | const bool looks_like_a_here_doc = false; | |
385 | ||
386 | // Find the first word after some whitespace | |
387 | int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler); | |
388 | if (firstWordPosn >= lt2StartPos) { | |
389 | return definitely_not_a_here_doc; | |
390 | } | |
391 | prevStyle = styler.StyleAt(firstWordPosn); | |
392 | // If we have '<<' following a keyword, it's not a heredoc | |
393 | if (prevStyle != SCE_RB_IDENTIFIER) { | |
394 | return definitely_not_a_here_doc; | |
395 | } | |
396 | int newStyle = prevStyle; | |
397 | // Some compilers incorrectly warn about uninit newStyle | |
398 | for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) { | |
399 | // Inner loop looks at the name | |
400 | for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) { | |
401 | newStyle = styler.StyleAt(firstWordPosn); | |
402 | if (newStyle != prevStyle) { | |
403 | break; | |
404 | } | |
405 | } | |
406 | // Do we have '::' or '.'? | |
407 | if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) { | |
408 | char ch = styler[firstWordPosn]; | |
409 | if (ch == '.') { | |
410 | // yes | |
411 | } else if (ch == ':') { | |
412 | if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) { | |
413 | return definitely_not_a_here_doc; | |
414 | } else if (styler[firstWordPosn] != ':') { | |
415 | return definitely_not_a_here_doc; | |
416 | } | |
417 | } else { | |
418 | break; | |
419 | } | |
420 | } else { | |
421 | break; | |
422 | } | |
423 | } | |
424 | // Skip next batch of white-space | |
425 | firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler); | |
426 | if (firstWordPosn != lt2StartPos) { | |
427 | // Have [[^ws[identifier]ws[*something_else*]ws<< | |
428 | return definitely_not_a_here_doc; | |
429 | } | |
430 | // OK, now 'j' will point to the current spot moving ahead | |
431 | int j = firstWordPosn + 1; | |
432 | if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') { | |
433 | // This shouldn't happen | |
434 | return definitely_not_a_here_doc; | |
435 | } | |
436 | int nextLineStartPosn = styler.LineStart(lineStart + 1); | |
437 | if (nextLineStartPosn >= lengthDoc) { | |
438 | return definitely_not_a_here_doc; | |
439 | } | |
440 | j = skipWhitespace(j + 1, nextLineStartPosn, styler); | |
441 | if (j >= lengthDoc) { | |
442 | return definitely_not_a_here_doc; | |
443 | } | |
444 | bool allow_indent; | |
445 | int target_start, target_end; | |
446 | // From this point on no more styling, since we're looking ahead | |
447 | if (styler[j] == '-') { | |
448 | allow_indent = true; | |
449 | j++; | |
450 | } else { | |
451 | allow_indent = false; | |
452 | } | |
453 | ||
454 | // Allow for quoted targets. | |
455 | char target_quote = 0; | |
456 | switch (styler[j]) { | |
457 | case '\'': | |
458 | case '"': | |
459 | case '`': | |
460 | target_quote = styler[j]; | |
461 | j += 1; | |
462 | } | |
463 | ||
464 | if (isSafeAlnum(styler[j])) { | |
465 | // Init target_end because some compilers think it won't | |
466 | // be initialized by the time it's used | |
467 | target_start = target_end = j; | |
468 | j++; | |
469 | } else { | |
470 | return definitely_not_a_here_doc; | |
471 | } | |
472 | for (; j < lengthDoc; j++) { | |
473 | if (!isSafeAlnum(styler[j])) { | |
474 | if (target_quote && styler[j] != target_quote) { | |
475 | // unquoted end | |
476 | return definitely_not_a_here_doc; | |
477 | } | |
478 | ||
479 | // And for now make sure that it's a newline | |
480 | // don't handle arbitrary expressions yet | |
481 | ||
482 | target_end = j; | |
483 | if (target_quote) { | |
484 | // Now we can move to the character after the string delimiter. | |
485 | j += 1; | |
486 | } | |
487 | j = skipWhitespace(j, lengthDoc, styler); | |
488 | if (j >= lengthDoc) { | |
489 | return definitely_not_a_here_doc; | |
490 | } else { | |
491 | char ch = styler[j]; | |
492 | if (ch == '#' || isEOLChar(ch)) { | |
493 | // This is OK, so break and continue; | |
494 | break; | |
495 | } else { | |
496 | return definitely_not_a_here_doc; | |
497 | } | |
498 | } | |
499 | } | |
500 | } | |
501 | ||
502 | // Just look at the start of each line | |
503 | int last_line = styler.GetLine(lengthDoc - 1); | |
504 | // But don't go too far | |
505 | if (last_line > lineStart + 50) { | |
506 | last_line = lineStart + 50; | |
507 | } | |
508 | for (int line_num = lineStart + 1; line_num <= last_line; line_num++) { | |
509 | if (allow_indent) { | |
510 | j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler); | |
511 | } else { | |
512 | j = styler.LineStart(line_num); | |
513 | } | |
514 | // target_end is one past the end | |
515 | if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) { | |
516 | // We got it | |
517 | return looks_like_a_here_doc; | |
518 | } | |
519 | } | |
520 | return definitely_not_a_here_doc; | |
521 | } | |
522 | ||
523 | //todo: if we aren't looking at a stdio character, | |
524 | // move to the start of the first line that is not in a | |
525 | // multi-line construct | |
526 | ||
527 | static void synchronizeDocStart(unsigned int& startPos, | |
528 | int &length, | |
529 | int &initStyle, | |
530 | Accessor &styler, | |
531 | bool skipWhiteSpace=false) { | |
532 | ||
533 | styler.Flush(); | |
534 | int style = actual_style(styler.StyleAt(startPos)); | |
535 | switch (style) { | |
536 | case SCE_RB_STDIN: | |
537 | case SCE_RB_STDOUT: | |
538 | case SCE_RB_STDERR: | |
539 | // Don't do anything else with these. | |
540 | return; | |
541 | } | |
542 | ||
543 | int pos = startPos; | |
544 | // Quick way to characterize each line | |
545 | int lineStart; | |
546 | for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) { | |
547 | // Now look at the style before the previous line's EOL | |
548 | pos = styler.LineStart(lineStart) - 1; | |
549 | if (pos <= 10) { | |
550 | lineStart = 0; | |
551 | break; | |
552 | } | |
553 | char ch = styler.SafeGetCharAt(pos); | |
554 | char chPrev = styler.SafeGetCharAt(pos - 1); | |
555 | if (ch == '\n' && chPrev == '\r') { | |
556 | pos--; | |
557 | } | |
558 | if (styler.SafeGetCharAt(pos - 1) == '\\') { | |
559 | // Continuation line -- keep going | |
560 | } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) { | |
561 | // Part of multi-line construct -- keep going | |
562 | } else if (currLineContainsHereDelims(pos, styler)) { | |
563 | // Keep going, with pos and length now pointing | |
564 | // at the end of the here-doc delimiter | |
565 | } else if (skipWhiteSpace && isEmptyLine(pos, styler)) { | |
566 | // Keep going | |
567 | } else { | |
568 | break; | |
569 | } | |
570 | } | |
571 | pos = styler.LineStart(lineStart); | |
572 | length += (startPos - pos); | |
573 | startPos = pos; | |
574 | initStyle = SCE_RB_DEFAULT; | |
65ec6247 RD |
575 | } |
576 | ||
577 | static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle, | |
578 | WordList *keywordlists[], Accessor &styler) { | |
579 | ||
1e9bafca RD |
580 | // Lexer for Ruby often has to backtrack to start of current style to determine |
581 | // which characters are being used as quotes, how deeply nested is the | |
582 | // start position and what the termination string is for here documents | |
583 | ||
584 | WordList &keywords = *keywordlists[0]; | |
65ec6247 | 585 | |
1e9bafca RD |
586 | class HereDocCls { |
587 | public: | |
588 | int State; | |
589 | // States | |
590 | // 0: '<<' encountered | |
591 | // 1: collect the delimiter | |
592 | // 1b: text between the end of the delimiter and the EOL | |
593 | // 2: here doc text (lines after the delimiter) | |
594 | char Quote; // the char after '<<' | |
595 | bool Quoted; // true if Quote in ('\'','"','`') | |
596 | int DelimiterLength; // strlen(Delimiter) | |
597 | char Delimiter[256]; // the Delimiter, limit of 256: from Perl | |
598 | bool CanBeIndented; | |
599 | HereDocCls() { | |
600 | State = 0; | |
601 | DelimiterLength = 0; | |
602 | Delimiter[0] = '\0'; | |
603 | CanBeIndented = false; | |
65ec6247 | 604 | } |
1e9bafca RD |
605 | }; |
606 | HereDocCls HereDoc; | |
65ec6247 | 607 | |
1e9bafca RD |
608 | class QuoteCls { |
609 | public: | |
610 | int Count; | |
611 | char Up; | |
612 | char Down; | |
613 | QuoteCls() { | |
614 | this->New(); | |
615 | } | |
616 | void New() { | |
617 | Count = 0; | |
618 | Up = '\0'; | |
619 | Down = '\0'; | |
620 | } | |
621 | void Open(char u) { | |
622 | Count++; | |
623 | Up = u; | |
624 | Down = opposite(Up); | |
625 | } | |
626 | }; | |
627 | QuoteCls Quote; | |
65ec6247 | 628 | |
1e9bafca RD |
629 | int numDots = 0; // For numbers -- |
630 | // Don't start lexing in the middle of a num | |
631 | ||
632 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args | |
633 | false); | |
65ec6247 | 634 | |
1e9bafca RD |
635 | bool preferRE = true; |
636 | int state = initStyle; | |
637 | int lengthDoc = startPos + length; | |
638 | ||
639 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
65ec6247 RD |
640 | prevWord[0] = '\0'; |
641 | if (length == 0) | |
1e9bafca | 642 | return; |
65ec6247 | 643 | |
1e9bafca RD |
644 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
645 | char chNext = styler.SafeGetCharAt(startPos); | |
646 | // Ruby uses a different mask because bad indentation is marked by oring with 32 | |
647 | styler.StartAt(startPos, 127); | |
65ec6247 | 648 | styler.StartSegment(startPos); |
65ec6247 | 649 | |
1e9bafca RD |
650 | static int q_states[] = {SCE_RB_STRING_Q, |
651 | SCE_RB_STRING_QQ, | |
652 | SCE_RB_STRING_QR, | |
653 | SCE_RB_STRING_QW, | |
654 | SCE_RB_STRING_QW, | |
655 | SCE_RB_STRING_QX}; | |
656 | static const char* q_chars = "qQrwWx"; | |
657 | ||
658 | for (int i = startPos; i < lengthDoc; i++) { | |
65ec6247 RD |
659 | char ch = chNext; |
660 | chNext = styler.SafeGetCharAt(i + 1); | |
661 | char chNext2 = styler.SafeGetCharAt(i + 2); | |
662 | ||
1e9bafca RD |
663 | if (styler.IsLeadByte(ch)) { |
664 | chNext = chNext2; | |
65ec6247 | 665 | chPrev = ' '; |
65ec6247 RD |
666 | i += 1; |
667 | continue; | |
668 | } | |
1e9bafca RD |
669 | |
670 | // skip on DOS/Windows | |
671 | //No, don't, because some things will get tagged on, | |
672 | // so we won't recognize keywords, for example | |
673 | #if 0 | |
674 | if (ch == '\r' && chNext == '\n') { | |
675 | continue; | |
676 | } | |
677 | #endif | |
678 | ||
679 | if (HereDoc.State == 1 && isEOLChar(ch)) { | |
680 | // Begin of here-doc (the line after the here-doc delimiter): | |
681 | HereDoc.State = 2; | |
682 | styler.ColourTo(i-1, state); | |
683 | // Don't check for a missing quote, just jump into | |
684 | // the here-doc state | |
685 | state = SCE_RB_HERE_Q; | |
686 | } | |
65ec6247 | 687 | |
1e9bafca RD |
688 | // Regular transitions |
689 | if (state == SCE_RB_DEFAULT) { | |
690 | if (isSafeDigit(ch)) { | |
691 | styler.ColourTo(i - 1, state); | |
692 | state = SCE_RB_NUMBER; | |
693 | numDots = 0; | |
694 | } else if (isHighBitChar(ch) || iswordstart(ch)) { | |
695 | styler.ColourTo(i - 1, state); | |
696 | state = SCE_RB_WORD; | |
65ec6247 RD |
697 | } else if (ch == '#') { |
698 | styler.ColourTo(i - 1, state); | |
1e9bafca RD |
699 | state = SCE_RB_COMMENTLINE; |
700 | } else if (ch == '=') { | |
65ec6247 | 701 | // =begin indicates the start of a comment (doc) block |
1e9bafca RD |
702 | if (i == 0 || isEOLChar(chPrev) |
703 | && chNext == 'b' | |
704 | && styler.SafeGetCharAt(i + 2) == 'e' | |
705 | && styler.SafeGetCharAt(i + 3) == 'g' | |
706 | && styler.SafeGetCharAt(i + 4) == 'i' | |
707 | && styler.SafeGetCharAt(i + 5) == 'n' | |
708 | && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) { | |
709 | styler.ColourTo(i - 1, state); | |
710 | state = SCE_RB_POD; | |
711 | } else { | |
65ec6247 | 712 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
713 | styler.ColourTo(i, SCE_RB_OPERATOR); |
714 | preferRE = true; | |
65ec6247 | 715 | } |
1e9bafca | 716 | } else if (ch == '"') { |
65ec6247 | 717 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
718 | state = SCE_RB_STRING; |
719 | Quote.New(); | |
720 | Quote.Open(ch); | |
721 | } else if (ch == '\'') { | |
722 | styler.ColourTo(i - 1, state); | |
723 | state = SCE_RB_CHARACTER; | |
724 | Quote.New(); | |
725 | Quote.Open(ch); | |
726 | } else if (ch == '`') { | |
65ec6247 | 727 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
728 | state = SCE_RB_BACKTICKS; |
729 | Quote.New(); | |
730 | Quote.Open(ch); | |
731 | } else if (ch == '@') { | |
732 | // Instance or class var | |
733 | styler.ColourTo(i - 1, state); | |
734 | if (chNext == '@') { | |
735 | state = SCE_RB_CLASS_VAR; | |
736 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
737 | } else { | |
738 | state = SCE_RB_INSTANCE_VAR; | |
739 | } | |
740 | } else if (ch == '$') { | |
741 | // Check for a builtin global | |
742 | styler.ColourTo(i - 1, state); | |
743 | // Recognize it bit by bit | |
744 | state = SCE_RB_GLOBAL; | |
745 | } else if (ch == '/' && preferRE) { | |
746 | // Ambigous operator | |
747 | styler.ColourTo(i - 1, state); | |
748 | state = SCE_RB_REGEX; | |
749 | Quote.New(); | |
750 | Quote.Open(ch); | |
751 | } else if (ch == '<' && chNext == '<' && chNext2 != '=') { | |
752 | ||
753 | // Recognise the '<<' symbol - either a here document or a binary op | |
754 | styler.ColourTo(i - 1, state); | |
755 | i++; | |
756 | chNext = chNext2; | |
757 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
758 | ||
759 | if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) { | |
760 | // It's definitely not a here-doc, | |
761 | // based on Ruby's lexer/parser in the | |
762 | // heredoc_identifier routine. | |
763 | // Nothing else to do. | |
764 | } else if (preferRE) { | |
765 | if (sureThisIsHeredoc(i - 1, styler, prevWord)) { | |
766 | state = SCE_RB_HERE_DELIM; | |
767 | HereDoc.State = 0; | |
768 | } | |
769 | // else leave it in default state | |
770 | } else { | |
771 | if (sureThisIsNotHeredoc(i - 1, styler)) { | |
772 | // leave state as default | |
773 | // We don't have all the heuristics Perl has for indications | |
774 | // of a here-doc, because '<<' is overloadable and used | |
775 | // for so many other classes. | |
776 | } else { | |
777 | state = SCE_RB_HERE_DELIM; | |
778 | HereDoc.State = 0; | |
779 | } | |
780 | } | |
781 | preferRE = (state != SCE_RB_HERE_DELIM); | |
782 | } else if (ch == ':') { | |
783 | styler.ColourTo(i - 1, state); | |
784 | if (chNext == ':') { | |
785 | // Mark "::" as an operator, not symbol start | |
786 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); | |
787 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
788 | state = SCE_RB_DEFAULT; | |
789 | preferRE = false; | |
790 | } else if (isSafeWordcharOrHigh(chNext)) { | |
791 | state = SCE_RB_SYMBOL; | |
792 | } else if (strchr("[*!~+-*/%=<>&^|", chNext)) { | |
793 | // Do the operator analysis in-line, looking ahead | |
794 | // Based on the table in pickaxe 2nd ed., page 339 | |
795 | bool doColoring = true; | |
796 | switch (chNext) { | |
797 | case '[': | |
798 | if (chNext2 == ']' ) { | |
799 | char ch_tmp = styler.SafeGetCharAt(i + 3); | |
800 | if (ch_tmp == '=') { | |
801 | i += 3; | |
802 | ch = ch_tmp; | |
803 | chNext = styler.SafeGetCharAt(i + 1); | |
804 | } else { | |
805 | i += 2; | |
806 | ch = chNext2; | |
807 | chNext = ch_tmp; | |
808 | } | |
809 | } else { | |
810 | doColoring = false; | |
811 | } | |
812 | break; | |
813 | ||
814 | case '*': | |
815 | if (chNext2 == '*') { | |
816 | i += 2; | |
817 | ch = chNext2; | |
818 | chNext = styler.SafeGetCharAt(i + 1); | |
819 | } else { | |
820 | advance_char(i, ch, chNext, chNext2); | |
821 | } | |
822 | break; | |
823 | ||
824 | case '!': | |
825 | if (chNext2 == '=' || chNext2 == '~') { | |
826 | i += 2; | |
827 | ch = chNext2; | |
828 | chNext = styler.SafeGetCharAt(i + 1); | |
829 | } else { | |
830 | advance_char(i, ch, chNext, chNext2); | |
831 | } | |
832 | break; | |
833 | ||
834 | case '<': | |
835 | if (chNext2 == '<') { | |
836 | i += 2; | |
837 | ch = chNext2; | |
838 | chNext = styler.SafeGetCharAt(i + 1); | |
839 | } else if (chNext2 == '=') { | |
840 | char ch_tmp = styler.SafeGetCharAt(i + 3); | |
841 | if (ch_tmp == '>') { // <=> operator | |
842 | i += 3; | |
843 | ch = ch_tmp; | |
844 | chNext = styler.SafeGetCharAt(i + 1); | |
845 | } else { | |
846 | i += 2; | |
847 | ch = chNext2; | |
848 | chNext = ch_tmp; | |
849 | } | |
850 | } else { | |
851 | advance_char(i, ch, chNext, chNext2); | |
852 | } | |
853 | break; | |
854 | ||
855 | default: | |
856 | // Simple one-character operators | |
857 | advance_char(i, ch, chNext, chNext2); | |
858 | break; | |
859 | } | |
860 | if (doColoring) { | |
861 | styler.ColourTo(i, SCE_RB_SYMBOL); | |
862 | state = SCE_RB_DEFAULT; | |
863 | } | |
864 | } else if (!preferRE) { | |
865 | // Don't color symbol strings (yet) | |
866 | // Just color the ":" and color rest as string | |
867 | styler.ColourTo(i, SCE_RB_SYMBOL); | |
868 | state = SCE_RB_DEFAULT; | |
869 | } else { | |
870 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
871 | state = SCE_RB_DEFAULT; | |
872 | preferRE = true; | |
873 | } | |
874 | } else if (ch == '%') { | |
875 | styler.ColourTo(i - 1, state); | |
876 | bool have_string = false; | |
877 | if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) { | |
878 | Quote.New(); | |
879 | const char *hit = strchr(q_chars, chNext); | |
880 | if (hit != NULL) { | |
881 | state = q_states[hit - q_chars]; | |
882 | Quote.Open(chNext2); | |
883 | i += 2; | |
884 | ch = chNext2; | |
65ec6247 | 885 | chNext = styler.SafeGetCharAt(i + 1); |
1e9bafca RD |
886 | have_string = true; |
887 | } | |
888 | } else if (!isSafeWordcharOrHigh(chNext)) { | |
889 | // Ruby doesn't allow high bit chars here, | |
890 | // but the editor host might | |
891 | state = SCE_RB_STRING_QQ; | |
892 | Quote.Open(chNext); | |
893 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
894 | have_string = true; | |
895 | } | |
896 | if (!have_string) { | |
897 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
898 | // stay in default | |
899 | preferRE = true; | |
900 | } | |
901 | } else if (isoperator(ch) || ch == '.') { | |
902 | styler.ColourTo(i - 1, state); | |
903 | styler.ColourTo(i, SCE_RB_OPERATOR); | |
904 | // If we're ending an expression or block, | |
905 | // assume it ends an object, and the ambivalent | |
906 | // constructs are binary operators | |
907 | // | |
908 | // So if we don't have one of these chars, | |
909 | // we aren't ending an object exp'n, and ops | |
910 | // like : << / are unary operators. | |
911 | ||
912 | preferRE = (strchr(")}].", ch) == NULL); | |
913 | // Stay in default state | |
914 | } else if (isEOLChar(ch)) { | |
915 | // Make sure it's a true line-end, with no backslash | |
916 | if ((ch == '\r' || (ch == '\n' && chPrev != '\r')) | |
917 | && chPrev != '\\') { | |
918 | // Assume we've hit the end of the statement. | |
919 | preferRE = true; | |
920 | } | |
921 | } | |
922 | } else if (state == SCE_RB_WORD) { | |
923 | if (ch == '.' || !isSafeWordcharOrHigh(ch)) { | |
924 | // Words include x? in all contexts, | |
925 | // and <letters>= after either 'def' or a dot | |
926 | // Move along until a complete word is on our left | |
927 | ||
928 | // Default accessor treats '.' as word-chars, | |
929 | // but we don't for now. | |
930 | ||
931 | if (ch == '=' | |
932 | && isSafeWordcharOrHigh(chPrev) | |
933 | && (chNext == '(' | |
934 | || strchr(" \t\n\r", chNext) != NULL) | |
935 | && (!strcmp(prevWord, "def") | |
936 | || followsDot(styler.GetStartSegment(), styler))) { | |
937 | // <name>= is a name only when being def'd -- Get it the next time | |
938 | // This means that <name>=<name> is always lexed as | |
939 | // <name>, (op, =), <name> | |
940 | } else if ((ch == '?' || ch == '!') | |
941 | && isSafeWordcharOrHigh(chPrev) | |
942 | && !isSafeWordcharOrHigh(chNext)) { | |
943 | // <name>? is a name -- Get it the next time | |
944 | // But <name>?<name> is always lexed as | |
945 | // <name>, (op, ?), <name> | |
946 | // Same with <name>! to indicate a method that | |
947 | // modifies its target | |
948 | } else if (isEOLChar(ch) | |
949 | && isMatch(styler, lengthDoc, i - 7, "__END__")) { | |
950 | styler.ColourTo(i, SCE_RB_DATASECTION); | |
951 | state = SCE_RB_DATASECTION; | |
952 | // No need to handle this state -- we'll just move to the end | |
953 | preferRE = false; | |
954 | } else { | |
955 | int wordStartPos = styler.GetStartSegment(); | |
956 | int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord); | |
957 | switch (word_style) { | |
958 | case SCE_RB_WORD: | |
959 | preferRE = RE_CanFollowKeyword(prevWord); | |
960 | break; | |
961 | ||
962 | case SCE_RB_WORD_DEMOTED: | |
963 | preferRE = true; | |
964 | break; | |
965 | ||
966 | case SCE_RB_IDENTIFIER: | |
967 | if (isMatch(styler, lengthDoc, wordStartPos, "print")) { | |
968 | preferRE = true; | |
969 | } else if (isEOLChar(ch)) { | |
970 | preferRE = true; | |
971 | } else { | |
972 | preferRE = false; | |
973 | } | |
974 | break; | |
975 | default: | |
976 | preferRE = false; | |
977 | } | |
978 | if (ch == '.') { | |
979 | // We might be redefining an operator-method | |
980 | preferRE = false; | |
981 | } | |
982 | // And if it's the first | |
983 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
984 | } | |
985 | } | |
986 | } else if (state == SCE_RB_NUMBER) { | |
987 | if (isSafeAlnumOrHigh(ch) || ch == '_') { | |
988 | // Keep going | |
989 | } else if (ch == '.' && ++numDots == 1) { | |
990 | // Keep going | |
991 | } else { | |
992 | styler.ColourTo(i - 1, state); | |
993 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
994 | preferRE = false; | |
995 | } | |
996 | } else if (state == SCE_RB_COMMENTLINE) { | |
997 | if (isEOLChar(ch)) { | |
998 | styler.ColourTo(i - 1, state); | |
999 | state = SCE_RB_DEFAULT; | |
1000 | // Use whatever setting we had going into the comment | |
1001 | } | |
1002 | } else if (state == SCE_RB_HERE_DELIM) { | |
1003 | // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx | |
1004 | // Slightly different: if we find an immediate '-', | |
1005 | // the target can appear indented. | |
1006 | ||
1007 | if (HereDoc.State == 0) { // '<<' encountered | |
1008 | HereDoc.State = 1; | |
1009 | HereDoc.DelimiterLength = 0; | |
1010 | if (ch == '-') { | |
1011 | HereDoc.CanBeIndented = true; | |
1012 | advance_char(i, ch, chNext, chNext2); // pass by ref | |
1013 | } else { | |
1014 | HereDoc.CanBeIndented = false; | |
1015 | } | |
1016 | if (isEOLChar(ch)) { | |
1017 | // Bail out of doing a here doc if there's no target | |
1018 | state = SCE_RB_DEFAULT; | |
1019 | preferRE = false; | |
1020 | } else { | |
1021 | HereDoc.Quote = ch; | |
1022 | ||
1023 | if (ch == '\'' || ch == '"' || ch == '`') { | |
1024 | HereDoc.Quoted = true; | |
1025 | HereDoc.Delimiter[0] = '\0'; | |
1026 | } else { | |
1027 | HereDoc.Quoted = false; | |
1028 | HereDoc.Delimiter[0] = ch; | |
1029 | HereDoc.Delimiter[1] = '\0'; | |
1030 | HereDoc.DelimiterLength = 1; | |
1031 | } | |
1032 | } | |
1033 | } else if (HereDoc.State == 1) { // collect the delimiter | |
1034 | if (isEOLChar(ch)) { | |
1035 | // End the quote now, and go back for more | |
1036 | styler.ColourTo(i - 1, state); | |
1037 | state = SCE_RB_DEFAULT; | |
1038 | i--; | |
1039 | chNext = ch; | |
1040 | chNext2 = chNext; | |
1041 | preferRE = false; | |
1042 | } else if (HereDoc.Quoted) { | |
1043 | if (ch == HereDoc.Quote) { // closing quote => end of delimiter | |
1044 | styler.ColourTo(i, state); | |
1045 | state = SCE_RB_DEFAULT; | |
1046 | preferRE = false; | |
1047 | } else { | |
1048 | if (ch == '\\' && !isEOLChar(chNext)) { | |
1049 | advance_char(i, ch, chNext, chNext2); | |
1050 | } | |
1051 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
1052 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
1053 | } | |
1054 | } else { // an unquoted here-doc delimiter | |
1055 | if (isSafeAlnumOrHigh(ch) || ch == '_') { | |
1056 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
1057 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
1058 | } else { | |
1059 | styler.ColourTo(i - 1, state); | |
1060 | redo_char(i, ch, chNext, chNext2, state); | |
1061 | preferRE = false; | |
65ec6247 | 1062 | } |
1e9bafca RD |
1063 | } |
1064 | if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) { | |
65ec6247 | 1065 | styler.ColourTo(i - 1, state); |
1e9bafca RD |
1066 | state = SCE_RB_ERROR; |
1067 | preferRE = false; | |
65ec6247 | 1068 | } |
1e9bafca RD |
1069 | } |
1070 | } else if (state == SCE_RB_HERE_Q) { | |
1071 | // Not needed: HereDoc.State == 2 | |
1072 | // Indentable here docs: look backwards | |
1073 | // Non-indentable: look forwards, like in Perl | |
1074 | // | |
1075 | // Why: so we can quickly resolve things like <<-" abc" | |
1076 | ||
1077 | if (!HereDoc.CanBeIndented) { | |
1078 | if (isEOLChar(chPrev) | |
1079 | && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { | |
1080 | styler.ColourTo(i - 1, state); | |
1081 | i += HereDoc.DelimiterLength - 1; | |
1082 | chNext = styler.SafeGetCharAt(i + 1); | |
1083 | if (isEOLChar(chNext)) { | |
1084 | styler.ColourTo(i, SCE_RB_HERE_DELIM); | |
1085 | state = SCE_RB_DEFAULT; | |
1086 | HereDoc.State = 0; | |
1087 | preferRE = false; | |
1088 | } | |
1089 | // Otherwise we skipped through the here doc faster. | |
1090 | } | |
1091 | } else if (isEOLChar(chNext) | |
1092 | && lookingAtHereDocDelim(styler, | |
1093 | i - HereDoc.DelimiterLength + 1, | |
1094 | lengthDoc, | |
1095 | HereDoc.Delimiter)) { | |
1096 | styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state); | |
1097 | styler.ColourTo(i, SCE_RB_HERE_DELIM); | |
1098 | state = SCE_RB_DEFAULT; | |
1099 | preferRE = false; | |
1100 | HereDoc.State = 0; | |
1101 | } | |
1102 | } else if (state == SCE_RB_CLASS_VAR | |
1103 | || state == SCE_RB_INSTANCE_VAR | |
1104 | || state == SCE_RB_SYMBOL) { | |
1105 | if (!isSafeWordcharOrHigh(ch)) { | |
1106 | styler.ColourTo(i - 1, state); | |
1107 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1108 | preferRE = false; | |
1109 | } | |
1110 | } else if (state == SCE_RB_GLOBAL) { | |
1111 | if (!isSafeWordcharOrHigh(ch)) { | |
1112 | // handle special globals here as well | |
1113 | if (chPrev == '$') { | |
1114 | if (ch == '-') { | |
1115 | // Include the next char, like $-a | |
1116 | advance_char(i, ch, chNext, chNext2); | |
1117 | } | |
1118 | styler.ColourTo(i, state); | |
1119 | state = SCE_RB_DEFAULT; | |
1120 | } else { | |
1121 | styler.ColourTo(i - 1, state); | |
1122 | redo_char(i, ch, chNext, chNext2, state); // pass by ref | |
1123 | } | |
1124 | preferRE = false; | |
1125 | } | |
1126 | } else if (state == SCE_RB_POD) { | |
1127 | // PODs end with ^=end\s, -- any whitespace can follow =end | |
1128 | if (strchr(" \t\n\r", ch) != NULL | |
1129 | && i > 5 | |
1130 | && isEOLChar(styler[i - 5]) | |
1131 | && isMatch(styler, lengthDoc, i - 4, "=end")) { | |
1132 | styler.ColourTo(i - 1, state); | |
1133 | state = SCE_RB_DEFAULT; | |
1134 | preferRE = false; | |
1135 | } | |
1136 | } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) { | |
1137 | if (ch == '\\' && Quote.Up != '\\') { | |
1138 | // Skip one | |
1139 | advance_char(i, ch, chNext, chNext2); | |
1140 | } else if (ch == Quote.Down) { | |
1141 | Quote.Count--; | |
1142 | if (Quote.Count == 0) { | |
1143 | // Include the options | |
1144 | while (isSafeAlpha(chNext)) { | |
1145 | i++; | |
65ec6247 | 1146 | ch = chNext; |
1e9bafca RD |
1147 | chNext = styler.SafeGetCharAt(i + 1); |
1148 | } | |
1149 | styler.ColourTo(i, state); | |
1150 | state = SCE_RB_DEFAULT; | |
1151 | preferRE = false; | |
1152 | } | |
1153 | } else if (ch == Quote.Up) { | |
1154 | // Only if close quoter != open quoter | |
1155 | Quote.Count++; | |
1156 | ||
1157 | } else if (ch == '#' ) { | |
1158 | //todo: distinguish comments from pound chars | |
1159 | // for now, handle as comment | |
1160 | styler.ColourTo(i - 1, state); | |
1161 | bool inEscape = false; | |
1162 | while (++i < lengthDoc) { | |
1163 | ch = styler.SafeGetCharAt(i); | |
1164 | if (ch == '\\') { | |
1165 | inEscape = true; | |
1166 | } else if (isEOLChar(ch)) { | |
1167 | // Comment inside a regex | |
1168 | styler.ColourTo(i - 1, SCE_RB_COMMENTLINE); | |
1169 | break; | |
1170 | } else if (inEscape) { | |
1171 | inEscape = false; // don't look at char | |
1172 | } else if (ch == Quote.Down) { | |
1173 | // Have the regular handler deal with this | |
1174 | // to get trailing modifiers. | |
1175 | i--; | |
1176 | ch = styler[i]; | |
1177 | break; | |
1178 | } | |
1179 | } | |
1180 | chNext = styler.SafeGetCharAt(i + 1); | |
1181 | chNext2 = styler.SafeGetCharAt(i + 2); | |
1182 | } | |
1183 | // Quotes of all kinds... | |
1184 | } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ || | |
1185 | state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW || | |
1186 | state == SCE_RB_STRING || state == SCE_RB_CHARACTER || | |
1187 | state == SCE_RB_BACKTICKS) { | |
1188 | if (!Quote.Down && !isspacechar(ch)) { | |
1189 | Quote.Open(ch); | |
1190 | } else if (ch == '\\' && Quote.Up != '\\') { | |
1191 | //Riddle me this: Is it safe to skip *every* escaped char? | |
1192 | advance_char(i, ch, chNext, chNext2); | |
1193 | } else if (ch == Quote.Down) { | |
1194 | Quote.Count--; | |
1195 | if (Quote.Count == 0) { | |
1196 | styler.ColourTo(i, state); | |
1197 | state = SCE_RB_DEFAULT; | |
1198 | preferRE = false; | |
1199 | } | |
1200 | } else if (ch == Quote.Up) { | |
1201 | Quote.Count++; | |
1202 | } | |
1203 | } | |
1204 | ||
1205 | if (state == SCE_RB_ERROR) { | |
1206 | break; | |
1207 | } | |
1208 | chPrev = ch; | |
1209 | } | |
1210 | if (state == SCE_RB_WORD) { | |
1211 | // We've ended on a word, possibly at EOF, and need to | |
1212 | // classify it. | |
1213 | (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord); | |
1214 | } else { | |
1215 | styler.ColourTo(lengthDoc - 1, state); | |
1216 | } | |
1217 | } | |
1218 | ||
1219 | // Helper functions for folding, disambiguation keywords | |
1220 | // Assert that there are no high-bit chars | |
1221 | ||
1222 | static void getPrevWord(int pos, | |
1223 | char *prevWord, | |
1224 | Accessor &styler, | |
1225 | int word_state) | |
1226 | { | |
1227 | int i; | |
1228 | styler.Flush(); | |
1229 | for (i = pos - 1; i > 0; i--) { | |
1230 | if (actual_style(styler.StyleAt(i)) != word_state) { | |
1231 | i++; | |
1232 | break; | |
1233 | } | |
1234 | } | |
1235 | if (i < pos - MAX_KEYWORD_LENGTH) // overflow | |
1236 | i = pos - MAX_KEYWORD_LENGTH; | |
1237 | char *dst = prevWord; | |
1238 | for (; i <= pos; i++) { | |
1239 | *dst++ = styler[i]; | |
1240 | } | |
1241 | *dst = 0; | |
1242 | } | |
1243 | ||
// Report whether 'prevWord' is exactly one of the keywords "if", "do",
// "while", "unless" or "until".  The comparison is case-sensitive and
// matches whole strings only.
static bool keywordIsAmbiguous(const char *prevWord)
{
	// Ordered from most to least likely, so the common cases exit early.
	static const char *const ambiguousWords[] = {
		"if", "do", "while", "unless", "until"
	};
	const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
	for (size_t k = 0; k < count; k++) {
		if (strcmp(prevWord, ambiguousWords[k]) == 0) {
			return true;
		}
	}
	return false;
}
1258 | ||
1259 | // Demote keywords in the following conditions: | |
1260 | // if, while, unless, until modify a statement | |
1261 | // do after a while or until, as a noise word (like then after if) | |
1262 | ||
1263 | static bool keywordIsModifier(const char *word, | |
1264 | int pos, | |
1265 | Accessor &styler) | |
1266 | { | |
1267 | if (word[0] == 'd' && word[1] == 'o' && !word[2]) { | |
1268 | return keywordDoStartsLoop(pos, styler); | |
1269 | } | |
1270 | char ch; | |
1271 | int style = SCE_RB_DEFAULT; | |
1272 | int lineStart = styler.GetLine(pos); | |
1273 | int lineStartPosn = styler.LineStart(lineStart); | |
1274 | styler.Flush(); | |
1275 | while (--pos >= lineStartPosn) { | |
1276 | style = actual_style(styler.StyleAt(pos)); | |
1277 | if (style == SCE_RB_DEFAULT) { | |
1278 | if (iswhitespace(ch = styler[pos])) { | |
1279 | //continue | |
1280 | } else if (ch == '\r' || ch == '\n') { | |
1281 | // Scintilla's LineStart() and GetLine() routines aren't | |
1282 | // platform-independent, so if we have text prepared with | |
1283 | // a different system we can't rely on it. | |
1284 | return false; | |
65ec6247 | 1285 | } |
1e9bafca RD |
1286 | } else { |
1287 | break; | |
65ec6247 | 1288 | } |
1e9bafca RD |
1289 | } |
1290 | if (pos < lineStartPosn) { | |
1291 | return false; //XXX not quite right if the prev line is a continuation | |
1292 | } | |
1293 | // First things where the action is unambiguous | |
1294 | switch (style) { | |
1295 | case SCE_RB_DEFAULT: | |
1296 | case SCE_RB_COMMENTLINE: | |
1297 | case SCE_RB_POD: | |
1298 | case SCE_RB_CLASSNAME: | |
1299 | case SCE_RB_DEFNAME: | |
1300 | case SCE_RB_MODULE_NAME: | |
1301 | return false; | |
1302 | case SCE_RB_OPERATOR: | |
1303 | break; | |
1304 | case SCE_RB_WORD: | |
1305 | // Watch out for uses of 'else if' | |
1306 | //XXX: Make a list of other keywords where 'if' isn't a modifier | |
1307 | // and can appear legitimately | |
1308 | // Formulate this to avoid warnings from most compilers | |
1309 | if (strcmp(word, "if") == 0) { | |
1310 | char prevWord[MAX_KEYWORD_LENGTH + 1]; | |
1311 | getPrevWord(pos, prevWord, styler, SCE_RB_WORD); | |
1312 | return strcmp(prevWord, "else") != 0; | |
1313 | } | |
1314 | return true; | |
1315 | default: | |
1316 | return true; | |
1317 | } | |
1318 | // Assume that if the keyword follows an operator, | |
1319 | // usually it's a block assignment, like | |
1320 | // a << if x then y else z | |
1321 | ||
1322 | ch = styler[pos]; | |
1323 | switch (ch) { | |
1324 | case ')': | |
1325 | case ']': | |
1326 | case '}': | |
1327 | return true; | |
1328 | default: | |
1329 | return false; | |
1330 | } | |
65ec6247 RD |
1331 | } |
1332 | ||
1e9bafca RD |
1333 | #define WHILE_BACKWARDS "elihw" |
1334 | #define UNTIL_BACKWARDS "litnu" | |
1335 | ||
1336 | // Nothing fancy -- look to see if we follow a while/until somewhere | |
1337 | // on the current line | |
1338 | ||
1339 | static bool keywordDoStartsLoop(int pos, | |
1340 | Accessor &styler) | |
1341 | { | |
1342 | char ch; | |
1343 | int style; | |
1344 | int lineStart = styler.GetLine(pos); | |
1345 | int lineStartPosn = styler.LineStart(lineStart); | |
1346 | styler.Flush(); | |
1347 | while (--pos >= lineStartPosn) { | |
1348 | style = actual_style(styler.StyleAt(pos)); | |
1349 | if (style == SCE_RB_DEFAULT) { | |
1350 | if ((ch = styler[pos]) == '\r' || ch == '\n') { | |
1351 | // Scintilla's LineStart() and GetLine() routines aren't | |
1352 | // platform-independent, so if we have text prepared with | |
1353 | // a different system we can't rely on it. | |
1354 | return false; | |
1355 | } | |
1356 | } else if (style == SCE_RB_WORD) { | |
1357 | // Check for while or until, but write the word in backwards | |
1358 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
1359 | char *dst = prevWord; | |
1360 | int wordLen = 0; | |
1361 | int start_word; | |
1362 | for (start_word = pos; | |
1363 | start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD; | |
1364 | start_word--) { | |
1365 | if (++wordLen < MAX_KEYWORD_LENGTH) { | |
1366 | *dst++ = styler[start_word]; | |
1367 | } | |
1368 | } | |
1369 | *dst = 0; | |
1370 | // Did we see our keyword? | |
1371 | if (!strcmp(prevWord, WHILE_BACKWARDS) | |
1372 | || !strcmp(prevWord, UNTIL_BACKWARDS)) { | |
1373 | return true; | |
1374 | } | |
1375 | // We can move pos to the beginning of the keyword, and then | |
1376 | // accept another decrement, as we can never have two contiguous | |
1377 | // keywords: | |
1378 | // word1 word2 | |
1379 | // ^ | |
1380 | // <- move to start_word | |
1381 | // ^ | |
1382 | // <- loop decrement | |
1383 | // ^ # pointing to end of word1 is fine | |
1384 | pos = start_word; | |
1385 | } | |
1386 | } | |
1387 | return false; | |
1388 | } | |
1389 | ||
1390 | /* | |
1391 | * Folding Ruby | |
1392 | * | |
1393 | * The language is quite complex to analyze without a full parse. | |
1394 | * For example, this line shouldn't affect fold level: | |
1395 | * | |
1396 | * print "hello" if feeling_friendly? | |
1397 | * | |
1398 | * Neither should this: | |
1399 | * | |
1400 | * print "hello" \ | |
1401 | * if feeling_friendly? | |
1402 | * | |
1403 | * | |
1404 | * But this should: | |
1405 | * | |
1406 | * if feeling_friendly? #++ | |
1407 | * print "hello" \ | |
1408 | * print "goodbye" | |
1409 | * end #-- | |
1410 | * | |
1411 | * So we cheat, by actually looking at the existing indentation | |
1412 | * levels for each line, and just echoing it back. Like Python. | |
1413 | * Then if we get better at it, we'll take braces into consideration, | |
1414 | * which always affect folding levels. | |
1415 | ||
1416 | * How the keywords should work: | |
1417 | * No effect: | |
1418 | * __FILE__ __LINE__ BEGIN END alias and | |
1419 | * defined? false in nil not or self super then | |
1420 | * true undef | |
1421 | ||
1422 | * Always increment: | |
1423 | * begin class def do for module when { | |
1424 | * | |
1425 | * Always decrement: | |
1426 | * end } | |
1427 | * | |
1428 | * Increment if these start a statement | |
1429 | * if unless until while -- do nothing if they're modifiers | |
65ec6247 | 1430 | |
1e9bafca RD |
1431 | * These end a block if there's no modifier, but don't bother |
1432 | * break next redo retry return yield | |
1433 | * | |
1434 | * These temporarily de-indent, but re-indent | |
1435 | * case else elsif ensure rescue | |
1436 | * | |
1437 | * This means that the folder reflects indentation rather | |
1438 | * than setting it. The language-service updates indentation | |
1439 | * when users type return and finish entering de-denters. | |
1440 | * | |
1441 | * Later offer to fold POD, here-docs, strings, and blocks of comments | |
1442 | */ | |
1443 | ||
1444 | static void FoldRbDoc(unsigned int startPos, int length, int initStyle, | |
1445 | WordList *[], Accessor &styler) { | |
1446 | const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; | |
1447 | bool foldComment = styler.GetPropertyInt("fold.comment") != 0; | |
1448 | ||
1449 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args | |
1450 | false); | |
1451 | unsigned int endPos = startPos + length; | |
1452 | int visibleChars = 0; | |
65ec6247 | 1453 | int lineCurrent = styler.GetLine(startPos); |
1e9bafca RD |
1454 | int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent) |
1455 | & SC_FOLDLEVELNUMBERMASK | |
1456 | & ~SC_FOLDLEVELBASE); | |
1457 | int levelCurrent = levelPrev; | |
65ec6247 | 1458 | char chNext = styler[startPos]; |
1e9bafca RD |
1459 | int styleNext = styler.StyleAt(startPos); |
1460 | int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1); | |
1461 | bool buffer_ends_with_eol = false; | |
1462 | for (unsigned int i = startPos; i < endPos; i++) { | |
65ec6247 RD |
1463 | char ch = chNext; |
1464 | chNext = styler.SafeGetCharAt(i + 1); | |
1e9bafca RD |
1465 | int style = styleNext; |
1466 | styleNext = styler.StyleAt(i + 1); | |
1467 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); | |
1468 | if (style == SCE_RB_COMMENTLINE) { | |
1469 | if (foldComment && stylePrev != SCE_RB_COMMENTLINE) { | |
1470 | if (chNext == '{') { | |
1471 | levelCurrent++; | |
1472 | } else if (chNext == '}') { | |
1473 | levelCurrent--; | |
65ec6247 | 1474 | } |
1e9bafca RD |
1475 | } |
1476 | } else if (style == SCE_RB_OPERATOR) { | |
1477 | if (strchr("[{(", ch)) { | |
1478 | levelCurrent++; | |
1479 | } else if (strchr(")}]", ch)) { | |
1480 | // Don't decrement below 0 | |
1481 | if (levelCurrent > 0) | |
1482 | levelCurrent--; | |
65ec6247 | 1483 | } |
1e9bafca RD |
1484 | } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) { |
1485 | // Look at the keyword on the left and decide what to do | |
1486 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero | |
1487 | prevWord[0] = 0; | |
1488 | getPrevWord(i, prevWord, styler, SCE_RB_WORD); | |
1489 | if (!strcmp(prevWord, "end")) { | |
1490 | // Don't decrement below 0 | |
1491 | if (levelCurrent > 0) | |
1492 | levelCurrent--; | |
1493 | } else if ( !strcmp(prevWord, "if") | |
1494 | || !strcmp(prevWord, "def") | |
1495 | || !strcmp(prevWord, "class") | |
1496 | || !strcmp(prevWord, "module") | |
1497 | || !strcmp(prevWord, "begin") | |
1498 | || !strcmp(prevWord, "case") | |
1499 | || !strcmp(prevWord, "do") | |
1500 | || !strcmp(prevWord, "while") | |
1501 | || !strcmp(prevWord, "unless") | |
1502 | || !strcmp(prevWord, "until") | |
1503 | || !strcmp(prevWord, "for") | |
1504 | ) { | |
1505 | levelCurrent++; | |
1506 | } | |
1507 | } | |
1508 | if (atEOL) { | |
1509 | int lev = levelPrev; | |
1510 | if (visibleChars == 0 && foldCompact) | |
1511 | lev |= SC_FOLDLEVELWHITEFLAG; | |
1512 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1513 | lev |= SC_FOLDLEVELHEADERFLAG; | |
1514 | styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE); | |
65ec6247 | 1515 | lineCurrent++; |
1e9bafca RD |
1516 | levelPrev = levelCurrent; |
1517 | visibleChars = 0; | |
1518 | buffer_ends_with_eol = true; | |
1519 | } else if (!isspacechar(ch)) { | |
1520 | visibleChars++; | |
1521 | buffer_ends_with_eol = false; | |
1522 | } | |
1523 | } | |
1524 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
1525 | if (!buffer_ends_with_eol) { | |
1526 | lineCurrent++; | |
1527 | int new_lev = levelCurrent; | |
1528 | if (visibleChars == 0 && foldCompact) | |
1529 | new_lev |= SC_FOLDLEVELWHITEFLAG; | |
1530 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1531 | new_lev |= SC_FOLDLEVELHEADERFLAG; | |
1532 | levelCurrent = new_lev; | |
1533 | } | |
1534 | styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE); | |
65ec6247 | 1535 | } |
9e730a78 RD |
1536 | |
// Descriptions of the word lists for the Ruby lexer: a single "Keywords"
// entry, followed by a 0 entry that marks the end of the array.
static const char * const rubyWordListDesc[] = { "Keywords", 0 };
1541 | ||
1542 | LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc); |