contrib/src/stc/scintilla/src/LexRuby.cxx

   1 // Scintilla source code edit control
   2 /** @file LexRuby.cxx
   3  ** Lexer for Ruby.
   4  **/
   5 // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <ctype.h>
  11 #include <stdio.h>
  12 #include <stdarg.h>
  13
  14 #include "Platform.h"
  15
  16 #include "PropSet.h"
  17 #include "Accessor.h"
  18 #include "KeyWords.h"
  19 #include "Scintilla.h"
  20 #include "SciLexer.h"
  21
  22 #ifdef SCI_NAMESPACE
  23 using namespace Scintilla;
  24 #endif
  25
  26 //XXX Identical to Perl, put in common area
  27 static inline bool isEOLChar(char ch) {
  28         return (ch == '\r') || (ch == '\n');
  29 }
  30
  31 #define isSafeASCII(ch) ((unsigned int)(ch) <= 127)
  32 // This one's redundant, but makes for more readable code
  33 #define isHighBitChar(ch) ((unsigned int)(ch) > 127)
  34
  35 static inline bool isSafeAlpha(char ch) {
  36     return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
  37 }
  38
  39 static inline bool isSafeAlnum(char ch) {
  40     return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
  41 }
  42
  43 static inline bool isSafeAlnumOrHigh(char ch) {
  44     return isHighBitChar(ch) || isalnum(ch) || ch == '_';
  45 }
  46
  47 static inline bool isSafeDigit(char ch) {
  48     return isSafeASCII(ch) && isdigit(ch);
  49 }
  50
  51 static inline bool isSafeWordcharOrHigh(char ch) {
  52     return isHighBitChar(ch) || iswordchar(ch);
  53 }
  54
  55 static bool inline iswhitespace(char ch) {
  56         return ch == ' ' || ch == '\t';
  57 }
  58
  59 #define MAX_KEYWORD_LENGTH 200
  60
  61 #define STYLE_MASK 63
  62 #define actual_style(style) (style & STYLE_MASK)
  63
  64 static bool followsDot(unsigned int pos, Accessor &styler) {
  65     styler.Flush();
  66     for (; pos >= 1; --pos) {
  67         int style = actual_style(styler.StyleAt(pos));
  68         char ch;
  69         switch (style) {
  70             case SCE_RB_DEFAULT:
  71                 ch = styler[pos];
  72                 if (ch == ' ' || ch == '\t') {
  73                     //continue
  74                 } else {
  75                     return false;
  76                 }
  77                 break;
  78
  79             case SCE_RB_OPERATOR:
  80                 return styler[pos] == '.';
  81
  82             default:
  83                 return false;
  84         }
  85     }
  86     return false;
  87 }
  88
  89 // Forward declarations
  90 static bool keywordIsAmbiguous(const char *prevWord);
  91 static bool keywordDoStartsLoop(int pos,
  92                                 Accessor &styler);
  93 static bool keywordIsModifier(const char *word,
  94                               int pos,
  95                               Accessor &styler);
  96
  97 static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
  98         char s[100];
  99     unsigned int i, j;
 100         unsigned int lim = end - start + 1; // num chars to copy
 101         if (lim >= MAX_KEYWORD_LENGTH) {
 102                 lim = MAX_KEYWORD_LENGTH - 1;
 103         }
 104         for (i = start, j = 0; j < lim; i++, j++) {
 105                 s[j] = styler[i];
 106         }
 107     s[j] = '\0';
 108         int chAttr;
 109         if (0 == strcmp(prevWord, "class"))
 110                 chAttr = SCE_RB_CLASSNAME;
 111         else if (0 == strcmp(prevWord, "module"))
 112                 chAttr = SCE_RB_MODULE_NAME;
 113         else if (0 == strcmp(prevWord, "def"))
 114                 chAttr = SCE_RB_DEFNAME;
 115     else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
 116         if (keywordIsAmbiguous(s)
 117             && keywordIsModifier(s, start, styler)) {
 118
 119             // Demoted keywords are colored as keywords,
 120             // but do not affect changes in indentation.
 121             //
 122             // Consider the word 'if':
 123             // 1. <<if test ...>> : normal
 124             // 2. <<stmt if test>> : demoted
 125             // 3. <<lhs = if ...>> : normal: start a new indent level
 126             // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
 127
 128             chAttr = SCE_RB_WORD_DEMOTED;
 129         } else {
 130             chAttr = SCE_RB_WORD;
 131         }
 132         } else
 133         chAttr = SCE_RB_IDENTIFIER;
 134         styler.ColourTo(end, chAttr);
 135         if (chAttr == SCE_RB_WORD) {
 136                 strcpy(prevWord, s);
 137         } else {
 138                 prevWord[0] = 0;
 139         }
 140     return chAttr;
 141 }
 142
 143
 144 //XXX Identical to Perl, put in common area
 145 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
 146         if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
 147                 return false;
 148         }
 149         while (*val) {
 150                 if (*val != styler[pos++]) {
 151                         return false;
 152                 }
 153                 val++;
 154         }
 155         return true;
 156 }
 157
 158 // Do Ruby better -- find the end of the line, work back,
 159 // and then check for leading white space
 160
 161 // Precondition: the here-doc target can be indented
 162 static bool lookingAtHereDocDelim(Accessor         &styler,
 163                                   int                   pos,
 164                                   int                   lengthDoc,
 165                                   const char   *HereDocDelim)
 166 {
 167     if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
 168         return false;
 169     }
 170     while (--pos > 0) {
 171         char ch = styler[pos];
 172         if (isEOLChar(ch)) {
 173             return true;
 174         } else if (ch != ' ' && ch != '\t') {
 175             return false;
 176         }
 177     }
 178     return false;
 179 }
 180
 181 //XXX Identical to Perl, put in common area
 182 static char opposite(char ch) {
 183         if (ch == '(')
 184                 return ')';
 185         if (ch == '[')
 186                 return ']';
 187         if (ch == '{')
 188                 return '}';
 189         if (ch == '<')
 190                 return '>';
 191         return ch;
 192 }
 193
 194 // Null transitions when we see we've reached the end
 195 // and need to relex the curr char.
 196
 197 static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
 198                       int &state) {
 199     i--;
 200     chNext2 = chNext;
 201     chNext = ch;
 202     state = SCE_RB_DEFAULT;
 203 }
 204
 205 static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
 206     i++;
 207     ch = chNext;
 208     chNext = chNext2;
 209 }
 210
 211 // precondition: startPos points to one after the EOL char
 212 static bool currLineContainsHereDelims(int& startPos,
 213                                        Accessor &styler) {
 214     if (startPos <= 1)
 215         return false;
 216
 217     int pos;
 218     for (pos = startPos - 1; pos > 0; pos--) {
 219         char ch = styler.SafeGetCharAt(pos);
 220         if (isEOLChar(ch)) {
 221             // Leave the pointers where they are -- there are no
 222             // here doc delims on the current line, even if
 223             // the EOL isn't default style
 224
 225             return false;
 226         } else {
 227             styler.Flush();
 228             if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
 229                 break;
 230             }
 231         }
 232     }
 233     if (pos == 0) {
 234         return false;
 235     }
 236     // Update the pointers so we don't have to re-analyze the string
 237     startPos = pos;
 238     return true;
 239 }
 240
 241
 242 static bool isEmptyLine(int pos,
 243                         Accessor &styler) {
 244         int spaceFlags = 0;
 245         int lineCurrent = styler.GetLine(pos);
 246         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
 247     return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
 248 }
 249
 250 static bool RE_CanFollowKeyword(const char *keyword) {
 251     if (!strcmp(keyword, "and")
 252         || !strcmp(keyword, "begin")
 253         || !strcmp(keyword, "break")
 254         || !strcmp(keyword, "case")
 255         || !strcmp(keyword, "do")
 256         || !strcmp(keyword, "else")
 257         || !strcmp(keyword, "elsif")
 258         || !strcmp(keyword, "if")
 259         || !strcmp(keyword, "next")
 260         || !strcmp(keyword, "return")
 261         || !strcmp(keyword, "when")
 262         || !strcmp(keyword, "unless")
 263         || !strcmp(keyword, "until")
 264         || !strcmp(keyword, "not")
 265         || !strcmp(keyword, "or")) {
 266         return true;
 267     }
 268     return false;
 269 }
 270
 271 // Look at chars up to but not including endPos
 272 // Don't look at styles in case we're looking forward
 273
 274 static int skipWhitespace(int startPos,
 275                            int endPos,
 276                            Accessor &styler) {
 277     for (int i = startPos; i < endPos; i++) {
 278         if (!iswhitespace(styler[i])) {
 279             return i;
 280         }
 281     }
 282     return endPos;
 283 }
 284
 285 // This routine looks for false positives like
 286 // undef foo, <<
 287 // There aren't too many.
 288 //
 289 // iPrev points to the start of <<
 290
 291 static bool sureThisIsHeredoc(int iPrev,
 292                               Accessor &styler,
 293                               char *prevWord) {
 294
 295     // Not so fast, since Ruby's so dynamic.  Check the context
 296     // to make sure we're OK.
 297     int prevStyle;
 298     int lineStart = styler.GetLine(iPrev);
 299     int lineStartPosn = styler.LineStart(lineStart);
 300     styler.Flush();
 301
 302     // Find the first word after some whitespace
 303     int firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
 304     if (firstWordPosn >= iPrev) {
 305         // Have something like {^     <<}
 306                 //XXX Look at the first previous non-comment non-white line
 307                 // to establish the context.  Not too likely though.
 308         return true;
 309     } else {
 310         switch (prevStyle = styler.StyleAt(firstWordPosn)) {
 311         case SCE_RB_WORD:
 312         case SCE_RB_WORD_DEMOTED:
 313         case SCE_RB_IDENTIFIER:
 314             break;
 315         default:
 316             return true;
 317         }
 318     }
 319     int firstWordEndPosn = firstWordPosn;
 320     char *dst = prevWord;
 321     for (;;) {
 322         if (firstWordEndPosn >= iPrev ||
 323             styler.StyleAt(firstWordEndPosn) != prevStyle) {
 324             *dst = 0;
 325             break;
 326         }
 327         *dst++ = styler[firstWordEndPosn];
 328         firstWordEndPosn += 1;
 329     }
 330     //XXX Write a style-aware thing to regex scintilla buffer objects
 331     if (!strcmp(prevWord, "undef")
 332         || !strcmp(prevWord, "def")
 333         || !strcmp(prevWord, "alias")) {
 334         // These keywords are what we were looking for
 335         return false;
 336     }
 337     return true;
 338 }
 339
 340 // Routine that saves us from allocating a buffer for the here-doc target
 341 // targetEndPos points one past the end of the current target
 342 static bool haveTargetMatch(int currPos,
 343                             int lengthDoc,
 344                             int targetStartPos,
 345                             int targetEndPos,
 346                             Accessor &styler) {
 347     if (lengthDoc - currPos < targetEndPos - targetStartPos) {
 348         return false;
 349     }
 350     int i, j;
 351     for (i = targetStartPos, j = currPos;
 352          i < targetEndPos && j < lengthDoc;
 353          i++, j++) {
 354         if (styler[i] != styler[j]) {
 355             return false;
 356         }
 357     }
 358     return true;
 359 }
 360
 361 // We need a check because the form
 362 // [identifier] <<[target]
 363 // is ambiguous.  The Ruby lexer/parser resolves it by
 364 // looking to see if [identifier] names a variable or a
  365 // function.  If it's a function, it's the start of a here-doc.
 366 // If it's a var, it's an operator.  This lexer doesn't
 367 // maintain a symbol table, so it looks ahead to see what's
 368 // going on, in cases where we have
 369 // ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
 370 //
  371 // If no following line starts with [target], assume we don't have a here-doc.
 372
 373 // return true == yes, we have no heredocs
 374
 375 static bool sureThisIsNotHeredoc(int lt2StartPos,
 376                                  Accessor &styler) {
 377     int prevStyle;
 378      // Use full document, not just part we're styling
 379     int lengthDoc = styler.Length();
 380     int lineStart = styler.GetLine(lt2StartPos);
 381     int lineStartPosn = styler.LineStart(lineStart);
 382     styler.Flush();
 383     const bool definitely_not_a_here_doc = true;
 384     const bool looks_like_a_here_doc = false;
 385
 386     // Find the first word after some whitespace
 387     int firstWordPosn = skipWhitespace(lineStartPosn, lt2StartPos, styler);
 388     if (firstWordPosn >= lt2StartPos) {
 389         return definitely_not_a_here_doc;
 390     }
 391     prevStyle = styler.StyleAt(firstWordPosn);
 392     // If we have '<<' following a keyword, it's not a heredoc
 393     if (prevStyle != SCE_RB_IDENTIFIER) {
 394         return definitely_not_a_here_doc;
 395     }
 396     int newStyle = prevStyle;
 397     // Some compilers incorrectly warn about uninit newStyle
 398     for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 399         // Inner loop looks at the name
 400         for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
 401             newStyle = styler.StyleAt(firstWordPosn);
 402             if (newStyle != prevStyle) {
 403                 break;
 404             }
 405         }
 406         // Do we have '::' or '.'?
 407         if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
 408             char ch = styler[firstWordPosn];
 409             if (ch == '.') {
 410                 // yes
 411             } else if (ch == ':') {
 412                 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
 413                     return definitely_not_a_here_doc;
 414                 } else if (styler[firstWordPosn] != ':') {
 415                     return definitely_not_a_here_doc;
 416                 }
 417             } else {
 418                 break;
 419             }
 420         } else {
 421             break;
 422         }
 423     }
 424     // Skip next batch of white-space
 425     firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
 426     if (firstWordPosn != lt2StartPos) {
 427         // Have [[^ws[identifier]ws[*something_else*]ws<<
 428         return definitely_not_a_here_doc;
 429     }
 430     // OK, now 'j' will point to the current spot moving ahead
 431         int j = firstWordPosn + 1;
 432     if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
 433         // This shouldn't happen
 434         return definitely_not_a_here_doc;
 435     }
 436     int nextLineStartPosn = styler.LineStart(lineStart + 1);
 437     if (nextLineStartPosn >= lengthDoc) {
 438         return definitely_not_a_here_doc;
 439     }
 440     j = skipWhitespace(j + 1, nextLineStartPosn, styler);
 441     if (j >= lengthDoc) {
 442         return definitely_not_a_here_doc;
 443     }
 444     bool allow_indent;
 445     int target_start, target_end;
 446     // From this point on no more styling, since we're looking ahead
 447     if (styler[j] == '-') {
 448         allow_indent = true;
 449         j++;
 450     } else {
 451         allow_indent = false;
 452     }
 453
 454     // Allow for quoted targets.
 455     char target_quote = 0;
 456     switch (styler[j]) {
 457     case '\'':
 458     case '"':
 459     case '`':
 460         target_quote = styler[j];
 461         j += 1;
 462     }
 463
 464     if (isSafeAlnum(styler[j])) {
 465         // Init target_end because some compilers think it won't
 466         // be initialized by the time it's used
 467         target_start = target_end = j;
 468         j++;
 469     } else {
 470         return definitely_not_a_here_doc;
 471     }
 472     for (; j < lengthDoc; j++) {
 473         if (!isSafeAlnum(styler[j])) {
 474             if (target_quote && styler[j] != target_quote) {
 475                 // unquoted end
 476                 return definitely_not_a_here_doc;
 477             }
 478
 479             // And for now make sure that it's a newline
 480             // don't handle arbitrary expressions yet
 481
 482             target_end = j;
 483                         if (target_quote) {
 484                                 // Now we can move to the character after the string delimiter.
 485                                 j += 1;
 486                         }
 487             j = skipWhitespace(j, lengthDoc, styler);
 488             if (j >= lengthDoc) {
 489                 return definitely_not_a_here_doc;
 490             } else {
 491                 char ch = styler[j];
 492                 if (ch == '#' || isEOLChar(ch)) {
 493                     // This is OK, so break and continue;
 494                     break;
 495                 } else {
 496                     return definitely_not_a_here_doc;
 497                 }
 498             }
 499         }
 500     }
 501
 502     // Just look at the start of each line
 503     int last_line = styler.GetLine(lengthDoc - 1);
 504     // But don't go too far
 505     if (last_line > lineStart + 50) {
 506         last_line = lineStart + 50;
 507     }
 508     for (int line_num = lineStart + 1; line_num <= last_line; line_num++) {
 509         if (allow_indent) {
 510             j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
 511         } else {
 512             j = styler.LineStart(line_num);
 513         }
 514         // target_end is one past the end
 515         if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
 516             // We got it
 517             return looks_like_a_here_doc;
 518         }
 519     }
 520     return definitely_not_a_here_doc;
 521 }
 522
 523 //todo: if we aren't looking at a stdio character,
 524 // move to the start of the first line that is not in a
 525 // multi-line construct
 526
 527 static void synchronizeDocStart(unsigned int& startPos,
 528                                 int &length,
 529                                 int &initStyle,
 530                                 Accessor &styler,
 531                                 bool skipWhiteSpace=false) {
 532
 533     styler.Flush();
 534     int style = actual_style(styler.StyleAt(startPos));
 535     switch (style) {
 536         case SCE_RB_STDIN:
 537         case SCE_RB_STDOUT:
 538         case SCE_RB_STDERR:
 539             // Don't do anything else with these.
 540             return;
 541     }
 542
 543     int pos = startPos;
 544     // Quick way to characterize each line
 545     int lineStart;
 546     for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
 547         // Now look at the style before the previous line's EOL
 548         pos = styler.LineStart(lineStart) - 1;
 549         if (pos <= 10) {
 550             lineStart = 0;
 551             break;
 552         }
 553         char ch = styler.SafeGetCharAt(pos);
 554         char chPrev = styler.SafeGetCharAt(pos - 1);
 555         if (ch == '\n' && chPrev == '\r') {
 556             pos--;
 557         }
 558         if (styler.SafeGetCharAt(pos - 1) == '\\') {
 559             // Continuation line -- keep going
 560         } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
 561             // Part of multi-line construct -- keep going
 562         } else if (currLineContainsHereDelims(pos, styler)) {
 563             // Keep going, with pos and length now pointing
 564             // at the end of the here-doc delimiter
 565         } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
 566             // Keep going
 567         } else {
 568             break;
 569         }
 570     }
 571     pos = styler.LineStart(lineStart);
 572     length += (startPos - pos);
 573     startPos = pos;
 574     initStyle = SCE_RB_DEFAULT;
 575 }
 576
 577 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
 578                                                    WordList *keywordlists[], Accessor &styler) {
 579
 580         // Lexer for Ruby often has to backtrack to start of current style to determine
 581         // which characters are being used as quotes, how deeply nested is the
 582         // start position and what the termination string is for here documents
 583
 584         WordList &keywords = *keywordlists[0];
 585
 586         class HereDocCls {
 587         public:
 588                 int State;
 589         // States
 590         // 0: '<<' encountered
 591                 // 1: collect the delimiter
 592         // 1b: text between the end of the delimiter and the EOL
 593                 // 2: here doc text (lines after the delimiter)
 594                 char Quote;             // the char after '<<'
 595                 bool Quoted;            // true if Quote in ('\'','"','`')
 596                 int DelimiterLength;    // strlen(Delimiter)
 597                 char Delimiter[256];    // the Delimiter, limit of 256: from Perl
 598         bool CanBeIndented;
 599                 HereDocCls() {
 600                         State = 0;
 601                         DelimiterLength = 0;
 602                         Delimiter[0] = '\0';
 603             CanBeIndented = false;
 604                 }
 605         };
 606         HereDocCls HereDoc;
 607
 608         class QuoteCls {
 609                 public:
 610                 int  Count;
 611                 char Up;
 612                 char Down;
 613                 QuoteCls() {
 614                         this->New();
 615                 }
 616                 void New() {
 617                         Count = 0;
 618                         Up    = '\0';
 619                         Down  = '\0';
 620                 }
 621                 void Open(char u) {
 622                         Count++;
 623                         Up    = u;
 624                         Down  = opposite(Up);
 625                 }
 626         };
 627         QuoteCls Quote;
 628
 629     int numDots = 0;  // For numbers --
 630                       // Don't start lexing in the middle of a num
 631
 632     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
 633                         false);
 634
 635         bool preferRE = true;
 636     int state = initStyle;
 637         int lengthDoc = startPos + length;
 638
 639         char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
 640         prevWord[0] = '\0';
 641         if (length == 0)
 642                 return;
 643
 644         char chPrev = styler.SafeGetCharAt(startPos - 1);
 645         char chNext = styler.SafeGetCharAt(startPos);
 646         // Ruby uses a different mask because bad indentation is marked by oring with 32
 647         styler.StartAt(startPos, 127);
 648         styler.StartSegment(startPos);
 649
 650     static int q_states[] = {SCE_RB_STRING_Q,
 651                              SCE_RB_STRING_QQ,
 652                              SCE_RB_STRING_QR,
 653                              SCE_RB_STRING_QW,
 654                              SCE_RB_STRING_QW,
 655                              SCE_RB_STRING_QX};
 656     static const char* q_chars = "qQrwWx";
 657
 658         for (int i = startPos; i < lengthDoc; i++) {
 659                 char ch = chNext;
 660                 chNext = styler.SafeGetCharAt(i + 1);
 661                 char chNext2 = styler.SafeGetCharAt(i + 2);
 662
 663         if (styler.IsLeadByte(ch)) {
 664                         chNext = chNext2;
 665                         chPrev = ' ';
 666                         i += 1;
 667                         continue;
 668                 }
 669
 670         // skip on DOS/Windows
 671         //No, don't, because some things will get tagged on,
 672         // so we won't recognize keywords, for example
 673 #if 0
 674                 if (ch == '\r' && chNext == '\n') {
 675                 continue;
 676         }
 677 #endif
 678
 679         if (HereDoc.State == 1 && isEOLChar(ch)) {
 680                         // Begin of here-doc (the line after the here-doc delimiter):
 681                         HereDoc.State = 2;
 682                         styler.ColourTo(i-1, state);
 683             // Don't check for a missing quote, just jump into
 684             // the here-doc state
 685             state = SCE_RB_HERE_Q;
 686         }
 687
 688         // Regular transitions
 689                 if (state == SCE_RB_DEFAULT) {
 690             if (isSafeDigit(ch)) {
 691                 styler.ColourTo(i - 1, state);
 692                                 state = SCE_RB_NUMBER;
 693                 numDots = 0;
 694             } else if (isHighBitChar(ch) || iswordstart(ch)) {
 695                 styler.ColourTo(i - 1, state);
 696                                 state = SCE_RB_WORD;
 697                         } else if (ch == '#') {
 698                                 styler.ColourTo(i - 1, state);
 699                                 state = SCE_RB_COMMENTLINE;
 700                         } else if (ch == '=') {
 701                                 // =begin indicates the start of a comment (doc) block
 702                 if (i == 0 || isEOLChar(chPrev)
 703                     && chNext == 'b'
 704                     && styler.SafeGetCharAt(i + 2) == 'e'
 705                     && styler.SafeGetCharAt(i + 3) == 'g'
 706                     && styler.SafeGetCharAt(i + 4) == 'i'
 707                     && styler.SafeGetCharAt(i + 5) == 'n'
 708                     && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
 709                     styler.ColourTo(i - 1, state);
 710                     state = SCE_RB_POD;
 711                                 } else {
 712                                         styler.ColourTo(i - 1, state);
 713                                         styler.ColourTo(i, SCE_RB_OPERATOR);
 714                                         preferRE = true;
 715                                 }
 716                         } else if (ch == '"') {
 717                                 styler.ColourTo(i - 1, state);
 718                                 state = SCE_RB_STRING;
 719                                 Quote.New();
 720                                 Quote.Open(ch);
 721                         } else if (ch == '\'') {
 722                 styler.ColourTo(i - 1, state);
 723                 state = SCE_RB_CHARACTER;
 724                 Quote.New();
 725                 Quote.Open(ch);
 726                         } else if (ch == '`') {
 727                                 styler.ColourTo(i - 1, state);
 728                                 state = SCE_RB_BACKTICKS;
 729                                 Quote.New();
 730                                 Quote.Open(ch);
 731                         } else if (ch == '@') {
 732                 // Instance or class var
 733                                 styler.ColourTo(i - 1, state);
 734                 if (chNext == '@') {
 735                     state = SCE_RB_CLASS_VAR;
 736                     advance_char(i, ch, chNext, chNext2); // pass by ref
 737                 } else {
 738                     state = SCE_RB_INSTANCE_VAR;
 739                 }
 740                         } else if (ch == '$') {
 741                 // Check for a builtin global
 742                                 styler.ColourTo(i - 1, state);
 743                 // Recognize it bit by bit
 744                 state = SCE_RB_GLOBAL;
 745             } else if (ch == '/' && preferRE) {
 746                 // Ambigous operator
 747                                 styler.ColourTo(i - 1, state);
 748                                 state = SCE_RB_REGEX;
 749                 Quote.New();
 750                 Quote.Open(ch);
 751                         } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
 752
 753                 // Recognise the '<<' symbol - either a here document or a binary op
 754                                 styler.ColourTo(i - 1, state);
 755                 i++;
 756                 chNext = chNext2;
 757                                 styler.ColourTo(i, SCE_RB_OPERATOR);
 758
 759                 if (! (strchr("\"\'`_-", chNext2) || isSafeAlpha(chNext2))) {
 760                     // It's definitely not a here-doc,
 761                     // based on Ruby's lexer/parser in the
 762                     // heredoc_identifier routine.
 763                     // Nothing else to do.
 764                 } else if (preferRE) {
 765                     if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
 766                         state = SCE_RB_HERE_DELIM;
 767                         HereDoc.State = 0;
 768                     }
 769                     // else leave it in default state
 770                 } else {
 771                     if (sureThisIsNotHeredoc(i - 1, styler)) {
 772                         // leave state as default
 773                         // We don't have all the heuristics Perl has for indications
 774                         // of a here-doc, because '<<' is overloadable and used
 775                         // for so many other classes.
 776                     } else {
 777                         state = SCE_RB_HERE_DELIM;
 778                         HereDoc.State = 0;
 779                     }
 780                 }
 781                 preferRE = (state != SCE_RB_HERE_DELIM);
 782             } else if (ch == ':') {
 783                                 styler.ColourTo(i - 1, state);
 784                 if (chNext == ':') {
 785                     // Mark "::" as an operator, not symbol start
 786                     styler.ColourTo(i + 1, SCE_RB_OPERATOR);
 787                     advance_char(i, ch, chNext, chNext2); // pass by ref
 788                     state = SCE_RB_DEFAULT;
 789                                         preferRE = false;
 790                 } else if (isSafeWordcharOrHigh(chNext)) {
 791                                         state = SCE_RB_SYMBOL;
 792                 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
 793                     // Do the operator analysis in-line, looking ahead
 794                     // Based on the table in pickaxe 2nd ed., page 339
 795                     bool doColoring = true;
 796                     switch (chNext) {
 797                     case '[':
 798                         if (chNext2 == ']' ) {
 799                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 800                             if (ch_tmp == '=') {
 801                                 i += 3;
 802                                 ch = ch_tmp;
 803                                 chNext = styler.SafeGetCharAt(i + 1);
 804                             } else {
 805                                 i += 2;
 806                                 ch = chNext2;
 807                                 chNext = ch_tmp;
 808                             }
 809                         } else {
 810                             doColoring = false;
 811                         }
 812                         break;
 813
 814                     case '*':
 815                         if (chNext2 == '*') {
 816                             i += 2;
 817                             ch = chNext2;
 818                             chNext = styler.SafeGetCharAt(i + 1);
 819                         } else {
 820                             advance_char(i, ch, chNext, chNext2);
 821                         }
 822                         break;
 823
 824                     case '!':
 825                         if (chNext2 == '=' || chNext2 == '~') {
 826                             i += 2;
 827                             ch = chNext2;
 828                             chNext = styler.SafeGetCharAt(i + 1);
 829                         } else {
 830                             advance_char(i, ch, chNext, chNext2);
 831                         }
 832                         break;
 833
 834                     case '<':
 835                         if (chNext2 == '<') {
 836                             i += 2;
 837                             ch = chNext2;
 838                             chNext = styler.SafeGetCharAt(i + 1);
 839                         } else if (chNext2 == '=') {
 840                             char ch_tmp = styler.SafeGetCharAt(i + 3);
 841                             if (ch_tmp == '>') {  // <=> operator
 842                                 i += 3;
 843                                 ch = ch_tmp;
 844                                 chNext = styler.SafeGetCharAt(i + 1);
 845                             } else {
 846                                 i += 2;
 847                                 ch = chNext2;
 848                                 chNext = ch_tmp;
 849                             }
 850                         } else {
 851                             advance_char(i, ch, chNext, chNext2);
 852                         }
 853                         break;
 854
 855                     default:
 856                         // Simple one-character operators
 857                         advance_char(i, ch, chNext, chNext2);
 858                         break;
 859                     }
 860                     if (doColoring) {
 861                         styler.ColourTo(i, SCE_RB_SYMBOL);
 862                         state = SCE_RB_DEFAULT;
 863                     }
 864                                 } else if (!preferRE) {
 865                                         // Don't color symbol strings (yet)
 866                                         // Just color the ":" and color rest as string
 867                                         styler.ColourTo(i, SCE_RB_SYMBOL);
 868                                         state = SCE_RB_DEFAULT;
 869                 } else {
 870                     styler.ColourTo(i, SCE_RB_OPERATOR);
 871                     state = SCE_RB_DEFAULT;
 872                     preferRE = true;
 873                 }
 874             } else if (ch == '%') {
 875                 styler.ColourTo(i - 1, state);
 876                 bool have_string = false;
 877                 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
 878                     Quote.New();
 879                     const char *hit = strchr(q_chars, chNext);
 880                     if (hit != NULL) {
 881                         state = q_states[hit - q_chars];
 882                         Quote.Open(chNext2);
 883                         i += 2;
 884                         ch = chNext2;
 885                                                 chNext = styler.SafeGetCharAt(i + 1);
 886                         have_string = true;
 887                     }
 888                 } else if (!isSafeWordcharOrHigh(chNext)) {
 889                     // Ruby doesn't allow high bit chars here,
 890                     // but the editor host might
 891                     state = SCE_RB_STRING_QQ;
 892                     Quote.Open(chNext);
 893                     advance_char(i, ch, chNext, chNext2); // pass by ref
 894                     have_string = true;
 895                 }
 896                 if (!have_string) {
 897                     styler.ColourTo(i, SCE_RB_OPERATOR);
 898                     // stay in default
 899                     preferRE = true;
 900                 }
 901             } else if (isoperator(ch) || ch == '.') {
 902                                 styler.ColourTo(i - 1, state);
 903                                 styler.ColourTo(i, SCE_RB_OPERATOR);
 904                 // If we're ending an expression or block,
 905                 // assume it ends an object, and the ambivalent
 906                 // constructs are binary operators
 907                 //
 908                 // So if we don't have one of these chars,
 909                 // we aren't ending an object exp'n, and ops
 910                 // like : << / are unary operators.
 911
 912                 preferRE = (strchr(")}].", ch) == NULL);
 913                 // Stay in default state
 914             } else if (isEOLChar(ch)) {
 915                 // Make sure it's a true line-end, with no backslash
 916                 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
 917                     && chPrev != '\\') {
 918                     // Assume we've hit the end of the statement.
 919                     preferRE = true;
 920                 }
 921             }
 922         } else if (state == SCE_RB_WORD) {
 923             if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
 924                 // Words include x? in all contexts,
 925                 // and <letters>= after either 'def' or a dot
 926                 // Move along until a complete word is on our left
 927
 928                 // Default accessor treats '.' as word-chars,
 929                 // but we don't for now.
 930
 931                 if (ch == '='
 932                     && isSafeWordcharOrHigh(chPrev)
 933                     && (chNext == '('
 934                         || strchr(" \t\n\r", chNext) != NULL)
 935                     && (!strcmp(prevWord, "def")
 936                         || followsDot(styler.GetStartSegment(), styler))) {
 937                     // <name>= is a name only when being def'd -- Get it the next time
 938                     // This means that <name>=<name> is always lexed as
 939                     // <name>, (op, =), <name>
 940                 } else if ((ch == '?' || ch == '!')
 941                            && isSafeWordcharOrHigh(chPrev)
 942                            && !isSafeWordcharOrHigh(chNext)) {
 943                     // <name>? is a name -- Get it the next time
 944                     // But <name>?<name> is always lexed as
 945                     // <name>, (op, ?), <name>
 946                     // Same with <name>! to indicate a method that
 947                     // modifies its target
 948                 } else if (isEOLChar(ch)
 949                            && isMatch(styler, lengthDoc, i - 7, "__END__")) {
 950                     styler.ColourTo(i, SCE_RB_DATASECTION);
 951                     state = SCE_RB_DATASECTION;
 952                     // No need to handle this state -- we'll just move to the end
 953                     preferRE = false;
 954                 } else {
 955                                         int wordStartPos = styler.GetStartSegment();
 956                     int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
 957                     switch (word_style) {
 958                         case SCE_RB_WORD:
 959                             preferRE = RE_CanFollowKeyword(prevWord);
 960                                                         break;
 961
 962                         case SCE_RB_WORD_DEMOTED:
 963                             preferRE = true;
 964                                                         break;
 965
 966                         case SCE_RB_IDENTIFIER:
 967                             if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
 968                                 preferRE = true;
 969                             } else if (isEOLChar(ch)) {
 970                                 preferRE = true;
 971                             } else {
 972                                 preferRE = false;
 973                             }
 974                                                         break;
 975                         default:
 976                             preferRE = false;
 977                     }
 978                     if (ch == '.') {
 979                         // We might be redefining an operator-method
 980                         preferRE = false;
 981                     }
 982                     // And if it's the first
 983                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
 984                 }
 985             }
 986         } else if (state == SCE_RB_NUMBER) {
 987             if (isSafeAlnumOrHigh(ch) || ch == '_') {
 988                 // Keep going
 989             } else if (ch == '.' && ++numDots == 1) {
 990                 // Keep going
 991             } else {
 992                 styler.ColourTo(i - 1, state);
 993                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
 994                 preferRE = false;
 995             }
 996         } else if (state == SCE_RB_COMMENTLINE) {
 997                         if (isEOLChar(ch)) {
 998                 styler.ColourTo(i - 1, state);
 999                 state = SCE_RB_DEFAULT;
1000                 // Use whatever setting we had going into the comment
1001             }
1002         } else if (state == SCE_RB_HERE_DELIM) {
1003             // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1004             // Slightly different: if we find an immediate '-',
1005             // the target can appear indented.
1006
1007                         if (HereDoc.State == 0) { // '<<' encountered
1008                                 HereDoc.State = 1;
1009                 HereDoc.DelimiterLength = 0;
1010                 if (ch == '-') {
1011                     HereDoc.CanBeIndented = true;
1012                     advance_char(i, ch, chNext, chNext2); // pass by ref
1013                 } else {
1014                     HereDoc.CanBeIndented = false;
1015                 }
1016                 if (isEOLChar(ch)) {
1017                     // Bail out of doing a here doc if there's no target
1018                     state = SCE_RB_DEFAULT;
1019                     preferRE = false;
1020                 } else {
1021                     HereDoc.Quote = ch;
1022
1023                     if (ch == '\'' || ch == '"' || ch == '`') {
1024                         HereDoc.Quoted = true;
1025                         HereDoc.Delimiter[0] = '\0';
1026                     } else {
1027                         HereDoc.Quoted = false;
1028                         HereDoc.Delimiter[0] = ch;
1029                         HereDoc.Delimiter[1] = '\0';
1030                         HereDoc.DelimiterLength = 1;
1031                     }
1032                 }
1033                         } else if (HereDoc.State == 1) { // collect the delimiter
1034                 if (isEOLChar(ch)) {
1035                     // End the quote now, and go back for more
1036                     styler.ColourTo(i - 1, state);
1037                     state = SCE_RB_DEFAULT;
1038                     i--;
1039                     chNext = ch;
1040                     chNext2 = chNext;
1041                     preferRE = false;
1042                 } else if (HereDoc.Quoted) {
1043                                         if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1044                                                 styler.ColourTo(i, state);
1045                                                 state = SCE_RB_DEFAULT;
1046                         preferRE = false;
1047                     } else {
1048                                                 if (ch == '\\' && !isEOLChar(chNext)) {
1049                             advance_char(i, ch, chNext, chNext2);
1050                                                 }
1051                                                 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1052                                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1053                     }
1054                 } else { // an unquoted here-doc delimiter
1055                                         if (isSafeAlnumOrHigh(ch) || ch == '_') {
1056                                                 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1057                                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1058                                         } else {
1059                                                 styler.ColourTo(i - 1, state);
1060                         redo_char(i, ch, chNext, chNext2, state);
1061                         preferRE = false;
1062                                         }
1063                 }
1064                                 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1065                                         styler.ColourTo(i - 1, state);
1066                                         state = SCE_RB_ERROR;
1067                     preferRE = false;
1068                                 }
1069             }
1070         } else if (state == SCE_RB_HERE_Q) {
1071             // Not needed: HereDoc.State == 2
1072             // Indentable here docs: look backwards
1073             // Non-indentable: look forwards, like in Perl
1074             //
1075             // Why: so we can quickly resolve things like <<-" abc"
1076
1077             if (!HereDoc.CanBeIndented) {
1078                 if (isEOLChar(chPrev)
1079                     && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1080                     styler.ColourTo(i - 1, state);
1081                     i += HereDoc.DelimiterLength - 1;
1082                     chNext = styler.SafeGetCharAt(i + 1);
1083                     if (isEOLChar(chNext)) {
1084                         styler.ColourTo(i, SCE_RB_HERE_DELIM);
1085                         state = SCE_RB_DEFAULT;
1086                         HereDoc.State = 0;
1087                         preferRE = false;
1088                     }
1089                     // Otherwise we skipped through the here doc faster.
1090                 }
1091             } else if (isEOLChar(chNext)
1092                        && lookingAtHereDocDelim(styler,
1093                                                 i - HereDoc.DelimiterLength + 1,
1094                                                 lengthDoc,
1095                                                 HereDoc.Delimiter)) {
1096                 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1097                 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1098                 state = SCE_RB_DEFAULT;
1099                 preferRE = false;
1100                 HereDoc.State = 0;
1101             }
1102         } else if (state == SCE_RB_CLASS_VAR
1103                    || state == SCE_RB_INSTANCE_VAR
1104                    || state == SCE_RB_SYMBOL) {
1105             if (!isSafeWordcharOrHigh(ch)) {
1106                 styler.ColourTo(i - 1, state);
1107                 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1108                 preferRE = false;
1109             }
1110         } else if (state == SCE_RB_GLOBAL) {
1111             if (!isSafeWordcharOrHigh(ch)) {
1112                 // handle special globals here as well
1113                 if (chPrev == '$') {
1114                     if (ch == '-') {
1115                         // Include the next char, like $-a
1116                         advance_char(i, ch, chNext, chNext2);
1117                     }
1118                     styler.ColourTo(i, state);
1119                     state = SCE_RB_DEFAULT;
1120                 } else {
1121                     styler.ColourTo(i - 1, state);
1122                     redo_char(i, ch, chNext, chNext2, state); // pass by ref
1123                 }
1124                 preferRE = false;
1125             }
1126         } else if (state == SCE_RB_POD) {
1127             // PODs end with ^=end\s, -- any whitespace can follow =end
1128             if (strchr(" \t\n\r", ch) != NULL
1129                 && i > 5
1130                 && isEOLChar(styler[i - 5])
1131                 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1132                 styler.ColourTo(i - 1, state);
1133                 state = SCE_RB_DEFAULT;
1134                 preferRE = false;
1135             }
1136         } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1137             if (ch == '\\' && Quote.Up != '\\') {
1138                 // Skip one
1139                 advance_char(i, ch, chNext, chNext2);
1140             } else if (ch == Quote.Down) {
1141                 Quote.Count--;
1142                 if (Quote.Count == 0) {
1143                     // Include the options
1144                     while (isSafeAlpha(chNext)) {
1145                         i++;
1146                                                 ch = chNext;
1147                         chNext = styler.SafeGetCharAt(i + 1);
1148                     }
1149                     styler.ColourTo(i, state);
1150                     state = SCE_RB_DEFAULT;
1151                     preferRE = false;
1152                 }
1153             } else if (ch == Quote.Up) {
1154                 // Only if close quoter != open quoter
1155                 Quote.Count++;
1156
1157             } else if (ch == '#' ) {
1158                 //todo: distinguish comments from pound chars
1159                 // for now, handle as comment
1160                 styler.ColourTo(i - 1, state);
1161                 bool inEscape = false;
1162                 while (++i < lengthDoc) {
1163                     ch = styler.SafeGetCharAt(i);
1164                     if (ch == '\\') {
1165                         inEscape = true;
1166                     } else if (isEOLChar(ch)) {
1167                         // Comment inside a regex
1168                         styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1169                         break;
1170                     } else if (inEscape) {
1171                         inEscape = false;  // don't look at char
1172                     } else if (ch == Quote.Down) {
1173                         // Have the regular handler deal with this
1174                         // to get trailing modifiers.
1175                         i--;
1176                         ch = styler[i];
1177                                                 break;
1178                     }
1179                 }
1180                 chNext = styler.SafeGetCharAt(i + 1);
1181                 chNext2 = styler.SafeGetCharAt(i + 2);
1182             }
1183         // Quotes of all kinds...
1184         } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1185                    state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1186                    state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1187                    state == SCE_RB_BACKTICKS) {
1188             if (!Quote.Down && !isspacechar(ch)) {
1189                 Quote.Open(ch);
1190             } else if (ch == '\\' && Quote.Up != '\\') {
1191                 //Riddle me this: Is it safe to skip *every* escaped char?
1192                 advance_char(i, ch, chNext, chNext2);
1193             } else if (ch == Quote.Down) {
1194                 Quote.Count--;
1195                 if (Quote.Count == 0) {
1196                     styler.ColourTo(i, state);
1197                     state = SCE_RB_DEFAULT;
1198                     preferRE = false;
1199                 }
1200             } else if (ch == Quote.Up) {
1201                 Quote.Count++;
1202             }
1203         }
1204
1205         if (state == SCE_RB_ERROR) {
1206             break;
1207         }
1208         chPrev = ch;
1209     }
1210     if (state == SCE_RB_WORD) {
1211         // We've ended on a word, possibly at EOF, and need to
1212         // classify it.
1213         (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1214     } else {
1215         styler.ColourTo(lengthDoc - 1, state);
1216     }
1217 }
1218
// Helper functions for folding and for disambiguating keywords
1220 // Assert that there are no high-bit chars
1221
1222 static void getPrevWord(int pos,
1223                         char *prevWord,
1224                         Accessor &styler,
1225                         int word_state)
1226 {
1227     int i;
1228     styler.Flush();
1229     for (i = pos - 1; i > 0; i--) {
1230         if (actual_style(styler.StyleAt(i)) != word_state) {
1231             i++;
1232             break;
1233         }
1234     }
1235     if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1236         i = pos - MAX_KEYWORD_LENGTH;
1237     char *dst = prevWord;
1238     for (; i <= pos; i++) {
1239         *dst++ = styler[i];
1240     }
1241         *dst = 0;
1242 }
1243
// Report whether 'prevWord' is one of the keywords that can either open a
// block or act as a statement modifier / noise word:
// "if", "do", "while", "unless", "until".
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most to least likely; Ruby has plenty of ways to write
    // a loop besides while/until.
    static const char * const candidates[] = {
        "if", "do", "while", "unless", "until"
    };
    const unsigned int count = sizeof(candidates) / sizeof(candidates[0]);
    for (unsigned int k = 0; k < count; k++) {
        if (strcmp(prevWord, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
1258
1259 // Demote keywords in the following conditions:
1260 // if, while, unless, until modify a statement
1261 // do after a while or until, as a noise word (like then after if)
1262
1263 static bool keywordIsModifier(const char *word,
1264                               int pos,
1265                               Accessor &styler)
1266 {
1267     if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1268         return keywordDoStartsLoop(pos, styler);
1269     }
1270     char ch;
1271     int style = SCE_RB_DEFAULT;
1272         int lineStart = styler.GetLine(pos);
1273     int lineStartPosn = styler.LineStart(lineStart);
1274     styler.Flush();
1275     while (--pos >= lineStartPosn) {
1276         style = actual_style(styler.StyleAt(pos));
1277                 if (style == SCE_RB_DEFAULT) {
1278                         if (iswhitespace(ch = styler[pos])) {
1279                                 //continue
1280                         } else if (ch == '\r' || ch == '\n') {
1281                                 // Scintilla's LineStart() and GetLine() routines aren't
1282                                 // platform-independent, so if we have text prepared with
1283                                 // a different system we can't rely on it.
1284                                 return false;
1285                         }
1286                 } else {
1287             break;
1288                 }
1289     }
1290     if (pos < lineStartPosn) {
1291         return false; //XXX not quite right if the prev line is a continuation
1292     }
1293     // First things where the action is unambiguous
1294     switch (style) {
1295         case SCE_RB_DEFAULT:
1296         case SCE_RB_COMMENTLINE:
1297         case SCE_RB_POD:
1298         case SCE_RB_CLASSNAME:
1299         case SCE_RB_DEFNAME:
1300         case SCE_RB_MODULE_NAME:
1301             return false;
1302         case SCE_RB_OPERATOR:
1303             break;
1304         case SCE_RB_WORD:
1305             // Watch out for uses of 'else if'
1306             //XXX: Make a list of other keywords where 'if' isn't a modifier
1307             //     and can appear legitimately
1308             // Formulate this to avoid warnings from most compilers
1309             if (strcmp(word, "if") == 0) {
1310                 char prevWord[MAX_KEYWORD_LENGTH + 1];
1311                 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1312                 return strcmp(prevWord, "else") != 0;
1313             }
1314             return true;
1315         default:
1316             return true;
1317     }
1318     // Assume that if the keyword follows an operator,
1319     // usually it's a block assignment, like
1320     // a << if x then y else z
1321
1322     ch = styler[pos];
1323     switch (ch) {
1324         case ')':
1325         case ']':
1326         case '}':
1327             return true;
1328         default:
1329             return false;
1330     }
1331 }
1332
1333 #define WHILE_BACKWARDS "elihw"
1334 #define UNTIL_BACKWARDS "litnu"
1335
1336 // Nothing fancy -- look to see if we follow a while/until somewhere
1337 // on the current line
1338
1339 static bool keywordDoStartsLoop(int pos,
1340                                 Accessor &styler)
1341 {
1342     char ch;
1343     int style;
1344         int lineStart = styler.GetLine(pos);
1345     int lineStartPosn = styler.LineStart(lineStart);
1346     styler.Flush();
1347     while (--pos >= lineStartPosn) {
1348         style = actual_style(styler.StyleAt(pos));
1349                 if (style == SCE_RB_DEFAULT) {
1350                         if ((ch = styler[pos]) == '\r' || ch == '\n') {
1351                                 // Scintilla's LineStart() and GetLine() routines aren't
1352                                 // platform-independent, so if we have text prepared with
1353                                 // a different system we can't rely on it.
1354                                 return false;
1355                         }
1356                 } else if (style == SCE_RB_WORD) {
1357             // Check for while or until, but write the word in backwards
1358             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1359             char *dst = prevWord;
1360             int wordLen = 0;
1361             int start_word;
1362             for (start_word = pos;
1363                  start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1364                  start_word--) {
1365                 if (++wordLen < MAX_KEYWORD_LENGTH) {
1366                     *dst++ = styler[start_word];
1367                 }
1368             }
1369             *dst = 0;
1370             // Did we see our keyword?
1371             if (!strcmp(prevWord, WHILE_BACKWARDS)
1372                 || !strcmp(prevWord, UNTIL_BACKWARDS)) {
1373                 return true;
1374             }
1375             // We can move pos to the beginning of the keyword, and then
1376             // accept another decrement, as we can never have two contiguous
1377             // keywords:
1378             // word1 word2
1379             //           ^
1380             //        <-  move to start_word
1381             //      ^
1382             //      <- loop decrement
1383             //     ^  # pointing to end of word1 is fine
1384             pos = start_word;
1385         }
1386     }
1387     return false;
1388 }
1389
1390 /*
1391  *  Folding Ruby
1392  *
1393  *  The language is quite complex to analyze without a full parse.
1394  *  For example, this line shouldn't affect fold level:
1395  *
1396  *   print "hello" if feeling_friendly?
1397  *
1398  *  Neither should this:
1399  *
1400  *   print "hello" \
1401  *      if feeling_friendly?
1402  *
1403  *
1404  *  But this should:
1405  *
1406  *   if feeling_friendly?  #++
1407  *     print "hello" \
1408  *     print "goodbye"
1409  *   end                   #--
1410  *
1411  *  So we cheat, by actually looking at the existing indentation
1412  *  levels for each line, and just echoing it back.  Like Python.
1413  *  Then if we get better at it, we'll take braces into consideration,
1414  *  which always affect folding levels.
1415
1416  *  How the keywords should work:
1417  *  No effect:
1418  *  __FILE__ __LINE__ BEGIN END alias and
1419  *  defined? false in nil not or self super then
1420  *  true undef
1421
1422  *  Always increment:
1423  *  begin  class def do for module when {
1424  *
1425  *  Always decrement:
1426  *  end }
1427  *
1428  *  Increment if these start a statement
1429  *  if unless until while -- do nothing if they're modifiers
1430
1431  *  These end a block if there's no modifier, but don't bother
1432  *  break next redo retry return yield
1433  *
1434  *  These temporarily de-indent, but re-indent
1435  *  case else elsif ensure rescue
1436  *
 *  This means that the folder reflects indentation rather
 *  than setting it.  The language service updates indentation
 *  when the user types Return or finishes entering a de-denting keyword.
1440  *
1441  *  Later offer to fold POD, here-docs, strings, and blocks of comments
1442  */
1443
1444 static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
1445                       WordList *[], Accessor &styler) {
1446         const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1447         bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1448
1449     synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1450                         false);
1451         unsigned int endPos = startPos + length;
1452         int visibleChars = 0;
1453         int lineCurrent = styler.GetLine(startPos);
1454         int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1455                                          & SC_FOLDLEVELNUMBERMASK
1456                                          & ~SC_FOLDLEVELBASE);
1457         int levelCurrent = levelPrev;
1458         char chNext = styler[startPos];
1459         int styleNext = styler.StyleAt(startPos);
1460         int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1461     bool buffer_ends_with_eol = false;
1462         for (unsigned int i = startPos; i < endPos; i++) {
1463                 char ch = chNext;
1464                 chNext = styler.SafeGetCharAt(i + 1);
1465                 int style = styleNext;
1466                 styleNext = styler.StyleAt(i + 1);
1467                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1468         if (style == SCE_RB_COMMENTLINE) {
1469             if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1470                 if (chNext == '{') {
1471                                         levelCurrent++;
1472                                 } else if (chNext == '}') {
1473                                         levelCurrent--;
1474                                 }
1475             }
1476         } else if (style == SCE_RB_OPERATOR) {
1477                         if (strchr("[{(", ch)) {
1478                                 levelCurrent++;
1479                         } else if (strchr(")}]", ch)) {
1480                 // Don't decrement below 0
1481                 if (levelCurrent > 0)
1482                     levelCurrent--;
1483                         }
1484         } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1485             // Look at the keyword on the left and decide what to do
1486             char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1487             prevWord[0] = 0;
1488             getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1489             if (!strcmp(prevWord, "end")) {
1490                 // Don't decrement below 0
1491                 if (levelCurrent > 0)
1492                     levelCurrent--;
1493             } else if (   !strcmp(prevWord, "if")
1494                        || !strcmp(prevWord, "def")
1495                        || !strcmp(prevWord, "class")
1496                        || !strcmp(prevWord, "module")
1497                        || !strcmp(prevWord, "begin")
1498                        || !strcmp(prevWord, "case")
1499                        || !strcmp(prevWord, "do")
1500                        || !strcmp(prevWord, "while")
1501                        || !strcmp(prevWord, "unless")
1502                        || !strcmp(prevWord, "until")
1503                        || !strcmp(prevWord, "for")
1504                           ) {
1505                                 levelCurrent++;
1506             }
1507         }
1508                 if (atEOL) {
1509                         int lev = levelPrev;
1510                         if (visibleChars == 0 && foldCompact)
1511                                 lev |= SC_FOLDLEVELWHITEFLAG;
1512                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1513                                 lev |= SC_FOLDLEVELHEADERFLAG;
1514             styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1515                         lineCurrent++;
1516                         levelPrev = levelCurrent;
1517                         visibleChars = 0;
1518             buffer_ends_with_eol = true;
1519                 } else if (!isspacechar(ch)) {
1520                         visibleChars++;
1521             buffer_ends_with_eol = false;
1522         }
1523     }
1524         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1525     if (!buffer_ends_with_eol) {
1526         lineCurrent++;
1527         int new_lev = levelCurrent;
1528         if (visibleChars == 0 && foldCompact)
1529             new_lev |= SC_FOLDLEVELWHITEFLAG;
1530                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1531                                 new_lev |= SC_FOLDLEVELHEADERFLAG;
1532             levelCurrent = new_lev;
1533     }
1534         styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1535 }
1536
1537 static const char * const rubyWordListDesc[] = { // 0-terminated table of strings passed as the last argument when constructing lmRuby; presumably labels the lexer's keyword lists -- TODO confirm against KeyWords.h
1538         "Keywords", // the only string entry in the table
1539         0 // null sentinel marking the end of the table
1540 };
1541
1542 LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc); // Global LexerModule object for Ruby, built from the SCLEX_RUBY id, the name "ruby", ColouriseRbDoc and FoldRbDoc (colouring and folding entry points, going by their names) and the rubyWordListDesc table. NOTE(review): presumably construction makes the lexer discoverable by Scintilla -- confirm in the LexerModule declaration.