src/stc/scintilla/lexers/LexPerl.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPerl.cxx
   3  ** Lexer for Perl.
   4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
   7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "StyleContext.h"
  27 #include "CharacterSet.h"
  28 #include "LexerModule.h"
  29 #include "OptionSet.h"
  30
  31 #ifdef SCI_NAMESPACE
  32 using namespace Scintilla;
  33 #endif
  34
  35 // Info for HERE document handling from perldata.pod (reformatted):
  36 // ----------------------------------------------------------------
  37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
  38 // Following a << you specify a string to terminate the quoted material, and
  39 // all lines following the current line down to the terminating string are
  40 // the value of the item.
  41 // * The terminating string may be either an identifier (a word), or some
  42 //   quoted text.
  43 // * If quoted, the type of quotes you use determines the treatment of the
  44 //   text, just as in regular quoting.
  45 // * An unquoted identifier works like double quotes.
  46 // * There must be no space between the << and the identifier.
  47 //   (If you put a space it will be treated as a null identifier,
  48 //    which is valid, and matches the first empty line.)
  49 //   (This is deprecated, -w warns of this syntax)
  50 // * The terminating string must appear by itself (unquoted and
  51 //   with no surrounding whitespace) on the terminating line.
  52
  53 #define HERE_DELIM_MAX 256              // maximum length of HERE doc delimiter
  54
  55 #define PERLNUM_BINARY          1       // order is significant: 1-4 cannot have a dot
  56 #define PERLNUM_HEX                     2
  57 #define PERLNUM_OCTAL           3
  58 #define PERLNUM_FLOAT_EXP       4       // exponent part only
  59 #define PERLNUM_DECIMAL         5       // 1-5 are numbers; 6-7 are strings
  60 #define PERLNUM_VECTOR          6
  61 #define PERLNUM_V_VECTOR        7
  62 #define PERLNUM_BAD                     8
  63
  64 #define BACK_NONE               0       // lookback state for bareword disambiguation:
  65 #define BACK_OPERATOR   1       // whitespace/comments are insignificant
  66 #define BACK_KEYWORD    2       // operators/keywords are needed for disambiguation
  67
  68 // all interpolated styles are different from their parent styles by a constant difference
  69 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
  70 #define INTERPOLATE_SHIFT       (SCE_PL_STRING_VAR - SCE_PL_STRING)
  71
  72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
  73         // old-style keyword matcher; needed because GetCurrent() needs
  74         // current segment to be committed, but we may abandon early...
  75         char s[100];
  76         unsigned int i, len = end - start;
  77         if (len > 30) { len = 30; }
  78         for (i = 0; i < len; i++, start++) s[i] = styler[start];
  79         s[i] = '\0';
  80         return keywords.InList(s);
  81 }
  82
  83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
  84         int backFlag, unsigned int backPos, unsigned int endPos) {
  85         // identifiers are recognized by Perl as barewords under some
  86         // conditions, the following attempts to do the disambiguation
  87         // by looking backward and forward; result in 2 LSB
  88         int result = 0;
  89         bool moreback = false;          // true if passed newline/comments
  90         bool brace = false;                     // true if opening brace found
  91         // if BACK_NONE, neither operator nor keyword, so skip test
  92         if (backFlag == BACK_NONE)
  93                 return result;
  94         // first look backwards past whitespace/comments to set EOL flag
  95         // (some disambiguation patterns must be on a single line)
  96         if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
  97                 moreback = true;
  98         // look backwards at last significant lexed item for disambiguation
  99         bk = backPos - 1;
 100         int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 101         if (ch == '{' && !moreback) {
 102                 // {bareword: possible variable spec
 103                 brace = true;
 104         } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
 105                 // &bareword: subroutine call
 106                 || styler.Match(bk - 1, "->")
 107                 // ->bareword: part of variable spec
 108                 || styler.Match(bk - 2, "sub")) {
 109                 // sub bareword: subroutine declaration
 110                 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
 111                 result |= 1;
 112         }
 113         // next, scan forward after word past tab/spaces only;
 114         // if ch isn't one of '[{(,' we can skip the test
 115         if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
 116                 && fw < endPos) {
 117                 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
 118                         IsASpaceOrTab(ch) && fw < endPos) {
 119                         fw++;
 120                 }
 121                 if ((ch == '}' && brace)
 122                         // {bareword}: variable spec
 123                         || styler.Match(fw, "=>")) {
 124                         // [{(, bareword=>: hash literal
 125                         result |= 2;
 126                 }
 127         }
 128         return result;
 129 }
 130
 131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
 132         // when backtracking, we need to skip whitespace and comments
 133         int style;
 134         while ((p > 0) && (style = styler.StyleAt(p),
 135                 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
 136                 p--;
 137 }
 138
 139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
 140         // backtrack to find open '{' corresponding to a '}', balanced
 141         // return significant style to be tested for '/' disambiguation
 142         int braceCount = 1;
 143         if (bk == 0)
 144                 return SCE_PL_DEFAULT;
 145         while (--bk > 0) {
 146                 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
 147                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 148                         if (bkch == ';') {      // early out
 149                                 break;
 150                         } else if (bkch == '}') {
 151                                 braceCount++;
 152                         } else if (bkch == '{') {
 153                                 if (--braceCount == 0) break;
 154                         }
 155                 }
 156         }
 157         if (bk > 0 && braceCount == 0) {
 158                 // balanced { found, bk > 0, skip more whitespace/comments
 159                 bk--;
 160                 skipWhitespaceComment(styler, bk);
 161                 return styler.StyleAt(bk);
 162         }
 163         return SCE_PL_DEFAULT;
 164 }
 165
 166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
 167         // backtrack to classify sub-styles of identifier under test
 168         // return sub-style to be tested for '/' disambiguation
 169         if (styler.SafeGetCharAt(bk) == '>')    // inputsymbol, like <foo>
 170                 return 1;
 171         // backtrack to check for possible "->" or "::" before identifier
 172         while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
 173                 bk--;
 174         }
 175         while (bk > 0) {
 176                 int bkstyle = styler.StyleAt(bk);
 177                 if (bkstyle == SCE_PL_DEFAULT
 178                         || bkstyle == SCE_PL_COMMENTLINE) {
 179                         // skip whitespace, comments
 180                 } else if (bkstyle == SCE_PL_OPERATOR) {
 181                         // test for "->" and "::"
 182                         if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
 183                                 return 2;
 184                 } else
 185                         return 3;       // bare identifier
 186                 bk--;
 187         }
 188         return 0;
 189 }
 190
 191 static int inputsymbolScan(LexAccessor &styler, unsigned int pos, unsigned int endPos) {
 192         // looks forward for matching > on same line; a bit ugly
 193         unsigned int fw = pos;
 194         while (++fw < endPos) {
 195                 int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
 196                 if (fwch == '\r' || fwch == '\n') {
 197                         return 0;
 198                 } else if (fwch == '>') {
 199                         if (styler.Match(fw - 2, "<=>"))        // '<=>' case
 200                                 return 0;
 201                         return fw - pos;
 202                 }
 203         }
 204         return 0;
 205 }
 206
 207 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
 208         // forward scan the current line to classify line for POD style
 209         int state = -1;
 210         while (pos <= endPos) {
 211                 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
 212                 if (ch == '\n' || ch == '\r' || pos >= endPos) {
 213                         if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
 214                         break;
 215                 }
 216                 if (IsASpaceOrTab(ch)) {        // whitespace, take note
 217                         if (state == -1)
 218                                 state = SCE_PL_DEFAULT;
 219                 } else if (state == SCE_PL_DEFAULT) {   // verbatim POD line
 220                         state = SCE_PL_POD_VERB;
 221                 } else if (state != SCE_PL_POD_VERB) {  // regular POD line
 222                         state = SCE_PL_POD;
 223                 }
 224                 pos++;
 225         }
 226         if (state == -1)
 227                 state = SCE_PL_DEFAULT;
 228         return state;
 229 }
 230
 231 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
 232         // backtrack to identify if we're starting a subroutine prototype
 233         // we also need to ignore whitespace/comments:
 234         // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
 235         styler.Flush();
 236         skipWhitespaceComment(styler, bk);
 237         if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
 238                 return false;
 239         while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
 240                 bk--;
 241         }
 242         skipWhitespaceComment(styler, bk);
 243         if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
 244                 || !styler.Match(bk - 2, "sub"))        // assume suffix is unique!
 245                 return false;
 246         return true;
 247 }
 248
 249 static int actualNumStyle(int numberStyle) {
 250         if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
 251                 return SCE_PL_STRING;
 252         } else if (numberStyle == PERLNUM_BAD) {
 253                 return SCE_PL_ERROR;
 254         }
 255         return SCE_PL_NUMBER;
 256 }
 257
 258 static int opposite(int ch) {
 259         if (ch == '(') return ')';
 260         if (ch == '[') return ']';
 261         if (ch == '{') return '}';
 262         if (ch == '<') return '>';
 263         return ch;
 264 }
 265
 266 static bool IsCommentLine(int line, LexAccessor &styler) {
 267         int pos = styler.LineStart(line);
 268         int eol_pos = styler.LineStart(line + 1) - 1;
 269         for (int i = pos; i < eol_pos; i++) {
 270                 char ch = styler[i];
 271                 int style = styler.StyleAt(i);
 272                 if (ch == '#' && style == SCE_PL_COMMENTLINE)
 273                         return true;
 274                 else if (!IsASpaceOrTab(ch))
 275                         return false;
 276         }
 277         return false;
 278 }
 279
 280 static bool IsPackageLine(int line, LexAccessor &styler) {
 281         int pos = styler.LineStart(line);
 282         int style = styler.StyleAt(pos);
 283         if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
 284                 return true;
 285         }
 286         return false;
 287 }
 288
 289 static int PodHeadingLevel(int pos, LexAccessor &styler) {
 290         int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
 291         if (lvl >= '1' && lvl <= '4') {
 292                 return lvl - '0';
 293         }
 294         return 0;
 295 }
 296
 297 // An individual named option for use in an OptionSet
 298
 299 // Options used for LexerPerl
 300 struct OptionsPerl {
 301         bool fold;
 302         bool foldComment;
 303         bool foldCompact;
 304         // Custom folding of POD and packages
 305         bool foldPOD;            // fold.perl.pod
 306         // Enable folding Pod blocks when using the Perl lexer.
 307         bool foldPackage;        // fold.perl.package
 308         // Enable folding packages when using the Perl lexer.
 309
 310         bool foldCommentExplicit;
 311
 312         bool foldAtElse;
 313
 314         OptionsPerl() {
 315                 fold = false;
 316                 foldComment = false;
 317                 foldCompact = true;
 318                 foldPOD = true;
 319                 foldPackage = true;
 320                 foldCommentExplicit = true;
 321                 foldAtElse = false;
 322         }
 323 };
 324
 325 static const char *const perlWordListDesc[] = {
 326         "Keywords",
 327         0
 328 };
 329
 330 struct OptionSetPerl : public OptionSet<OptionsPerl> {
 331         OptionSetPerl() {
 332                 DefineProperty("fold", &OptionsPerl::fold);
 333
 334                 DefineProperty("fold.comment", &OptionsPerl::foldComment);
 335
 336                 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
 337
 338                 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
 339                         "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
 340
 341                 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
 342                         "Set to 0 to disable folding packages when using the Perl lexer.");
 343
 344                 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
 345                         "Set to 0 to disable explicit folding.");
 346
 347                 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
 348                                "This option enables Perl folding on a \"} else {\" line of an if statement.");
 349
 350                 DefineWordListSets(perlWordListDesc);
 351         }
 352 };
 353
 354 class LexerPerl : public ILexer {
 355         CharacterSet setWordStart;
 356         CharacterSet setWord;
 357         CharacterSet setSpecialVar;
 358         CharacterSet setControlVar;
 359         WordList keywords;
 360         OptionsPerl options;
 361         OptionSetPerl osPerl;
 362 public:
 363         LexerPerl() :
 364                 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
 365                 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
 366                 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
 367                 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
 368         }
 369         virtual ~LexerPerl() {
 370         }
 371         void SCI_METHOD Release() {
 372                 delete this;
 373         }
 374         int SCI_METHOD Version() const {
 375                 return lvOriginal;
 376         }
 377         const char *SCI_METHOD PropertyNames() {
 378                 return osPerl.PropertyNames();
 379         }
 380         int SCI_METHOD PropertyType(const char *name) {
 381                 return osPerl.PropertyType(name);
 382         }
 383         const char *SCI_METHOD DescribeProperty(const char *name) {
 384                 return osPerl.DescribeProperty(name);
 385         }
 386         int SCI_METHOD PropertySet(const char *key, const char *val);
 387         const char *SCI_METHOD DescribeWordListSets() {
 388                 return osPerl.DescribeWordListSets();
 389         }
 390         int SCI_METHOD WordListSet(int n, const char *wl);
 391         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 392         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 393
 394         void *SCI_METHOD PrivateCall(int, void *) {
 395                 return 0;
 396         }
 397
 398         static ILexer *LexerFactoryPerl() {
 399                 return new LexerPerl();
 400         }
 401         void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
 402 };
 403
 404 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
 405         if (osPerl.PropertySet(&options, key, val)) {
 406                 return 0;
 407         }
 408         return -1;
 409 }
 410
 411 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
 412         WordList *wordListN = 0;
 413         switch (n) {
 414         case 0:
 415                 wordListN = &keywords;
 416                 break;
 417         }
 418         int firstModification = -1;
 419         if (wordListN) {
 420                 WordList wlNew;
 421                 wlNew.Set(wl);
 422                 if (*wordListN != wlNew) {
 423                         wordListN->Set(wl);
 424                         firstModification = 0;
 425                 }
 426         }
 427         return firstModification;
 428 }
 429
 430 void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
 431         // interpolate a segment (with no active backslashes or delimiters within)
 432         // switch in or out of an interpolation style or continue current style
 433         // commit variable patterns if found, trim segment, repeat until done
 434         while (maxSeg > 0) {
 435                 bool isVar = false;
 436                 int sLen = 0;
 437                 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
 438                         // $#[$]*word [$@][$]*word (where word or {word} is always present)
 439                         bool braces = false;
 440                         sLen = 1;
 441                         if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
 442                                 sLen++;
 443                         }
 444                         while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$'))        // >0 $ dereference within
 445                                 sLen++;
 446                         if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) { // { start for {word}
 447                                 sLen++;
 448                                 braces = true;
 449                         }
 450                         if (maxSeg > sLen) {
 451                                 int c = sc.GetRelative(sLen);
 452                                 if (setWordStart.Contains(c)) { // word (various)
 453                                         sLen++;
 454                                         isVar = true;
 455                                         while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen)))
 456                                                 sLen++;
 457                                 } else if (braces && IsADigit(c) && (sLen == 2)) {      // digit for ${digit}
 458                                         sLen++;
 459                                         isVar = true;
 460                                 }
 461                         }
 462                         if (braces) {
 463                                 if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) { // } end for {word}
 464                                         sLen++;
 465                                 } else
 466                                         isVar = false;
 467                         }
 468                 }
 469                 if (!isVar && (maxSeg > 1)) {   // $- or @-specific variable patterns
 470                         sLen = 1;
 471                         int c = sc.chNext;
 472                         if (sc.ch == '$') {
 473                                 if (IsADigit(c)) {      // $[0-9] and slurp trailing digits
 474                                         sLen++;
 475                                         isVar = true;
 476                                         while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen)))
 477                                                 sLen++;
 478                                 } else if (setSpecialVar.Contains(c)) { // $ special variables
 479                                         sLen++;
 480                                         isVar = true;
 481                                 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {    // $ additional
 482                                         sLen++;
 483                                         isVar = true;
 484                                 } else if (c == '^') {  // $^A control-char style
 485                                         sLen++;
 486                                         if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) {
 487                                                 sLen++;
 488                                                 isVar = true;
 489                                         }
 490                                 }
 491                         } else if (sc.ch == '@') {
 492                                 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
 493                                         sLen++;
 494                                         isVar = true;
 495                                 }
 496                         }
 497                 }
 498                 if (isVar) {    // commit as interpolated variable or normal character
 499                         if (sc.state < SCE_PL_STRING_VAR)
 500                                 sc.SetState(sc.state + INTERPOLATE_SHIFT);
 501                         sc.Forward(sLen);
 502                         maxSeg -= sLen;
 503                 } else {
 504                         if (sc.state >= SCE_PL_STRING_VAR)
 505                                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 506                         sc.Forward();
 507                         maxSeg--;
 508                 }
 509         }
 510         if (sc.state >= SCE_PL_STRING_VAR)
 511                 sc.SetState(sc.state - INTERPOLATE_SHIFT);
 512 }
 513
 514 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
 515         LexAccessor styler(pAccess);
 516
 517         // keywords that forces /PATTERN/ at all times; should track vim's behaviour
 518         WordList reWords;
 519         reWords.Set("elsif if split while");
 520
 521         // charset classes
 522         CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
 523         // lexing of "%*</" operators is non-trivial; these are missing in the set below
 524         CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
 525         CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
 526         CharacterSet setModifiers(CharacterSet::setAlpha);
 527         CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
 528         // setArray and setHash also accepts chars for special vars like $_,
 529         // which are then truncated when the next char does not match setVar
 530         CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
 531         CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
 532         CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
 533         CharacterSet &setPOD = setModifiers;
 534         CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
 535         CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
 536         CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");
 537         // for format identifiers
 538         CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
 539         CharacterSet &setFormat = setHereDocDelim;
 540
 541         // Lexer for perl often has to backtrack to start of current style to determine
 542         // which characters are being used as quotes, how deeply nested is the
 543         // start position and what the termination string is for HERE documents.
 544
 545         class HereDocCls {      // Class to manage HERE doc sequence
 546         public:
 547                 int State;
 548                 // 0: '<<' encountered
 549                 // 1: collect the delimiter
 550                 // 2: here doc text (lines after the delimiter)
 551                 int Quote;              // the char after '<<'
 552                 bool Quoted;            // true if Quote in ('\'','"','`')
 553                 int DelimiterLength;    // strlen(Delimiter)
 554                 char *Delimiter;        // the Delimiter, 256: sizeof PL_tokenbuf
 555                 HereDocCls() {
 556                         State = 0;
 557                         Quote = 0;
 558                         Quoted = false;
 559                         DelimiterLength = 0;
 560                         Delimiter = new char[HERE_DELIM_MAX];
 561                         Delimiter[0] = '\0';
 562                 }
 563                 void Append(int ch) {
 564                         Delimiter[DelimiterLength++] = static_cast<char>(ch);
 565                         Delimiter[DelimiterLength] = '\0';
 566                 }
 567                 ~HereDocCls() {
 568                         delete []Delimiter;
 569                 }
 570         };
 571         HereDocCls HereDoc;             // TODO: FIFO for stacked here-docs
 572
 573         class QuoteCls {        // Class to manage quote pairs
 574         public:
 575                 int Rep;
 576                 int Count;
 577                 int Up, Down;
 578                 QuoteCls() {
 579                         this->New(1);
 580                 }
 581                 void New(int r = 1) {
 582                         Rep   = r;
 583                         Count = 0;
 584                         Up    = '\0';
 585                         Down  = '\0';
 586                 }
 587                 void Open(int u) {
 588                         Count++;
 589                         Up    = u;
 590                         Down  = opposite(Up);
 591                 }
 592         };
 593         QuoteCls Quote;
 594
 595         // additional state for number lexing
 596         int numState = PERLNUM_DECIMAL;
 597         int dotCount = 0;
 598
 599         unsigned int endPos = startPos + length;
 600
 601         // Backtrack to beginning of style if required...
 602         // If in a long distance lexical state, backtrack to find quote characters.
 603         // Includes strings (may be multi-line), numbers (additional state), format
 604         // bodies, as well as POD sections.
 605         if (initStyle == SCE_PL_HERE_Q
 606             || initStyle == SCE_PL_HERE_QQ
 607             || initStyle == SCE_PL_HERE_QX
 608             || initStyle == SCE_PL_FORMAT
 609             || initStyle == SCE_PL_HERE_QQ_VAR
 610             || initStyle == SCE_PL_HERE_QX_VAR
 611            ) {
 612                 // backtrack through multiple styles to reach the delimiter start
 613                 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
 614                 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
 615                         startPos--;
 616                 }
 617                 startPos = styler.LineStart(styler.GetLine(startPos));
 618                 initStyle = styler.StyleAt(startPos - 1);
 619         }
 620         if (initStyle == SCE_PL_STRING
 621             || initStyle == SCE_PL_STRING_QQ
 622             || initStyle == SCE_PL_BACKTICKS
 623             || initStyle == SCE_PL_STRING_QX
 624             || initStyle == SCE_PL_REGEX
 625             || initStyle == SCE_PL_STRING_QR
 626             || initStyle == SCE_PL_REGSUBST
 627             || initStyle == SCE_PL_STRING_VAR
 628             || initStyle == SCE_PL_STRING_QQ_VAR
 629             || initStyle == SCE_PL_BACKTICKS_VAR
 630             || initStyle == SCE_PL_STRING_QX_VAR
 631             || initStyle == SCE_PL_REGEX_VAR
 632             || initStyle == SCE_PL_STRING_QR_VAR
 633             || initStyle == SCE_PL_REGSUBST_VAR
 634            ) {
 635                 // for interpolation, must backtrack through a mix of two different styles
 636                 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
 637                         initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
 638                 while (startPos > 1) {
 639                         int st = styler.StyleAt(startPos - 1);
 640                         if ((st != initStyle) && (st != otherStyle))
 641                                 break;
 642                         startPos--;
 643                 }
 644                 initStyle = SCE_PL_DEFAULT;
 645         } else if (initStyle == SCE_PL_STRING_Q
 646                 || initStyle == SCE_PL_STRING_QW
 647                 || initStyle == SCE_PL_XLAT
 648                 || initStyle == SCE_PL_CHARACTER
 649                 || initStyle == SCE_PL_NUMBER
 650                 || initStyle == SCE_PL_IDENTIFIER
 651                 || initStyle == SCE_PL_ERROR
 652                 || initStyle == SCE_PL_SUB_PROTOTYPE
 653            ) {
 654                 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
 655                         startPos--;
 656                 }
 657                 initStyle = SCE_PL_DEFAULT;
 658         } else if (initStyle == SCE_PL_POD
 659                 || initStyle == SCE_PL_POD_VERB
 660                   ) {
  661                 // POD backtracking finds preceding blank lines and goes back past them
 662                 int ln = styler.GetLine(startPos);
 663                 if (ln > 0) {
 664                         initStyle = styler.StyleAt(styler.LineStart(--ln));
 665                         if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
 666                                 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
 667                                         ln--;
 668                         }
 669                         startPos = styler.LineStart(++ln);
 670                         initStyle = styler.StyleAt(startPos - 1);
 671                 } else {
 672                         startPos = 0;
 673                         initStyle = SCE_PL_DEFAULT;
 674                 }
 675         }
 676
 677         // backFlag, backPos are additional state to aid identifier corner cases.
 678         // Look backwards past whitespace and comments in order to detect either
 679         // operator or keyword. Later updated as we go along.
 680         int backFlag = BACK_NONE;
 681         unsigned int backPos = startPos;
 682         if (backPos > 0) {
 683                 backPos--;
 684                 skipWhitespaceComment(styler, backPos);
 685                 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
 686                         backFlag = BACK_OPERATOR;
 687                 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
 688                         backFlag = BACK_KEYWORD;
 689                 backPos++;
 690         }
 691
 692         StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
 693
 694         for (; sc.More(); sc.Forward()) {
 695
 696                 // Determine if the current state should terminate.
 697                 switch (sc.state) {
 698                 case SCE_PL_OPERATOR:
 699                         sc.SetState(SCE_PL_DEFAULT);
 700                         backFlag = BACK_OPERATOR;
 701                         backPos = sc.currentPos;
 702                         break;
 703                 case SCE_PL_IDENTIFIER:         // identifier, bareword, inputsymbol
 704                         if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
 705                                 || sc.Match('.', '.')
 706                                 || sc.chPrev == '>') {  // end of inputsymbol
 707                                 sc.SetState(SCE_PL_DEFAULT);
 708                         }
 709                         break;
 710                 case SCE_PL_WORD:               // keyword, plus special cases
 711                         if (!setWord.Contains(sc.ch)) {
 712                                 char s[100];
 713                                 sc.GetCurrent(s, sizeof(s));
 714                                 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
 715                                         sc.ChangeState(SCE_PL_DATASECTION);
 716                                 } else {
 717                                         if ((strcmp(s, "format") == 0)) {
 718                                                 sc.SetState(SCE_PL_FORMAT_IDENT);
 719                                                 HereDoc.State = 0;
 720                                         } else {
 721                                                 sc.SetState(SCE_PL_DEFAULT);
 722                                         }
 723                                         backFlag = BACK_KEYWORD;
 724                                         backPos = sc.currentPos;
 725                                 }
 726                         }
 727                         break;
 728                 case SCE_PL_SCALAR:
 729                 case SCE_PL_ARRAY:
 730                 case SCE_PL_HASH:
 731                 case SCE_PL_SYMBOLTABLE:
 732                         if (sc.Match(':', ':')) {       // skip ::
 733                                 sc.Forward();
 734                         } else if (!setVar.Contains(sc.ch)) {
 735                                 if (sc.LengthCurrent() == 1) {
 736                                         // Special variable: $(, $_ etc.
 737                                         sc.Forward();
 738                                 }
 739                                 sc.SetState(SCE_PL_DEFAULT);
 740                         }
 741                         break;
 742                 case SCE_PL_NUMBER:
 743                         // if no early break, number style is terminated at "(go through)"
 744                         if (sc.ch == '.') {
 745                                 if (sc.chNext == '.') {
 746                                         // double dot is always an operator (go through)
 747                                 } else if (numState <= PERLNUM_FLOAT_EXP) {
 748                                         // non-decimal number or float exponent, consume next dot
 749                                         sc.SetState(SCE_PL_OPERATOR);
 750                                         break;
 751                                 } else {        // decimal or vectors allows dots
 752                                         dotCount++;
 753                                         if (numState == PERLNUM_DECIMAL) {
 754                                                 if (dotCount <= 1)      // number with one dot in it
 755                                                         break;
 756                                                 if (IsADigit(sc.chNext)) {      // really a vector
 757                                                         numState = PERLNUM_VECTOR;
 758                                                         break;
 759                                                 }
 760                                                 // number then dot (go through)
 761                                         } else if (IsADigit(sc.chNext)) // vectors
 762                                                 break;
 763                                         // vector then dot (go through)
 764                                 }
 765                         } else if (sc.ch == '_') {
 766                                 // permissive underscoring for number and vector literals
 767                                 break;
 768                         } else if (numState == PERLNUM_DECIMAL) {
 769                                 if (sc.ch == 'E' || sc.ch == 'e') {     // exponent, sign
 770                                         numState = PERLNUM_FLOAT_EXP;
 771                                         if (sc.chNext == '+' || sc.chNext == '-') {
 772                                                 sc.Forward();
 773                                         }
 774                                         break;
 775                                 } else if (IsADigit(sc.ch))
 776                                         break;
 777                                 // number then word (go through)
 778                         } else if (numState == PERLNUM_HEX) {
 779                                 if (IsADigit(sc.ch, 16))
 780                                         break;
 781                         } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
 782                                 if (IsADigit(sc.ch))    // vector
 783                                         break;
 784                                 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
 785                                         sc.ChangeState(SCE_PL_IDENTIFIER);
 786                                         break;
 787                                 }
 788                                 // vector then word (go through)
 789                         } else if (IsADigit(sc.ch)) {
 790                                 if (numState == PERLNUM_FLOAT_EXP) {
 791                                         break;
 792                                 } else if (numState == PERLNUM_OCTAL) {
 793                                         if (sc.ch <= '7') break;
 794                                 } else if (numState == PERLNUM_BINARY) {
 795                                         if (sc.ch <= '1') break;
 796                                 }
 797                                 // mark invalid octal, binary numbers (go through)
 798                                 numState = PERLNUM_BAD;
 799                                 break;
 800                         }
 801                         // complete current number or vector
 802                         sc.ChangeState(actualNumStyle(numState));
 803                         sc.SetState(SCE_PL_DEFAULT);
 804                         break;
 805                 case SCE_PL_COMMENTLINE:
 806                         if (sc.atLineEnd) {
 807                                 sc.SetState(SCE_PL_DEFAULT);
 808                         }
 809                         break;
 810                 case SCE_PL_HERE_DELIM:
 811                         if (HereDoc.State == 0) { // '<<' encountered
 812                                 int delim_ch = sc.chNext;
 813                                 int ws_skip = 0;
 814                                 HereDoc.State = 1;      // pre-init HERE doc class
 815                                 HereDoc.Quote = sc.chNext;
 816                                 HereDoc.Quoted = false;
 817                                 HereDoc.DelimiterLength = 0;
 818                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 819                                 if (IsASpaceOrTab(delim_ch)) {
 820                                         // skip whitespace; legal only for quoted delimiters
 821                                         unsigned int i = sc.currentPos + 1;
 822                                         while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
 823                                                 i++;
 824                                                 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 825                                         }
 826                                         ws_skip = i - sc.currentPos - 1;
 827                                 }
 828                                 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
 829                                         // a quoted here-doc delimiter; skip any whitespace
 830                                         sc.Forward(ws_skip + 1);
 831                                         HereDoc.Quote = delim_ch;
 832                                         HereDoc.Quoted = true;
 833                                 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
 834                                         || ws_skip > 0) {
 835                                         // left shift << or <<= operator cases
 836                                         // restore position if operator
 837                                         sc.ChangeState(SCE_PL_OPERATOR);
 838                                         sc.ForwardSetState(SCE_PL_DEFAULT);
 839                                         backFlag = BACK_OPERATOR;
 840                                         backPos = sc.currentPos;
 841                                         HereDoc.State = 0;
 842                                 } else {
 843                                         // specially handle initial '\' for identifier
 844                                         if (ws_skip == 0 && HereDoc.Quote == '\\')
 845                                                 sc.Forward();
 846                                         // an unquoted here-doc delimiter, no special handling
 847                                         // (cannot be prefixed by spaces/tabs), or
 848                                         // symbols terminates; deprecated zero-length delimiter
 849                                 }
 850                         } else if (HereDoc.State == 1) { // collect the delimiter
 851                                 backFlag = BACK_NONE;
 852                                 if (HereDoc.Quoted) { // a quoted here-doc delimiter
 853                                         if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
 854                                                 sc.ForwardSetState(SCE_PL_DEFAULT);
 855                                         } else if (!sc.atLineEnd) {
 856                                                 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
 857                                                         sc.Forward();
 858                                                 }
 859                                                 if (sc.ch != '\r') {    // skip CR if CRLF
 860                                                         HereDoc.Append(sc.ch);
 861                                                 }
 862                                         }
 863                                 } else { // an unquoted here-doc delimiter
 864                                         if (setHereDocDelim.Contains(sc.ch)) {
 865                                                 HereDoc.Append(sc.ch);
 866                                         } else {
 867                                                 sc.SetState(SCE_PL_DEFAULT);
 868                                         }
 869                                 }
 870                                 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
 871                                         sc.SetState(SCE_PL_ERROR);
 872                                         HereDoc.State = 0;
 873                                 }
 874                         }
 875                         break;
 876                 case SCE_PL_HERE_Q:
 877                 case SCE_PL_HERE_QQ:
 878                 case SCE_PL_HERE_QX:
 879                         // also implies HereDoc.State == 2
 880                         sc.Complete();
 881                         if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
 882                                 int c = sc.GetRelative(HereDoc.DelimiterLength);
 883                                 if (c == '\r' || c == '\n') {   // peek first, do not consume match
 884                                         sc.Forward(HereDoc.DelimiterLength);
 885                                         sc.SetState(SCE_PL_DEFAULT);
 886                                         backFlag = BACK_NONE;
 887                                         HereDoc.State = 0;
 888                                         if (!sc.atLineEnd)
 889                                                 sc.Forward();
 890                                         break;
 891                                 }
 892                         }
 893                         if (sc.state == SCE_PL_HERE_Q) {        // \EOF and 'EOF' non-interpolated
 894                                 while (!sc.atLineEnd)
 895                                         sc.Forward();
 896                                 break;
 897                         }
 898                         while (!sc.atLineEnd) {         // "EOF" and `EOF` interpolated
 899                                 int s = 0, endType = 0;
 900                                 int maxSeg = endPos - sc.currentPos;
 901                                 while (s < maxSeg) {    // scan to break string into segments
 902                                         int c = sc.GetRelative(s);
 903                                         if (c == '\\') {
 904                                                 endType = 1; break;
 905                                         } else if (c == '\r' || c == '\n') {
 906                                                 endType = 2; break;
 907                                         }
 908                                         s++;
 909                                 }
 910                                 if (s > 0)      // process non-empty segments
 911                                         InterpolateSegment(sc, s);
 912                                 if (endType == 1) {
 913                                         sc.Forward();
 914                                         // \ at end-of-line does not appear to have any effect, skip
 915                                         if (sc.ch != '\r' && sc.ch != '\n')
 916                                                 sc.Forward();
 917                                 } else if (endType == 2) {
 918                                         if (!sc.atLineEnd)
 919                                                 sc.Forward();
 920                                 }
 921                         }
 922                         break;
 923                 case SCE_PL_POD:
 924                 case SCE_PL_POD_VERB: {
 925                                 unsigned int fw = sc.currentPos;
 926                                 int ln = styler.GetLine(fw);
 927                                 if (sc.atLineStart && sc.Match("=cut")) {       // end of POD
 928                                         sc.SetState(SCE_PL_POD);
 929                                         sc.Forward(4);
 930                                         sc.SetState(SCE_PL_DEFAULT);
 931                                         styler.SetLineState(ln, SCE_PL_POD);
 932                                         break;
 933                                 }
 934                                 int pod = podLineScan(styler, fw, endPos);      // classify POD line
 935                                 styler.SetLineState(ln, pod);
 936                                 if (pod == SCE_PL_DEFAULT) {
 937                                         if (sc.state == SCE_PL_POD_VERB) {
 938                                                 unsigned int fw2 = fw;
 939                                                 while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
 940                                                         fw = fw2++;     // penultimate line (last blank line)
 941                                                         pod = podLineScan(styler, fw2, endPos);
 942                                                         styler.SetLineState(styler.GetLine(fw2), pod);
 943                                                 }
 944                                                 if (pod == SCE_PL_POD) {        // truncate verbatim POD early
 945                                                         sc.SetState(SCE_PL_POD);
 946                                                 } else
 947                                                         fw = fw2;
 948                                         }
 949                                 } else {
 950                                         if (pod == SCE_PL_POD_VERB      // still part of current paragraph
 951                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
 952                                                 pod = SCE_PL_POD;
 953                                                 styler.SetLineState(ln, pod);
 954                                         } else if (pod == SCE_PL_POD
 955                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
 956                                                 pod = SCE_PL_POD_VERB;
 957                                                 styler.SetLineState(ln, pod);
 958                                         }
 959                                         sc.SetState(pod);
 960                                 }
 961                                 sc.Forward(fw - sc.currentPos); // commit style
 962                         }
 963                         break;
 964                 case SCE_PL_REGEX:
 965                 case SCE_PL_STRING_QR:
 966                         if (Quote.Rep <= 0) {
 967                                 if (!setModifiers.Contains(sc.ch))
 968                                         sc.SetState(SCE_PL_DEFAULT);
 969                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
 970                                 Quote.Open(sc.ch);
 971                         } else {
 972                                 int s = 0, endType = 0;
 973                                 int maxSeg = endPos - sc.currentPos;
 974                                 while (s < maxSeg) {    // scan to break string into segments
 975                                         int c = sc.GetRelative(s);
 976                                         if (IsASpace(c)) {
 977                                                 break;
 978                                         } else if (c == '\\' && Quote.Up != '\\') {
 979                                                 endType = 1; break;
 980                                         } else if (c == Quote.Down) {
 981                                                 Quote.Count--;
 982                                                 if (Quote.Count == 0) {
 983                                                         Quote.Rep--;
 984                                                         break;
 985                                                 }
 986                                         } else if (c == Quote.Up)
 987                                                 Quote.Count++;
 988                                         s++;
 989                                 }
 990                                 if (s > 0) {    // process non-empty segments
 991                                         if (Quote.Up != '\'') {
 992                                                 InterpolateSegment(sc, s, true);
 993                                         } else          // non-interpolated path
 994                                                 sc.Forward(s);
 995                                 }
 996                                 if (endType == 1)
 997                                         sc.Forward();
 998                         }
 999                         break;
1000                 case SCE_PL_REGSUBST:
1001                 case SCE_PL_XLAT:
1002                         if (Quote.Rep <= 0) {
1003                                 if (!setModifiers.Contains(sc.ch))
1004                                         sc.SetState(SCE_PL_DEFAULT);
1005                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1006                                 Quote.Open(sc.ch);
1007                         } else {
1008                                 int s = 0, endType = 0;
1009                                 int maxSeg = endPos - sc.currentPos;
1010                                 bool isPattern = (Quote.Rep == 2);
1011                                 while (s < maxSeg) {    // scan to break string into segments
1012                                         int c = sc.GetRelative(s);
1013                                         if (c == '\\' && Quote.Up != '\\') {
1014                                                 endType = 2; break;
1015                                         } else if (Quote.Count == 0 && Quote.Rep == 1) {
1016                                                 // We matched something like s(...) or tr{...}, Perl 5.10
1017                                                 // appears to allow almost any character for use as the
1018                                                 // next delimiters. Whitespace and comments are accepted in
1019                                                 // between, but we'll limit to whitespace here.
1020                                                 // For '#', if no whitespace in between, it's a delimiter.
1021                                                 if (IsASpace(c)) {
1022                                                         // Keep going
1023                                                 } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) {
1024                                                         endType = 3;
1025                                                 } else
1026                                                         Quote.Open(c);
1027                                                 break;
1028                                         } else if (c == Quote.Down) {
1029                                                 Quote.Count--;
1030                                                 if (Quote.Count == 0) {
1031                                                         Quote.Rep--;
1032                                                         endType = 1;
1033                                                 }
1034                                                 if (Quote.Up == Quote.Down)
1035                                                         Quote.Count++;
1036                                                 if (endType == 1)
1037                                                         break;
1038                                         } else if (c == Quote.Up) {
1039                                                 Quote.Count++;
1040                                         } else if (IsASpace(c))
1041                                                 break;
1042                                         s++;
1043                                 }
1044                                 if (s > 0) {    // process non-empty segments
1045                                         if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1046                                                 InterpolateSegment(sc, s, isPattern);
1047                                         } else          // non-interpolated path
1048                                                 sc.Forward(s);
1049                                 }
1050                                 if (endType == 2) {
1051                                         sc.Forward();
1052                                 } else if (endType == 3)
1053                                         sc.SetState(SCE_PL_DEFAULT);
1054                         }
1055                         break;
1056                 case SCE_PL_STRING_Q:
1057                 case SCE_PL_STRING_QQ:
1058                 case SCE_PL_STRING_QX:
1059                 case SCE_PL_STRING_QW:
1060                 case SCE_PL_STRING:
1061                 case SCE_PL_CHARACTER:
1062                 case SCE_PL_BACKTICKS:
1063                         if (!Quote.Down && !IsASpace(sc.ch)) {
1064                                 Quote.Open(sc.ch);
1065                         } else {
1066                                 int s = 0, endType = 0;
1067                                 int maxSeg = endPos - sc.currentPos;
1068                                 while (s < maxSeg) {    // scan to break string into segments
1069                                         int c = sc.GetRelative(s);
1070                                         if (IsASpace(c)) {
1071                                                 break;
1072                                         } else if (c == '\\' && Quote.Up != '\\') {
1073                                                 endType = 2; break;
1074                                         } else if (c == Quote.Down) {
1075                                                 Quote.Count--;
1076                                                 if (Quote.Count == 0) {
1077                                                         endType = 3; break;
1078                                                 }
1079                                         } else if (c == Quote.Up)
1080                                                 Quote.Count++;
1081                                         s++;
1082                                 }
1083                                 if (s > 0) {    // process non-empty segments
1084                                         switch (sc.state) {
1085                                         case SCE_PL_STRING:
1086                                         case SCE_PL_STRING_QQ:
1087                                         case SCE_PL_BACKTICKS:
1088                                                 InterpolateSegment(sc, s);
1089                                                 break;
1090                                         case SCE_PL_STRING_QX:
1091                                                 if (Quote.Up != '\'') {
1092                                                         InterpolateSegment(sc, s);
1093                                                         break;
1094                                                 }
1095                                                 // (continued for ' delim)
1096                                         default:        // non-interpolated path
1097                                                 sc.Forward(s);
1098                                         }
1099                                 }
1100                                 if (endType == 2) {
1101                                         sc.Forward();
1102                                 } else if (endType == 3)
1103                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1104                         }
1105                         break;
1106                 case SCE_PL_SUB_PROTOTYPE: {
1107                                 int i = 0;
1108                                 // forward scan; must all be valid proto characters
1109                                 while (setSubPrototype.Contains(sc.GetRelative(i)))
1110                                         i++;
1111                                 if (sc.GetRelative(i) == ')') { // valid sub prototype
1112                                         sc.Forward(i);
1113                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1114                                 } else {
1115                                         // abandon prototype, restart from '('
1116                                         sc.ChangeState(SCE_PL_OPERATOR);
1117                                         sc.SetState(SCE_PL_DEFAULT);
1118                                 }
1119                         }
1120                         break;
1121                 case SCE_PL_FORMAT: {
1122                                 sc.Complete();
1123                                 if (sc.Match('.')) {
1124                                         sc.Forward();
1125                                         if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1126                                                 sc.SetState(SCE_PL_DEFAULT);
1127                                 }
1128                                 while (!sc.atLineEnd)
1129                                         sc.Forward();
1130                         }
1131                         break;
1132                 case SCE_PL_ERROR:
1133                         break;
1134                 }
1135                 // Needed for specific continuation styles (one follows the other)
1136                 switch (sc.state) {
1137                         // continued from SCE_PL_WORD
1138                 case SCE_PL_FORMAT_IDENT:
1139                         // occupies HereDoc state 3 to avoid clashing with HERE docs
1140                         if (IsASpaceOrTab(sc.ch)) {             // skip whitespace
1141                                 sc.ChangeState(SCE_PL_DEFAULT);
1142                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1143                                         sc.Forward();
1144                                 sc.SetState(SCE_PL_FORMAT_IDENT);
1145                         }
1146                         if (setFormatStart.Contains(sc.ch)) {   // identifier or '='
1147                                 if (sc.ch != '=') {
1148                                         do {
1149                                                 sc.Forward();
1150                                         } while (setFormat.Contains(sc.ch));
1151                                 }
1152                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1153                                         sc.Forward();
1154                                 if (sc.ch == '=') {
1155                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1156                                         HereDoc.State = 3;
1157                                 } else {
1158                                         // invalid identifier; inexact fallback, but hey
1159                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1160                                         sc.SetState(SCE_PL_DEFAULT);
1161                                 }
1162                         } else {
1163                                 sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
1164                         }
1165                         backFlag = BACK_NONE;
1166                         break;
1167                 }
1168
1169                 // Must check end of HereDoc states here before default state is handled
1170                 if (HereDoc.State == 1 && sc.atLineEnd) {
1171                         // Begin of here-doc (the line after the here-doc delimiter):
1172                         // Lexically, the here-doc starts from the next line after the <<, but the
1173                         // first line of here-doc seems to follow the style of the last EOL sequence
1174                         int st_new = SCE_PL_HERE_QQ;
1175                         HereDoc.State = 2;
1176                         if (HereDoc.Quoted) {
1177                                 if (sc.state == SCE_PL_HERE_DELIM) {
1178                                         // Missing quote at end of string! We are stricter than perl.
1179                                         // Colour here-doc anyway while marking this bit as an error.
1180                                         sc.ChangeState(SCE_PL_ERROR);
1181                                 }
1182                                 switch (HereDoc.Quote) {
1183                                 case '\'':
1184                                         st_new = SCE_PL_HERE_Q ;
1185                                         break;
1186                                 case '"' :
1187                                         st_new = SCE_PL_HERE_QQ;
1188                                         break;
1189                                 case '`' :
1190                                         st_new = SCE_PL_HERE_QX;
1191                                         break;
1192                                 }
1193                         } else {
1194                                 if (HereDoc.Quote == '\\')
1195                                         st_new = SCE_PL_HERE_Q;
1196                         }
1197                         sc.SetState(st_new);
1198                 }
1199                 if (HereDoc.State == 3 && sc.atLineEnd) {
1200                         // Start of format body.
1201                         HereDoc.State = 0;
1202                         sc.SetState(SCE_PL_FORMAT);
1203                 }
1204
1205                 // Determine if a new state should be entered.
1206                 if (sc.state == SCE_PL_DEFAULT) {
1207                         if (IsADigit(sc.ch) ||
1208                                 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1209                                 sc.SetState(SCE_PL_NUMBER);
1210                                 backFlag = BACK_NONE;
1211                                 numState = PERLNUM_DECIMAL;
1212                                 dotCount = 0;
1213                                 if (sc.ch == '0') {             // hex,bin,octal
1214                                         if (sc.chNext == 'x' || sc.chNext == 'X') {
1215                                                 numState = PERLNUM_HEX;
1216                                         } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1217                                                 numState = PERLNUM_BINARY;
1218                                         } else if (IsADigit(sc.chNext)) {
1219                                                 numState = PERLNUM_OCTAL;
1220                                         }
1221                                         if (numState != PERLNUM_DECIMAL) {
1222                                                 sc.Forward();
1223                                         }
1224                                 } else if (sc.ch == 'v') {              // vector
1225                                         numState = PERLNUM_V_VECTOR;
1226                                 }
1227                         } else if (setWord.Contains(sc.ch)) {
1228                                 // if immediately prefixed by '::', always a bareword
1229                                 sc.SetState(SCE_PL_WORD);
1230                                 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1231                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1232                                 }
1233                                 unsigned int bk = sc.currentPos;
1234                                 unsigned int fw = sc.currentPos + 1;
1235                                 // first check for possible quote-like delimiter
1236                                 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1237                                         sc.ChangeState(SCE_PL_REGSUBST);
1238                                         Quote.New(2);
1239                                 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1240                                         sc.ChangeState(SCE_PL_REGEX);
1241                                         Quote.New();
1242                                 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1243                                         sc.ChangeState(SCE_PL_STRING_Q);
1244                                         Quote.New();
1245                                 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1246                                         sc.ChangeState(SCE_PL_XLAT);
1247                                         Quote.New(2);
1248                                 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1249                                         sc.ChangeState(SCE_PL_XLAT);
1250                                         Quote.New(2);
1251                                         sc.Forward();
1252                                         fw++;
1253                                 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1254                                         && !setWord.Contains(sc.GetRelative(2))) {
1255                                         if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1256                                         else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1257                                         else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1258                                         else sc.ChangeState(SCE_PL_STRING_QW);  // sc.chNext == 'w'
1259                                         Quote.New();
1260                                         sc.Forward();
1261                                         fw++;
1262                                 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1263                                         !setWord.Contains(sc.chNext) ||
1264                                         (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1265                                         sc.ChangeState(SCE_PL_OPERATOR);
1266                                 }
1267                                 // if potentially a keyword, scan forward and grab word, then check
1268                                 // if it's really one; if yes, disambiguation test is performed
1269                                 // otherwise it is always a bareword and we skip a lot of scanning
1270                                 if (sc.state == SCE_PL_WORD) {
1271                                         while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1272                                                 fw++;
1273                                         if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1274                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1275                                         }
1276                                 }
1277                                 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1278                                 // for quote-like delimiters/keywords, attempt to disambiguate
1279                                 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1280                                 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1281                                         if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1282                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1283                                 }
1284                                 backFlag = BACK_NONE;
1285                         } else if (sc.ch == '#') {
1286                                 sc.SetState(SCE_PL_COMMENTLINE);
1287                         } else if (sc.ch == '\"') {
1288                                 sc.SetState(SCE_PL_STRING);
1289                                 Quote.New();
1290                                 Quote.Open(sc.ch);
1291                                 backFlag = BACK_NONE;
1292                         } else if (sc.ch == '\'') {
1293                                 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1294                                         // Archaic call
1295                                         sc.SetState(SCE_PL_IDENTIFIER);
1296                                 } else {
1297                                         sc.SetState(SCE_PL_CHARACTER);
1298                                         Quote.New();
1299                                         Quote.Open(sc.ch);
1300                                 }
1301                                 backFlag = BACK_NONE;
1302                         } else if (sc.ch == '`') {
1303                                 sc.SetState(SCE_PL_BACKTICKS);
1304                                 Quote.New();
1305                                 Quote.Open(sc.ch);
1306                                 backFlag = BACK_NONE;
1307                         } else if (sc.ch == '$') {
1308                                 sc.SetState(SCE_PL_SCALAR);
1309                                 if (sc.chNext == '{') {
1310                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1311                                 } else if (IsASpace(sc.chNext)) {
1312                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1313                                 } else {
1314                                         sc.Forward();
1315                                         if (sc.Match('`', '`') || sc.Match(':', ':')) {
1316                                                 sc.Forward();
1317                                         }
1318                                 }
1319                                 backFlag = BACK_NONE;
1320                         } else if (sc.ch == '@') {
1321                                 sc.SetState(SCE_PL_ARRAY);
1322                                 if (setArray.Contains(sc.chNext)) {
1323                                         // no special treatment
1324                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1325                                         sc.Forward(2);
1326                                 } else if (sc.chNext == '{' || sc.chNext == '[') {
1327                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1328                                 } else {
1329                                         sc.ChangeState(SCE_PL_OPERATOR);
1330                                 }
1331                                 backFlag = BACK_NONE;
1332                         } else if (setPreferRE.Contains(sc.ch)) {
1333                                 // Explicit backward peeking to set a consistent preferRE for
1334                                 // any slash found, so no longer need to track preferRE state.
1335                                 // Find first previous significant lexed element and interpret.
1336                                 // A few symbols share this code for disambiguation.
1337                                 bool preferRE = false;
1338                                 bool isHereDoc = sc.Match('<', '<');
1339                                 bool hereDocSpace = false;              // for: SCALAR [whitespace] '<<'
1340                                 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1341                                 sc.Complete();
1342                                 styler.Flush();
1343                                 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1344                                         hereDocSpace = true;
1345                                 skipWhitespaceComment(styler, bk);
1346                                 if (bk == 0) {
1347                                         // avoid backward scanning breakage
1348                                         preferRE = true;
1349                                 } else {
1350                                         int bkstyle = styler.StyleAt(bk);
1351                                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1352                                         switch (bkstyle) {
1353                                         case SCE_PL_OPERATOR:
1354                                                 preferRE = true;
1355                                                 if (bkch == ')' || bkch == ']') {
1356                                                         preferRE = false;
1357                                                 } else if (bkch == '}') {
1358                                                         // backtrack by counting balanced brace pairs
1359                                                         // needed to test for variables like ${}, @{} etc.
1360                                                         bkstyle = styleBeforeBracePair(styler, bk);
1361                                                         if (bkstyle == SCE_PL_SCALAR
1362                                                                 || bkstyle == SCE_PL_ARRAY
1363                                                                 || bkstyle == SCE_PL_HASH
1364                                                                 || bkstyle == SCE_PL_SYMBOLTABLE
1365                                                                 || bkstyle == SCE_PL_OPERATOR) {
1366                                                                 preferRE = false;
1367                                                         }
1368                                                 } else if (bkch == '+' || bkch == '-') {
1369                                                         if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1370                                                                 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1371                                                                 // exceptions for operators: unary suffixes ++, --
1372                                                                 preferRE = false;
1373                                                 }
1374                                                 break;
1375                                         case SCE_PL_IDENTIFIER:
1376                                                 preferRE = true;
1377                                                 bkstyle = styleCheckIdentifier(styler, bk);
1378                                                 if ((bkstyle == 1) || (bkstyle == 2)) {
1379                                                         // inputsymbol or var with "->" or "::" before identifier
1380                                                         preferRE = false;
1381                                                 } else if (bkstyle == 3) {
1382                                                         // bare identifier, test cases follows:
1383                                                         if (sc.ch == '/') {
1384                                                                 // if '/', /PATTERN/ unless digit/space immediately after '/'
1385                                                                 // if '//', always expect defined-or operator to follow identifier
1386                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1387                                                                         preferRE = false;
1388                                                         } else if (sc.ch == '*' || sc.ch == '%') {
1389                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1390                                                                         preferRE = false;
1391                                                         } else if (sc.ch == '<') {
1392                                                                 if (IsASpace(sc.chNext) || sc.chNext == '=')
1393                                                                         preferRE = false;
1394                                                         }
1395                                                 }
1396                                                 break;
1397                                         case SCE_PL_SCALAR:             // for $var<< case:
1398                                                 if (isHereDoc && hereDocSpace)  // if SCALAR whitespace '<<', *always* a HERE doc
1399                                                         preferRE = true;
1400                                                 break;
1401                                         case SCE_PL_WORD:
1402                                                 preferRE = true;
1403                                                 // for HERE docs, always true
1404                                                 if (sc.ch == '/') {
1405                                                         // adopt heuristics similar to vim-style rules:
1406                                                         // keywords always forced as /PATTERN/: split, if, elsif, while
1407                                                         // everything else /PATTERN/ unless digit/space immediately after '/'
1408                                                         // for '//', defined-or favoured unless special keywords
1409                                                         unsigned int bkend = bk + 1;
1410                                                         while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1411                                                                 bk--;
1412                                                         }
1413                                                         if (isPerlKeyword(bk, bkend, reWords, styler))
1414                                                                 break;
1415                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1416                                                                 preferRE = false;
1417                                                 } else if (sc.ch == '*' || sc.ch == '%') {
1418                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1419                                                                 preferRE = false;
1420                                                 } else if (sc.ch == '<') {
1421                                                         if (IsASpace(sc.chNext) || sc.chNext == '=')
1422                                                                 preferRE = false;
1423                                                 }
1424                                                 break;
1425
1426                                                 // other styles uses the default, preferRE=false
1427                                         case SCE_PL_POD:
1428                                         case SCE_PL_HERE_Q:
1429                                         case SCE_PL_HERE_QQ:
1430                                         case SCE_PL_HERE_QX:
1431                                                 preferRE = true;
1432                                                 break;
1433                                         }
1434                                 }
1435                                 backFlag = BACK_NONE;
1436                                 if (isHereDoc) {        // handle '<<', HERE doc
1437                                         if (preferRE) {
1438                                                 sc.SetState(SCE_PL_HERE_DELIM);
1439                                                 HereDoc.State = 0;
1440                                         } else {                // << operator
1441                                                 sc.SetState(SCE_PL_OPERATOR);
1442                                                 sc.Forward();
1443                                         }
1444                                 } else if (sc.ch == '*') {      // handle '*', typeglob
1445                                         if (preferRE) {
1446                                                 sc.SetState(SCE_PL_SYMBOLTABLE);
1447                                                 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1448                                                         sc.Forward(2);
1449                                                 } else if (sc.chNext == '{') {
1450                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1451                                                 } else {
1452                                                         sc.Forward();
1453                                                 }
1454                                         } else {
1455                                                 sc.SetState(SCE_PL_OPERATOR);
1456                                                 if (sc.chNext == '*')   // exponentiation
1457                                                         sc.Forward();
1458                                         }
1459                                 } else if (sc.ch == '%') {      // handle '%', hash
1460                                         if (preferRE) {
1461                                                 sc.SetState(SCE_PL_HASH);
1462                                                 if (setHash.Contains(sc.chNext)) {
1463                                                         sc.Forward();
1464                                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1465                                                         sc.Forward(2);
1466                                                 } else if (sc.chNext == '{') {
1467                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1468                                                 } else {
1469                                                         sc.ChangeState(SCE_PL_OPERATOR);
1470                                                 }
1471                                         } else {
1472                                                 sc.SetState(SCE_PL_OPERATOR);
1473                                         }
1474                                 } else if (sc.ch == '<') {      // handle '<', inputsymbol
1475                                         if (preferRE) {
1476                                                 // forward scan
1477                                                 int i = inputsymbolScan(styler, sc.currentPos, endPos);
1478                                                 if (i > 0) {
1479                                                         sc.SetState(SCE_PL_IDENTIFIER);
1480                                                         sc.Forward(i);
1481                                                 } else {
1482                                                         sc.SetState(SCE_PL_OPERATOR);
1483                                                 }
1484                                         } else {
1485                                                 sc.SetState(SCE_PL_OPERATOR);
1486                                         }
1487                                 } else {                        // handle '/', regexp
1488                                         if (preferRE) {
1489                                                 sc.SetState(SCE_PL_REGEX);
1490                                                 Quote.New();
1491                                                 Quote.Open(sc.ch);
1492                                         } else {                // / and // operators
1493                                                 sc.SetState(SCE_PL_OPERATOR);
1494                                                 if (sc.chNext == '/') {
1495                                                         sc.Forward();
1496                                                 }
1497                                         }
1498                                 }
1499                         } else if (sc.ch == '='         // POD
1500                                 && setPOD.Contains(sc.chNext)
1501                                 && sc.atLineStart) {
1502                                 sc.SetState(SCE_PL_POD);
1503                                 backFlag = BACK_NONE;
1504                         } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {  // extended '-' cases
1505                                 unsigned int bk = sc.currentPos;
1506                                 unsigned int fw = 2;
1507                                 if (setSingleCharOp.Contains(sc.chNext) &&      // file test operators
1508                                         !setWord.Contains(sc.GetRelative(2))) {
1509                                         sc.SetState(SCE_PL_WORD);
1510                                 } else {
1511                                         // nominally a minus and bareword; find extent of bareword
1512                                         while (setWord.Contains(sc.GetRelative(fw)))
1513                                                 fw++;
1514                                         sc.SetState(SCE_PL_OPERATOR);
1515                                 }
1516                                 // force to bareword for hash key => or {variable literal} cases
1517                                 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1518                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1519                                 }
1520                                 backFlag = BACK_NONE;
1521                         } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1522                                 sc.Complete();
1523                                 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1524                                         sc.SetState(SCE_PL_SUB_PROTOTYPE);
1525                                         backFlag = BACK_NONE;
1526                                 } else {
1527                                         sc.SetState(SCE_PL_OPERATOR);
1528                                 }
1529                         } else if (setPerlOperator.Contains(sc.ch)) {   // operators
1530                                 sc.SetState(SCE_PL_OPERATOR);
1531                                 if (sc.Match('.', '.')) {       // .. and ...
1532                                         sc.Forward();
1533                                         if (sc.chNext == '.') sc.Forward();
1534                                 }
1535                         } else if (sc.ch == 4 || sc.ch == 26) {         // ^D and ^Z ends valid perl source
1536                                 sc.SetState(SCE_PL_DATASECTION);
1537                         } else {
1538                                 // keep colouring defaults
1539                                 sc.Complete();
1540                         }
1541                 }
1542         }
1543         sc.Complete();
1544         if (sc.state == SCE_PL_HERE_Q
1545                 || sc.state == SCE_PL_HERE_QQ
1546                 || sc.state == SCE_PL_HERE_QX
1547                 || sc.state == SCE_PL_FORMAT) {
1548                 styler.ChangeLexerState(sc.currentPos, styler.Length());
1549         }
1550         sc.Complete();
1551 }
1552
1553 #define PERL_HEADFOLD_SHIFT             4       // bit index of the lowest bit set in PERL_HEADFOLD_MASK; NOTE(review): presumably shifts a POD heading level into that field -- confirm in Fold()
1554 #define PERL_HEADFOLD_MASK              0xF0    // bits 4-7 of the running fold level; Fold() clears these bits from levelCurrent when a "=cut" line is seen
1555
1556 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1557
1558         if (!options.fold)
1559                 return;
1560
1561         LexAccessor styler(pAccess);
1562
1563         unsigned int endPos = startPos + length;
1564         int visibleChars = 0;
1565         int lineCurrent = styler.GetLine(startPos);
1566
1567         // Backtrack to previous line in case need to fix its fold status
1568         if (startPos > 0) {
1569                 if (lineCurrent > 0) {
1570                         lineCurrent--;
1571                         startPos = styler.LineStart(lineCurrent);
1572                 }
1573         }
1574
1575         int levelPrev = SC_FOLDLEVELBASE;
1576         if (lineCurrent > 0)
1577                 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1578         int levelCurrent = levelPrev;
1579         char chNext = styler[startPos];
1580         char chPrev = styler.SafeGetCharAt(startPos - 1);
1581         int styleNext = styler.StyleAt(startPos);
1582         // Used at end of line to determine if the line was a package definition
1583         bool isPackageLine = false;
1584         int podHeading = 0;
1585         for (unsigned int i = startPos; i < endPos; i++) {
1586                 char ch = chNext;
1587                 chNext = styler.SafeGetCharAt(i + 1);
1588                 int style = styleNext;
1589                 styleNext = styler.StyleAt(i + 1);
1590                 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1591                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1592                 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1593                 // Comment folding
1594                 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1595                         if (!IsCommentLine(lineCurrent - 1, styler)
1596                                 && IsCommentLine(lineCurrent + 1, styler))
1597                                 levelCurrent++;
1598                         else if (IsCommentLine(lineCurrent - 1, styler)
1599                                 && !IsCommentLine(lineCurrent + 1, styler))
1600                                 levelCurrent--;
1601                 }
1602                 // {} [] block folding
1603                 if (style == SCE_PL_OPERATOR) {
1604                         if (ch == '{') {
1605                                 if (options.foldAtElse && levelCurrent < levelPrev)
1606                                         --levelPrev;
1607                                 levelCurrent++;
1608                         } else if (ch == '}') {
1609                                 levelCurrent--;
1610                         }
1611                         if (ch == '[') {
1612                                 if (options.foldAtElse && levelCurrent < levelPrev)
1613                                         --levelPrev;
1614                                 levelCurrent++;
1615                         } else if (ch == ']') {
1616                                 levelCurrent--;
1617                         }
1618                 }
1619                 // POD folding
1620                 if (options.foldPOD && atLineStart) {
1621                         if (style == SCE_PL_POD) {
1622                                 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1623                                         levelCurrent++;
1624                                 else if (styler.Match(i, "=cut"))
1625                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1626                                 else if (styler.Match(i, "=head"))
1627                                         podHeading = PodHeadingLevel(i, styler);
1628                         } else if (style == SCE_PL_DATASECTION) {
1629                                 if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1630                                         levelCurrent++;
1631                                 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1632                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1633                                 else if (styler.Match(i, "=head"))
1634                                         podHeading = PodHeadingLevel(i, styler);
1635                                 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1636                                 // reset needed as level test is vs. SC_FOLDLEVELBASE
1637                                 else if (stylePrevCh != SCE_PL_DATASECTION)
1638                                         levelCurrent = SC_FOLDLEVELBASE;
1639                         }
1640                 }
1641                 // package folding
1642                 if (options.foldPackage && atLineStart) {
1643                         if (IsPackageLine(lineCurrent, styler)
1644                                 && !IsPackageLine(lineCurrent + 1, styler))
1645                                 isPackageLine = true;
1646                 }
1647
1648                 //heredoc folding
1649                 switch (style) {
1650                 case SCE_PL_HERE_QQ :
1651                 case SCE_PL_HERE_Q :
1652                 case SCE_PL_HERE_QX :
1653                         switch (stylePrevCh) {
1654                         case SCE_PL_HERE_QQ :
1655                         case SCE_PL_HERE_Q :
1656                         case SCE_PL_HERE_QX :
1657                                 //do nothing;
1658                                 break;
1659                         default :
1660                                 levelCurrent++;
1661                                 break;
1662                         }
1663                         break;
1664                 default:
1665                         switch (stylePrevCh) {
1666                         case SCE_PL_HERE_QQ :
1667                         case SCE_PL_HERE_Q :
1668                         case SCE_PL_HERE_QX :
1669                                 levelCurrent--;
1670                                 break;
1671                         default :
1672                                 //do nothing;
1673                                 break;
1674                         }
1675                         break;
1676                 }
1677
1678                 //explicit folding
1679                 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1680                         if (chNext == '{') {
1681                                 levelCurrent++;
1682                         } else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1683                                 levelCurrent--;
1684                         }
1685                 }
1686
1687                 if (atEOL) {
1688                         int lev = levelPrev;
1689                         // POD headings occupy bits 7-4, leaving some breathing room for
1690                         // non-standard practice -- POD sections stuck in blocks, etc.
1691                         if (podHeading > 0) {
1692                                 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1693                                 lev = levelCurrent - 1;
1694                                 lev |= SC_FOLDLEVELHEADERFLAG;
1695                                 podHeading = 0;
1696                         }
1697                         // Check if line was a package declaration
1698                         // because packages need "special" treatment
1699                         if (isPackageLine) {
1700                                 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1701                                 levelCurrent = SC_FOLDLEVELBASE + 1;
1702                                 isPackageLine = false;
1703                         }
1704                         lev |= levelCurrent << 16;
1705                         if (visibleChars == 0 && options.foldCompact)
1706                                 lev |= SC_FOLDLEVELWHITEFLAG;
1707                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1708                                 lev |= SC_FOLDLEVELHEADERFLAG;
1709                         if (lev != styler.LevelAt(lineCurrent)) {
1710                                 styler.SetLevel(lineCurrent, lev);
1711                         }
1712                         lineCurrent++;
1713                         levelPrev = levelCurrent;
1714                         visibleChars = 0;
1715                 }
1716                 if (!isspacechar(ch))
1717                         visibleChars++;
1718                 chPrev = ch;
1719         }
1720         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1721         int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1722         styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1723 }
1724
// File-scope LexerModule object for Perl. It is constructed with the
// SCLEX_PERL language identifier, the LexerPerl::LexerFactoryPerl factory
// function, the language name "perl" and the perlWordListDesc keyword list
// descriptions.
// NOTE(review): the trailing 8 is presumably the number of style bits this
// lexer uses -- confirm against the LexerModule constructor in LexerModule.h.
1725 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc, 8);