]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexPerl.cxx
da9038778be2da03af442c467b0e6236ef361eb9
[wxWidgets.git] / src / stc / scintilla / lexers / LexPerl.cxx
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for Perl.
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16
17 #include <string>
18 #include <map>
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30
31 #ifdef SCI_NAMESPACE
32 using namespace Scintilla;
33 #endif
34
35 // Info for HERE document handling from perldata.pod (reformatted):
36 // ----------------------------------------------------------------
37 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
38 // Following a << you specify a string to terminate the quoted material, and
39 // all lines following the current line down to the terminating string are
40 // the value of the item.
41 // * The terminating string may be either an identifier (a word), or some
42 // quoted text.
43 // * If quoted, the type of quotes you use determines the treatment of the
44 // text, just as in regular quoting.
45 // * An unquoted identifier works like double quotes.
46 // * There must be no space between the << and the identifier.
47 // (If you put a space it will be treated as a null identifier,
48 // which is valid, and matches the first empty line.)
49 // (This is deprecated, -w warns of this syntax)
50 // * The terminating string must appear by itself (unquoted and
51 // with no surrounding whitespace) on the terminating line.
52
// Size limit for the buffer that stores a HERE document delimiter.
#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter

// Sub-states tracked while a numeric (or vector) literal is being lexed.
#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX 2
#define PERLNUM_OCTAL 3
#define PERLNUM_FLOAT_EXP 4 // exponent part only
#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

// Kind of the last significant token seen before the current position.
#define BACK_NONE 0 // lookback state for bareword disambiguation:
#define BACK_OPERATOR 1 // whitespace/comments are insignificant
#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation

// all interpolated styles are different from their parent styles by a constant difference
// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
71
72 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
73 // old-style keyword matcher; needed because GetCurrent() needs
74 // current segment to be committed, but we may abandon early...
75 char s[100];
76 unsigned int i, len = end - start;
77 if (len > 30) { len = 30; }
78 for (i = 0; i < len; i++, start++) s[i] = styler[start];
79 s[i] = '\0';
80 return keywords.InList(s);
81 }
82
83 static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw,
84 int backFlag, unsigned int backPos, unsigned int endPos) {
85 // identifiers are recognized by Perl as barewords under some
86 // conditions, the following attempts to do the disambiguation
87 // by looking backward and forward; result in 2 LSB
88 int result = 0;
89 bool moreback = false; // true if passed newline/comments
90 bool brace = false; // true if opening brace found
91 // if BACK_NONE, neither operator nor keyword, so skip test
92 if (backFlag == BACK_NONE)
93 return result;
94 // first look backwards past whitespace/comments to set EOL flag
95 // (some disambiguation patterns must be on a single line)
96 if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
97 moreback = true;
98 // look backwards at last significant lexed item for disambiguation
99 bk = backPos - 1;
100 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
101 if (ch == '{' && !moreback) {
102 // {bareword: possible variable spec
103 brace = true;
104 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
105 // &bareword: subroutine call
106 || styler.Match(bk - 1, "->")
107 // ->bareword: part of variable spec
108 || styler.Match(bk - 2, "sub")) {
109 // sub bareword: subroutine declaration
110 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
111 result |= 1;
112 }
113 // next, scan forward after word past tab/spaces only;
114 // if ch isn't one of '[{(,' we can skip the test
115 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
116 && fw < endPos) {
117 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
118 IsASpaceOrTab(ch) && fw < endPos) {
119 fw++;
120 }
121 if ((ch == '}' && brace)
122 // {bareword}: variable spec
123 || styler.Match(fw, "=>")) {
124 // [{(, bareword=>: hash literal
125 result |= 2;
126 }
127 }
128 return result;
129 }
130
131 static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) {
132 // when backtracking, we need to skip whitespace and comments
133 int style;
134 while ((p > 0) && (style = styler.StyleAt(p),
135 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
136 p--;
137 }
138
139 static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) {
140 // backtrack to find open '{' corresponding to a '}', balanced
141 // return significant style to be tested for '/' disambiguation
142 int braceCount = 1;
143 if (bk == 0)
144 return SCE_PL_DEFAULT;
145 while (--bk > 0) {
146 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
147 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
148 if (bkch == ';') { // early out
149 break;
150 } else if (bkch == '}') {
151 braceCount++;
152 } else if (bkch == '{') {
153 if (--braceCount == 0) break;
154 }
155 }
156 }
157 if (bk > 0 && braceCount == 0) {
158 // balanced { found, bk > 0, skip more whitespace/comments
159 bk--;
160 skipWhitespaceComment(styler, bk);
161 return styler.StyleAt(bk);
162 }
163 return SCE_PL_DEFAULT;
164 }
165
166 static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) {
167 // backtrack to classify sub-styles of identifier under test
168 // return sub-style to be tested for '/' disambiguation
169 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
170 return 1;
171 // backtrack to check for possible "->" or "::" before identifier
172 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
173 bk--;
174 }
175 while (bk > 0) {
176 int bkstyle = styler.StyleAt(bk);
177 if (bkstyle == SCE_PL_DEFAULT
178 || bkstyle == SCE_PL_COMMENTLINE) {
179 // skip whitespace, comments
180 } else if (bkstyle == SCE_PL_OPERATOR) {
181 // test for "->" and "::"
182 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
183 return 2;
184 } else
185 return 3; // bare identifier
186 bk--;
187 }
188 return 0;
189 }
190
191 static int inputsymbolScan(LexAccessor &styler, unsigned int pos, unsigned int endPos) {
192 // looks forward for matching > on same line; a bit ugly
193 unsigned int fw = pos;
194 while (++fw < endPos) {
195 int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
196 if (fwch == '\r' || fwch == '\n') {
197 return 0;
198 } else if (fwch == '>') {
199 if (styler.Match(fw - 2, "<=>")) // '<=>' case
200 return 0;
201 return fw - pos;
202 }
203 }
204 return 0;
205 }
206
207 static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) {
208 // forward scan the current line to classify line for POD style
209 int state = -1;
210 while (pos <= endPos) {
211 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
212 if (ch == '\n' || ch == '\r' || pos >= endPos) {
213 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
214 break;
215 }
216 if (IsASpaceOrTab(ch)) { // whitespace, take note
217 if (state == -1)
218 state = SCE_PL_DEFAULT;
219 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
220 state = SCE_PL_POD_VERB;
221 } else if (state != SCE_PL_POD_VERB) { // regular POD line
222 state = SCE_PL_POD;
223 }
224 pos++;
225 }
226 if (state == -1)
227 state = SCE_PL_DEFAULT;
228 return state;
229 }
230
231 static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {
232 // backtrack to identify if we're starting a subroutine prototype
233 // we also need to ignore whitespace/comments:
234 // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
235 styler.Flush();
236 skipWhitespaceComment(styler, bk);
237 if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
238 return false;
239 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
240 bk--;
241 }
242 skipWhitespaceComment(styler, bk);
243 if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
244 || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
245 return false;
246 return true;
247 }
248
249 static int actualNumStyle(int numberStyle) {
250 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
251 return SCE_PL_STRING;
252 } else if (numberStyle == PERLNUM_BAD) {
253 return SCE_PL_ERROR;
254 }
255 return SCE_PL_NUMBER;
256 }
257
static int opposite(int ch) {
	// Return the closing delimiter that pairs with an opening bracket;
	// any other character is its own closing delimiter.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
265
266 static bool IsCommentLine(int line, LexAccessor &styler) {
267 int pos = styler.LineStart(line);
268 int eol_pos = styler.LineStart(line + 1) - 1;
269 for (int i = pos; i < eol_pos; i++) {
270 char ch = styler[i];
271 int style = styler.StyleAt(i);
272 if (ch == '#' && style == SCE_PL_COMMENTLINE)
273 return true;
274 else if (!IsASpaceOrTab(ch))
275 return false;
276 }
277 return false;
278 }
279
280 static bool IsPackageLine(int line, LexAccessor &styler) {
281 int pos = styler.LineStart(line);
282 int style = styler.StyleAt(pos);
283 if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
284 return true;
285 }
286 return false;
287 }
288
289 static int PodHeadingLevel(int pos, LexAccessor &styler) {
290 int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
291 if (lvl >= '1' && lvl <= '4') {
292 return lvl - '0';
293 }
294 return 0;
295 }
296
297 // An individual named option for use in an OptionSet
298
299 // Options used for LexerPerl
struct OptionsPerl {
	bool fold;                  // "fold"
	bool foldComment;           // "fold.comment"
	bool foldCompact;           // "fold.compact"
	// Custom folding of POD and packages
	bool foldPOD;               // "fold.perl.pod": fold Pod blocks
	bool foldPackage;           // "fold.perl.package": fold packages
	bool foldCommentExplicit;   // "fold.perl.comment.explicit"
	bool foldAtElse;            // "fold.perl.at.else"

	// Defaults: folding itself and comment folding are off; compact, POD,
	// package and explicit folding are on; folding at "} else {" is off.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
324
// Null-terminated list of word list descriptions for the Perl lexer; it is
// handed to DefineWordListSets() by OptionSetPerl. Only one list is described.
static const char *const perlWordListDesc[] = {
	"Keywords",
	0
};
329
// Binds the property names understood by the Perl lexer to the matching
// members of OptionsPerl, and registers the word list descriptions.
struct OptionSetPerl : public OptionSet<OptionsPerl> {
	OptionSetPerl() {
		// Generic folding properties (registered without a description).
		DefineProperty("fold", &OptionsPerl::fold);

		DefineProperty("fold.comment", &OptionsPerl::foldComment);

		DefineProperty("fold.compact", &OptionsPerl::foldCompact);

		// Perl-specific folding properties, each with a description string.
		DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
			"Set to 0 to disable folding Pod blocks when using the Perl lexer.");

		DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
			"Set to 0 to disable folding packages when using the Perl lexer.");

		DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
			"Set to 0 to disable explicit folding.");

		DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
			"This option enables Perl folding on a \"} else {\" line of an if statement.");

		DefineWordListSets(perlWordListDesc);
	}
};
353
// Lexer object for Perl. Implements the ILexer interface; property and word
// list queries are forwarded to the OptionSetPerl member, while Lex() and
// Fold() are defined out of line.
class LexerPerl : public ILexer {
	CharacterSet setWordStart;	// characters that may start a word
	CharacterSet setWord;		// characters that may continue a word
	CharacterSet setSpecialVar;	// characters accepted after '$' as a special variable
	CharacterSet setControlVar;	// letters accepted after "$^" as a control variable
	WordList keywords;		// word list 0, set through WordListSet()
	OptionsPerl options;		// current option values, updated by PropertySet()
	OptionSetPerl osPerl;		// property name -> option member mapping
public:
	LexerPerl() :
		setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
		setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
	}
	virtual ~LexerPerl() {
	}
	// Destroys this lexer object.
	void SCI_METHOD Release() {
		delete this;
	}
	int SCI_METHOD Version() const {
		return lvOriginal;
	}
	const char *SCI_METHOD PropertyNames() {
		return osPerl.PropertyNames();
	}
	int SCI_METHOD PropertyType(const char *name) {
		return osPerl.PropertyType(name);
	}
	const char *SCI_METHOD DescribeProperty(const char *name) {
		return osPerl.DescribeProperty(name);
	}
	// Returns 0 when the option set reports a change, otherwise -1.
	int SCI_METHOD PropertySet(const char *key, const char *val);
	const char *SCI_METHOD DescribeWordListSets() {
		return osPerl.DescribeWordListSets();
	}
	// Returns 0 when word list n was replaced with different content, otherwise -1.
	int SCI_METHOD WordListSet(int n, const char *wl);
	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);

	// No private calls are supported; always returns 0.
	void *SCI_METHOD PrivateCall(int, void *) {
		return 0;
	}

	// Factory function: allocates a new LexerPerl.
	static ILexer *LexerFactoryPerl() {
		return new LexerPerl();
	}
	// Styles maxSeg characters of an interpolating string, switching to the
	// matching *_VAR style over variable references.
	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
};
403
404 int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
405 if (osPerl.PropertySet(&options, key, val)) {
406 return 0;
407 }
408 return -1;
409 }
410
411 int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
412 WordList *wordListN = 0;
413 switch (n) {
414 case 0:
415 wordListN = &keywords;
416 break;
417 }
418 int firstModification = -1;
419 if (wordListN) {
420 WordList wlNew;
421 wlNew.Set(wl);
422 if (*wordListN != wlNew) {
423 wordListN->Set(wl);
424 firstModification = 0;
425 }
426 }
427 return firstModification;
428 }
429
// Style one segment of an interpolating string.
//   sc        - style context positioned at the start of the segment; it is
//               moved forward as the segment is consumed
//   maxSeg    - number of characters in the segment
//   isPattern - when true, "$(", "$)", "$|", "@+" and "@-" are not treated
//               as variables
// Variable references are given the interpolated style (current style plus
// INTERPOLATE_SHIFT); everything else keeps the parent style. On return the
// state is back in the parent (non-interpolated) style.
void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
	// interpolate a segment (with no active backslashes or delimiters within)
	// switch in or out of an interpolation style or continue current style
	// commit variable patterns if found, trim segment, repeat until done
	while (maxSeg > 0) {
		bool isVar = false;	// true once a complete variable pattern is recognised
		int sLen = 0;		// length of the candidate variable pattern
		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
			// $#[$]*word [$@][$]*word (where word or {word} is always present)
			bool braces = false;
			sLen = 1;
			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $#
				sLen++;
			}
			while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$'))	// >0 $ dereference within
				sLen++;
			if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) {	// { start for {word}
				sLen++;
				braces = true;
			}
			if (maxSeg > sLen) {
				int c = sc.GetRelative(sLen);
				if (setWordStart.Contains(c)) {	// word (various)
					sLen++;
					isVar = true;
					while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen)))
						sLen++;
				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit}
					sLen++;
					isVar = true;
				}
			}
			if (braces) {
				// an opening brace must be closed, otherwise this is not a variable
				if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) {	// } end for {word}
					sLen++;
				} else
					isVar = false;
			}
		}
		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns
			// restart the measurement from the sigil alone
			sLen = 1;
			int c = sc.chNext;
			if (sc.ch == '$') {
				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits
					sLen++;
					isVar = true;
					while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen)))
						sLen++;
				} else if (setSpecialVar.Contains(c)) {	// $ special variables
					sLen++;
					isVar = true;
				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional
					sLen++;
					isVar = true;
				} else if (c == '^') {	// $^A control-char style
					sLen++;
					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) {
						sLen++;
						isVar = true;
					}
				}
			} else if (sc.ch == '@') {
				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern
					sLen++;
					isVar = true;
				}
			}
		}
		if (isVar) {	// commit as interpolated variable or normal character
			// enter the interpolated style unless already in one
			if (sc.state < SCE_PL_STRING_VAR)
				sc.SetState(sc.state + INTERPOLATE_SHIFT);
			sc.Forward(sLen);
			maxSeg -= sLen;
		} else {
			// ordinary character: drop back to the parent style if needed
			if (sc.state >= SCE_PL_STRING_VAR)
				sc.SetState(sc.state - INTERPOLATE_SHIFT);
			sc.Forward();
			maxSeg--;
		}
	}
	// never leave the segment in an interpolated style
	if (sc.state >= SCE_PL_STRING_VAR)
		sc.SetState(sc.state - INTERPOLATE_SHIFT);
}
513
514 void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
515 LexAccessor styler(pAccess);
516
517 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
518 WordList reWords;
519 reWords.Set("elsif if split while");
520
521 // charset classes
522 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
523 // lexing of "%*</" operators is non-trivial; these are missing in the set below
524 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
525 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
526 CharacterSet setModifiers(CharacterSet::setAlpha);
527 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
528 // setArray and setHash also accepts chars for special vars like $_,
529 // which are then truncated when the next char does not match setVar
530 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
531 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
532 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
533 CharacterSet &setPOD = setModifiers;
534 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
535 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
536 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");
537 // for format identifiers
538 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
539 CharacterSet &setFormat = setHereDocDelim;
540
541 // Lexer for perl often has to backtrack to start of current style to determine
542 // which characters are being used as quotes, how deeply nested is the
543 // start position and what the termination string is for HERE documents.
544
545 class HereDocCls { // Class to manage HERE doc sequence
546 public:
547 int State;
548 // 0: '<<' encountered
549 // 1: collect the delimiter
550 // 2: here doc text (lines after the delimiter)
551 int Quote; // the char after '<<'
552 bool Quoted; // true if Quote in ('\'','"','`')
553 int DelimiterLength; // strlen(Delimiter)
554 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
555 HereDocCls() {
556 State = 0;
557 Quote = 0;
558 Quoted = false;
559 DelimiterLength = 0;
560 Delimiter = new char[HERE_DELIM_MAX];
561 Delimiter[0] = '\0';
562 }
563 void Append(int ch) {
564 Delimiter[DelimiterLength++] = static_cast<char>(ch);
565 Delimiter[DelimiterLength] = '\0';
566 }
567 ~HereDocCls() {
568 delete []Delimiter;
569 }
570 };
571 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
572
573 class QuoteCls { // Class to manage quote pairs
574 public:
575 int Rep;
576 int Count;
577 int Up, Down;
578 QuoteCls() {
579 this->New(1);
580 }
581 void New(int r = 1) {
582 Rep = r;
583 Count = 0;
584 Up = '\0';
585 Down = '\0';
586 }
587 void Open(int u) {
588 Count++;
589 Up = u;
590 Down = opposite(Up);
591 }
592 };
593 QuoteCls Quote;
594
595 // additional state for number lexing
596 int numState = PERLNUM_DECIMAL;
597 int dotCount = 0;
598
599 unsigned int endPos = startPos + length;
600
601 // Backtrack to beginning of style if required...
602 // If in a long distance lexical state, backtrack to find quote characters.
603 // Includes strings (may be multi-line), numbers (additional state), format
604 // bodies, as well as POD sections.
605 if (initStyle == SCE_PL_HERE_Q
606 || initStyle == SCE_PL_HERE_QQ
607 || initStyle == SCE_PL_HERE_QX
608 || initStyle == SCE_PL_FORMAT
609 || initStyle == SCE_PL_HERE_QQ_VAR
610 || initStyle == SCE_PL_HERE_QX_VAR
611 ) {
612 // backtrack through multiple styles to reach the delimiter start
613 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
614 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
615 startPos--;
616 }
617 startPos = styler.LineStart(styler.GetLine(startPos));
618 initStyle = styler.StyleAt(startPos - 1);
619 }
620 if (initStyle == SCE_PL_STRING
621 || initStyle == SCE_PL_STRING_QQ
622 || initStyle == SCE_PL_BACKTICKS
623 || initStyle == SCE_PL_STRING_QX
624 || initStyle == SCE_PL_REGEX
625 || initStyle == SCE_PL_STRING_QR
626 || initStyle == SCE_PL_REGSUBST
627 || initStyle == SCE_PL_STRING_VAR
628 || initStyle == SCE_PL_STRING_QQ_VAR
629 || initStyle == SCE_PL_BACKTICKS_VAR
630 || initStyle == SCE_PL_STRING_QX_VAR
631 || initStyle == SCE_PL_REGEX_VAR
632 || initStyle == SCE_PL_STRING_QR_VAR
633 || initStyle == SCE_PL_REGSUBST_VAR
634 ) {
635 // for interpolation, must backtrack through a mix of two different styles
636 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
637 initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
638 while (startPos > 1) {
639 int st = styler.StyleAt(startPos - 1);
640 if ((st != initStyle) && (st != otherStyle))
641 break;
642 startPos--;
643 }
644 initStyle = SCE_PL_DEFAULT;
645 } else if (initStyle == SCE_PL_STRING_Q
646 || initStyle == SCE_PL_STRING_QW
647 || initStyle == SCE_PL_XLAT
648 || initStyle == SCE_PL_CHARACTER
649 || initStyle == SCE_PL_NUMBER
650 || initStyle == SCE_PL_IDENTIFIER
651 || initStyle == SCE_PL_ERROR
652 || initStyle == SCE_PL_SUB_PROTOTYPE
653 ) {
654 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
655 startPos--;
656 }
657 initStyle = SCE_PL_DEFAULT;
658 } else if (initStyle == SCE_PL_POD
659 || initStyle == SCE_PL_POD_VERB
660 ) {
661 // POD backtracking finds preceeding blank lines and goes back past them
662 int ln = styler.GetLine(startPos);
663 if (ln > 0) {
664 initStyle = styler.StyleAt(styler.LineStart(--ln));
665 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
666 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
667 ln--;
668 }
669 startPos = styler.LineStart(++ln);
670 initStyle = styler.StyleAt(startPos - 1);
671 } else {
672 startPos = 0;
673 initStyle = SCE_PL_DEFAULT;
674 }
675 }
676
677 // backFlag, backPos are additional state to aid identifier corner cases.
678 // Look backwards past whitespace and comments in order to detect either
679 // operator or keyword. Later updated as we go along.
680 int backFlag = BACK_NONE;
681 unsigned int backPos = startPos;
682 if (backPos > 0) {
683 backPos--;
684 skipWhitespaceComment(styler, backPos);
685 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
686 backFlag = BACK_OPERATOR;
687 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
688 backFlag = BACK_KEYWORD;
689 backPos++;
690 }
691
692 StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
693
694 for (; sc.More(); sc.Forward()) {
695
696 // Determine if the current state should terminate.
697 switch (sc.state) {
698 case SCE_PL_OPERATOR:
699 sc.SetState(SCE_PL_DEFAULT);
700 backFlag = BACK_OPERATOR;
701 backPos = sc.currentPos;
702 break;
703 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
704 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
705 || sc.Match('.', '.')
706 || sc.chPrev == '>') { // end of inputsymbol
707 sc.SetState(SCE_PL_DEFAULT);
708 }
709 break;
710 case SCE_PL_WORD: // keyword, plus special cases
711 if (!setWord.Contains(sc.ch)) {
712 char s[100];
713 sc.GetCurrent(s, sizeof(s));
714 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
715 sc.ChangeState(SCE_PL_DATASECTION);
716 } else {
717 if ((strcmp(s, "format") == 0)) {
718 sc.SetState(SCE_PL_FORMAT_IDENT);
719 HereDoc.State = 0;
720 } else {
721 sc.SetState(SCE_PL_DEFAULT);
722 }
723 backFlag = BACK_KEYWORD;
724 backPos = sc.currentPos;
725 }
726 }
727 break;
728 case SCE_PL_SCALAR:
729 case SCE_PL_ARRAY:
730 case SCE_PL_HASH:
731 case SCE_PL_SYMBOLTABLE:
732 if (sc.Match(':', ':')) { // skip ::
733 sc.Forward();
734 } else if (!setVar.Contains(sc.ch)) {
735 if (sc.LengthCurrent() == 1) {
736 // Special variable: $(, $_ etc.
737 sc.Forward();
738 }
739 sc.SetState(SCE_PL_DEFAULT);
740 }
741 break;
742 case SCE_PL_NUMBER:
743 // if no early break, number style is terminated at "(go through)"
744 if (sc.ch == '.') {
745 if (sc.chNext == '.') {
746 // double dot is always an operator (go through)
747 } else if (numState <= PERLNUM_FLOAT_EXP) {
748 // non-decimal number or float exponent, consume next dot
749 sc.SetState(SCE_PL_OPERATOR);
750 break;
751 } else { // decimal or vectors allows dots
752 dotCount++;
753 if (numState == PERLNUM_DECIMAL) {
754 if (dotCount <= 1) // number with one dot in it
755 break;
756 if (IsADigit(sc.chNext)) { // really a vector
757 numState = PERLNUM_VECTOR;
758 break;
759 }
760 // number then dot (go through)
761 } else if (IsADigit(sc.chNext)) // vectors
762 break;
763 // vector then dot (go through)
764 }
765 } else if (sc.ch == '_') {
766 // permissive underscoring for number and vector literals
767 break;
768 } else if (numState == PERLNUM_DECIMAL) {
769 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
770 numState = PERLNUM_FLOAT_EXP;
771 if (sc.chNext == '+' || sc.chNext == '-') {
772 sc.Forward();
773 }
774 break;
775 } else if (IsADigit(sc.ch))
776 break;
777 // number then word (go through)
778 } else if (numState == PERLNUM_HEX) {
779 if (IsADigit(sc.ch, 16))
780 break;
781 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
782 if (IsADigit(sc.ch)) // vector
783 break;
784 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
785 sc.ChangeState(SCE_PL_IDENTIFIER);
786 break;
787 }
788 // vector then word (go through)
789 } else if (IsADigit(sc.ch)) {
790 if (numState == PERLNUM_FLOAT_EXP) {
791 break;
792 } else if (numState == PERLNUM_OCTAL) {
793 if (sc.ch <= '7') break;
794 } else if (numState == PERLNUM_BINARY) {
795 if (sc.ch <= '1') break;
796 }
797 // mark invalid octal, binary numbers (go through)
798 numState = PERLNUM_BAD;
799 break;
800 }
801 // complete current number or vector
802 sc.ChangeState(actualNumStyle(numState));
803 sc.SetState(SCE_PL_DEFAULT);
804 break;
805 case SCE_PL_COMMENTLINE:
806 if (sc.atLineEnd) {
807 sc.SetState(SCE_PL_DEFAULT);
808 }
809 break;
810 case SCE_PL_HERE_DELIM:
811 if (HereDoc.State == 0) { // '<<' encountered
812 int delim_ch = sc.chNext;
813 int ws_skip = 0;
814 HereDoc.State = 1; // pre-init HERE doc class
815 HereDoc.Quote = sc.chNext;
816 HereDoc.Quoted = false;
817 HereDoc.DelimiterLength = 0;
818 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
819 if (IsASpaceOrTab(delim_ch)) {
820 // skip whitespace; legal only for quoted delimiters
821 unsigned int i = sc.currentPos + 1;
822 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
823 i++;
824 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
825 }
826 ws_skip = i - sc.currentPos - 1;
827 }
828 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
829 // a quoted here-doc delimiter; skip any whitespace
830 sc.Forward(ws_skip + 1);
831 HereDoc.Quote = delim_ch;
832 HereDoc.Quoted = true;
833 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
834 || ws_skip > 0) {
835 // left shift << or <<= operator cases
836 // restore position if operator
837 sc.ChangeState(SCE_PL_OPERATOR);
838 sc.ForwardSetState(SCE_PL_DEFAULT);
839 backFlag = BACK_OPERATOR;
840 backPos = sc.currentPos;
841 HereDoc.State = 0;
842 } else {
843 // specially handle initial '\' for identifier
844 if (ws_skip == 0 && HereDoc.Quote == '\\')
845 sc.Forward();
846 // an unquoted here-doc delimiter, no special handling
847 // (cannot be prefixed by spaces/tabs), or
848 // symbols terminates; deprecated zero-length delimiter
849 }
850 } else if (HereDoc.State == 1) { // collect the delimiter
851 backFlag = BACK_NONE;
852 if (HereDoc.Quoted) { // a quoted here-doc delimiter
853 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
854 sc.ForwardSetState(SCE_PL_DEFAULT);
855 } else if (!sc.atLineEnd) {
856 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
857 sc.Forward();
858 }
859 if (sc.ch != '\r') { // skip CR if CRLF
860 HereDoc.Append(sc.ch);
861 }
862 }
863 } else { // an unquoted here-doc delimiter
864 if (setHereDocDelim.Contains(sc.ch)) {
865 HereDoc.Append(sc.ch);
866 } else {
867 sc.SetState(SCE_PL_DEFAULT);
868 }
869 }
870 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
871 sc.SetState(SCE_PL_ERROR);
872 HereDoc.State = 0;
873 }
874 }
875 break;
876 case SCE_PL_HERE_Q:
877 case SCE_PL_HERE_QQ:
878 case SCE_PL_HERE_QX:
879 // also implies HereDoc.State == 2
880 sc.Complete();
881 if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
882 int c = sc.GetRelative(HereDoc.DelimiterLength);
883 if (c == '\r' || c == '\n') { // peek first, do not consume match
884 sc.Forward(HereDoc.DelimiterLength);
885 sc.SetState(SCE_PL_DEFAULT);
886 backFlag = BACK_NONE;
887 HereDoc.State = 0;
888 if (!sc.atLineEnd)
889 sc.Forward();
890 break;
891 }
892 }
893 if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
894 while (!sc.atLineEnd)
895 sc.Forward();
896 break;
897 }
898 while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
899 int s = 0, endType = 0;
900 int maxSeg = endPos - sc.currentPos;
901 while (s < maxSeg) { // scan to break string into segments
902 int c = sc.GetRelative(s);
903 if (c == '\\') {
904 endType = 1; break;
905 } else if (c == '\r' || c == '\n') {
906 endType = 2; break;
907 }
908 s++;
909 }
910 if (s > 0) // process non-empty segments
911 InterpolateSegment(sc, s);
912 if (endType == 1) {
913 sc.Forward();
914 // \ at end-of-line does not appear to have any effect, skip
915 if (sc.ch != '\r' && sc.ch != '\n')
916 sc.Forward();
917 } else if (endType == 2) {
918 if (!sc.atLineEnd)
919 sc.Forward();
920 }
921 }
922 break;
923 case SCE_PL_POD:
924 case SCE_PL_POD_VERB: {
925 unsigned int fw = sc.currentPos;
926 int ln = styler.GetLine(fw);
927 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
928 sc.SetState(SCE_PL_POD);
929 sc.Forward(4);
930 sc.SetState(SCE_PL_DEFAULT);
931 styler.SetLineState(ln, SCE_PL_POD);
932 break;
933 }
934 int pod = podLineScan(styler, fw, endPos); // classify POD line
935 styler.SetLineState(ln, pod);
936 if (pod == SCE_PL_DEFAULT) {
937 if (sc.state == SCE_PL_POD_VERB) {
938 unsigned int fw2 = fw;
939 while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
940 fw = fw2++; // penultimate line (last blank line)
941 pod = podLineScan(styler, fw2, endPos);
942 styler.SetLineState(styler.GetLine(fw2), pod);
943 }
944 if (pod == SCE_PL_POD) { // truncate verbatim POD early
945 sc.SetState(SCE_PL_POD);
946 } else
947 fw = fw2;
948 }
949 } else {
950 if (pod == SCE_PL_POD_VERB // still part of current paragraph
951 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
952 pod = SCE_PL_POD;
953 styler.SetLineState(ln, pod);
954 } else if (pod == SCE_PL_POD
955 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
956 pod = SCE_PL_POD_VERB;
957 styler.SetLineState(ln, pod);
958 }
959 sc.SetState(pod);
960 }
961 sc.Forward(fw - sc.currentPos); // commit style
962 }
963 break;
964 case SCE_PL_REGEX:
965 case SCE_PL_STRING_QR:
966 if (Quote.Rep <= 0) {
967 if (!setModifiers.Contains(sc.ch))
968 sc.SetState(SCE_PL_DEFAULT);
969 } else if (!Quote.Up && !IsASpace(sc.ch)) {
970 Quote.Open(sc.ch);
971 } else {
972 int s = 0, endType = 0;
973 int maxSeg = endPos - sc.currentPos;
974 while (s < maxSeg) { // scan to break string into segments
975 int c = sc.GetRelative(s);
976 if (IsASpace(c)) {
977 break;
978 } else if (c == '\\' && Quote.Up != '\\') {
979 endType = 1; break;
980 } else if (c == Quote.Down) {
981 Quote.Count--;
982 if (Quote.Count == 0) {
983 Quote.Rep--;
984 break;
985 }
986 } else if (c == Quote.Up)
987 Quote.Count++;
988 s++;
989 }
990 if (s > 0) { // process non-empty segments
991 if (Quote.Up != '\'') {
992 InterpolateSegment(sc, s, true);
993 } else // non-interpolated path
994 sc.Forward(s);
995 }
996 if (endType == 1)
997 sc.Forward();
998 }
999 break;
1000 case SCE_PL_REGSUBST:
1001 case SCE_PL_XLAT:
1002 if (Quote.Rep <= 0) {
1003 if (!setModifiers.Contains(sc.ch))
1004 sc.SetState(SCE_PL_DEFAULT);
1005 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1006 Quote.Open(sc.ch);
1007 } else {
1008 int s = 0, endType = 0;
1009 int maxSeg = endPos - sc.currentPos;
1010 bool isPattern = (Quote.Rep == 2);
1011 while (s < maxSeg) { // scan to break string into segments
1012 int c = sc.GetRelative(s);
1013 if (c == '\\' && Quote.Up != '\\') {
1014 endType = 2; break;
1015 } else if (Quote.Count == 0 && Quote.Rep == 1) {
1016 // We matched something like s(...) or tr{...}, Perl 5.10
1017 // appears to allow almost any character for use as the
1018 // next delimiters. Whitespace and comments are accepted in
1019 // between, but we'll limit to whitespace here.
1020 // For '#', if no whitespace in between, it's a delimiter.
1021 if (IsASpace(c)) {
1022 // Keep going
1023 } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) {
1024 endType = 3;
1025 } else
1026 Quote.Open(c);
1027 break;
1028 } else if (c == Quote.Down) {
1029 Quote.Count--;
1030 if (Quote.Count == 0) {
1031 Quote.Rep--;
1032 endType = 1;
1033 }
1034 if (Quote.Up == Quote.Down)
1035 Quote.Count++;
1036 if (endType == 1)
1037 break;
1038 } else if (c == Quote.Up) {
1039 Quote.Count++;
1040 } else if (IsASpace(c))
1041 break;
1042 s++;
1043 }
1044 if (s > 0) { // process non-empty segments
1045 if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1046 InterpolateSegment(sc, s, isPattern);
1047 } else // non-interpolated path
1048 sc.Forward(s);
1049 }
1050 if (endType == 2) {
1051 sc.Forward();
1052 } else if (endType == 3)
1053 sc.SetState(SCE_PL_DEFAULT);
1054 }
1055 break;
1056 case SCE_PL_STRING_Q:
1057 case SCE_PL_STRING_QQ:
1058 case SCE_PL_STRING_QX:
1059 case SCE_PL_STRING_QW:
1060 case SCE_PL_STRING:
1061 case SCE_PL_CHARACTER:
1062 case SCE_PL_BACKTICKS:
1063 if (!Quote.Down && !IsASpace(sc.ch)) {
1064 Quote.Open(sc.ch);
1065 } else {
1066 int s = 0, endType = 0;
1067 int maxSeg = endPos - sc.currentPos;
1068 while (s < maxSeg) { // scan to break string into segments
1069 int c = sc.GetRelative(s);
1070 if (IsASpace(c)) {
1071 break;
1072 } else if (c == '\\' && Quote.Up != '\\') {
1073 endType = 2; break;
1074 } else if (c == Quote.Down) {
1075 Quote.Count--;
1076 if (Quote.Count == 0) {
1077 endType = 3; break;
1078 }
1079 } else if (c == Quote.Up)
1080 Quote.Count++;
1081 s++;
1082 }
1083 if (s > 0) { // process non-empty segments
1084 switch (sc.state) {
1085 case SCE_PL_STRING:
1086 case SCE_PL_STRING_QQ:
1087 case SCE_PL_BACKTICKS:
1088 InterpolateSegment(sc, s);
1089 break;
1090 case SCE_PL_STRING_QX:
1091 if (Quote.Up != '\'') {
1092 InterpolateSegment(sc, s);
1093 break;
1094 }
1095 // (continued for ' delim)
1096 default: // non-interpolated path
1097 sc.Forward(s);
1098 }
1099 }
1100 if (endType == 2) {
1101 sc.Forward();
1102 } else if (endType == 3)
1103 sc.ForwardSetState(SCE_PL_DEFAULT);
1104 }
1105 break;
1106 case SCE_PL_SUB_PROTOTYPE: {
1107 int i = 0;
1108 // forward scan; must all be valid proto characters
1109 while (setSubPrototype.Contains(sc.GetRelative(i)))
1110 i++;
1111 if (sc.GetRelative(i) == ')') { // valid sub prototype
1112 sc.Forward(i);
1113 sc.ForwardSetState(SCE_PL_DEFAULT);
1114 } else {
1115 // abandon prototype, restart from '('
1116 sc.ChangeState(SCE_PL_OPERATOR);
1117 sc.SetState(SCE_PL_DEFAULT);
1118 }
1119 }
1120 break;
1121 case SCE_PL_FORMAT: {
1122 sc.Complete();
1123 if (sc.Match('.')) {
1124 sc.Forward();
1125 if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1126 sc.SetState(SCE_PL_DEFAULT);
1127 }
1128 while (!sc.atLineEnd)
1129 sc.Forward();
1130 }
1131 break;
1132 case SCE_PL_ERROR:
1133 break;
1134 }
1135 // Needed for specific continuation styles (one follows the other)
1136 switch (sc.state) {
1137 // continued from SCE_PL_WORD
1138 case SCE_PL_FORMAT_IDENT:
1139 // occupies HereDoc state 3 to avoid clashing with HERE docs
1140 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
1141 sc.ChangeState(SCE_PL_DEFAULT);
1142 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1143 sc.Forward();
1144 sc.SetState(SCE_PL_FORMAT_IDENT);
1145 }
1146 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
1147 if (sc.ch != '=') {
1148 do {
1149 sc.Forward();
1150 } while (setFormat.Contains(sc.ch));
1151 }
1152 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1153 sc.Forward();
1154 if (sc.ch == '=') {
1155 sc.ForwardSetState(SCE_PL_DEFAULT);
1156 HereDoc.State = 3;
1157 } else {
1158 // invalid identifier; inexact fallback, but hey
1159 sc.ChangeState(SCE_PL_IDENTIFIER);
1160 sc.SetState(SCE_PL_DEFAULT);
1161 }
1162 } else {
1163 sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
1164 }
1165 backFlag = BACK_NONE;
1166 break;
1167 }
1168
1169 // Must check end of HereDoc states here before default state is handled
1170 if (HereDoc.State == 1 && sc.atLineEnd) {
1171 // Beginning of the here-doc (the line after the here-doc delimiter):
1172 // Lexically, the here-doc starts from the next line after the <<, but the
1173 // first line of the here-doc seems to follow the style of the last EOL sequence
1174 int st_new = SCE_PL_HERE_QQ;
1175 HereDoc.State = 2;
1176 if (HereDoc.Quoted) {
1177 if (sc.state == SCE_PL_HERE_DELIM) {
1178 // Missing quote at end of string! We are stricter than perl.
1179 // Colour here-doc anyway while marking this bit as an error.
1180 sc.ChangeState(SCE_PL_ERROR);
1181 }
1182 switch (HereDoc.Quote) {
1183 case '\'':
1184 st_new = SCE_PL_HERE_Q ;
1185 break;
1186 case '"' :
1187 st_new = SCE_PL_HERE_QQ;
1188 break;
1189 case '`' :
1190 st_new = SCE_PL_HERE_QX;
1191 break;
1192 }
1193 } else {
1194 if (HereDoc.Quote == '\\')
1195 st_new = SCE_PL_HERE_Q;
1196 }
1197 sc.SetState(st_new);
1198 }
1199 if (HereDoc.State == 3 && sc.atLineEnd) {
1200 // Start of format body.
1201 HereDoc.State = 0;
1202 sc.SetState(SCE_PL_FORMAT);
1203 }
1204
1205 // Determine if a new state should be entered.
1206 if (sc.state == SCE_PL_DEFAULT) {
1207 if (IsADigit(sc.ch) ||
1208 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1209 sc.SetState(SCE_PL_NUMBER);
1210 backFlag = BACK_NONE;
1211 numState = PERLNUM_DECIMAL;
1212 dotCount = 0;
1213 if (sc.ch == '0') { // hex,bin,octal
1214 if (sc.chNext == 'x' || sc.chNext == 'X') {
1215 numState = PERLNUM_HEX;
1216 } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1217 numState = PERLNUM_BINARY;
1218 } else if (IsADigit(sc.chNext)) {
1219 numState = PERLNUM_OCTAL;
1220 }
1221 if (numState != PERLNUM_DECIMAL) {
1222 sc.Forward();
1223 }
1224 } else if (sc.ch == 'v') { // vector
1225 numState = PERLNUM_V_VECTOR;
1226 }
1227 } else if (setWord.Contains(sc.ch)) {
1228 // if immediately prefixed by '::', always a bareword
1229 sc.SetState(SCE_PL_WORD);
1230 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1231 sc.ChangeState(SCE_PL_IDENTIFIER);
1232 }
1233 unsigned int bk = sc.currentPos;
1234 unsigned int fw = sc.currentPos + 1;
1235 // first check for possible quote-like delimiter
1236 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1237 sc.ChangeState(SCE_PL_REGSUBST);
1238 Quote.New(2);
1239 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1240 sc.ChangeState(SCE_PL_REGEX);
1241 Quote.New();
1242 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1243 sc.ChangeState(SCE_PL_STRING_Q);
1244 Quote.New();
1245 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1246 sc.ChangeState(SCE_PL_XLAT);
1247 Quote.New(2);
1248 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1249 sc.ChangeState(SCE_PL_XLAT);
1250 Quote.New(2);
1251 sc.Forward();
1252 fw++;
1253 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1254 && !setWord.Contains(sc.GetRelative(2))) {
1255 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1256 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1257 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1258 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1259 Quote.New();
1260 sc.Forward();
1261 fw++;
1262 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1263 !setWord.Contains(sc.chNext) ||
1264 (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
1265 sc.ChangeState(SCE_PL_OPERATOR);
1266 }
1267 // if potentially a keyword, scan forward and grab word, then check
1268 // if it's really one; if yes, disambiguation test is performed
1269 // otherwise it is always a bareword and we skip a lot of scanning
1270 if (sc.state == SCE_PL_WORD) {
1271 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1272 fw++;
1273 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1274 sc.ChangeState(SCE_PL_IDENTIFIER);
1275 }
1276 }
1277 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1278 // for quote-like delimiters/keywords, attempt to disambiguate
1279 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1280 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1281 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1282 sc.ChangeState(SCE_PL_IDENTIFIER);
1283 }
1284 backFlag = BACK_NONE;
1285 } else if (sc.ch == '#') {
1286 sc.SetState(SCE_PL_COMMENTLINE);
1287 } else if (sc.ch == '\"') {
1288 sc.SetState(SCE_PL_STRING);
1289 Quote.New();
1290 Quote.Open(sc.ch);
1291 backFlag = BACK_NONE;
1292 } else if (sc.ch == '\'') {
1293 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1294 // Archaic call
1295 sc.SetState(SCE_PL_IDENTIFIER);
1296 } else {
1297 sc.SetState(SCE_PL_CHARACTER);
1298 Quote.New();
1299 Quote.Open(sc.ch);
1300 }
1301 backFlag = BACK_NONE;
1302 } else if (sc.ch == '`') {
1303 sc.SetState(SCE_PL_BACKTICKS);
1304 Quote.New();
1305 Quote.Open(sc.ch);
1306 backFlag = BACK_NONE;
1307 } else if (sc.ch == '$') {
1308 sc.SetState(SCE_PL_SCALAR);
1309 if (sc.chNext == '{') {
1310 sc.ForwardSetState(SCE_PL_OPERATOR);
1311 } else if (IsASpace(sc.chNext)) {
1312 sc.ForwardSetState(SCE_PL_DEFAULT);
1313 } else {
1314 sc.Forward();
1315 if (sc.Match('`', '`') || sc.Match(':', ':')) {
1316 sc.Forward();
1317 }
1318 }
1319 backFlag = BACK_NONE;
1320 } else if (sc.ch == '@') {
1321 sc.SetState(SCE_PL_ARRAY);
1322 if (setArray.Contains(sc.chNext)) {
1323 // no special treatment
1324 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1325 sc.Forward(2);
1326 } else if (sc.chNext == '{' || sc.chNext == '[') {
1327 sc.ForwardSetState(SCE_PL_OPERATOR);
1328 } else {
1329 sc.ChangeState(SCE_PL_OPERATOR);
1330 }
1331 backFlag = BACK_NONE;
1332 } else if (setPreferRE.Contains(sc.ch)) {
1333 // Explicit backward peeking to set a consistent preferRE for
1334 // any slash found, so no longer need to track preferRE state.
1335 // Find first previous significant lexed element and interpret.
1336 // A few symbols shares this code for disambiguation.
1337 bool preferRE = false;
1338 bool isHereDoc = sc.Match('<', '<');
1339 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1340 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1341 sc.Complete();
1342 styler.Flush();
1343 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1344 hereDocSpace = true;
1345 skipWhitespaceComment(styler, bk);
1346 if (bk == 0) {
1347 // avoid backward scanning breakage
1348 preferRE = true;
1349 } else {
1350 int bkstyle = styler.StyleAt(bk);
1351 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1352 switch (bkstyle) {
1353 case SCE_PL_OPERATOR:
1354 preferRE = true;
1355 if (bkch == ')' || bkch == ']') {
1356 preferRE = false;
1357 } else if (bkch == '}') {
1358 // backtrack by counting balanced brace pairs
1359 // needed to test for variables like ${}, @{} etc.
1360 bkstyle = styleBeforeBracePair(styler, bk);
1361 if (bkstyle == SCE_PL_SCALAR
1362 || bkstyle == SCE_PL_ARRAY
1363 || bkstyle == SCE_PL_HASH
1364 || bkstyle == SCE_PL_SYMBOLTABLE
1365 || bkstyle == SCE_PL_OPERATOR) {
1366 preferRE = false;
1367 }
1368 } else if (bkch == '+' || bkch == '-') {
1369 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1370 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1371 // exceptions for operators: unary suffixes ++, --
1372 preferRE = false;
1373 }
1374 break;
1375 case SCE_PL_IDENTIFIER:
1376 preferRE = true;
1377 bkstyle = styleCheckIdentifier(styler, bk);
1378 if ((bkstyle == 1) || (bkstyle == 2)) {
1379 // inputsymbol or var with "->" or "::" before identifier
1380 preferRE = false;
1381 } else if (bkstyle == 3) {
1382 // bare identifier, test cases follows:
1383 if (sc.ch == '/') {
1384 // if '/', /PATTERN/ unless digit/space immediately after '/'
1385 // if '//', always expect defined-or operator to follow identifier
1386 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1387 preferRE = false;
1388 } else if (sc.ch == '*' || sc.ch == '%') {
1389 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1390 preferRE = false;
1391 } else if (sc.ch == '<') {
1392 if (IsASpace(sc.chNext) || sc.chNext == '=')
1393 preferRE = false;
1394 }
1395 }
1396 break;
1397 case SCE_PL_SCALAR: // for $var<< case:
1398 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1399 preferRE = true;
1400 break;
1401 case SCE_PL_WORD:
1402 preferRE = true;
1403 // for HERE docs, always true
1404 if (sc.ch == '/') {
1405 // adopt heuristics similar to vim-style rules:
1406 // keywords always forced as /PATTERN/: split, if, elsif, while
1407 // everything else /PATTERN/ unless digit/space immediately after '/'
1408 // for '//', defined-or favoured unless special keywords
1409 unsigned int bkend = bk + 1;
1410 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1411 bk--;
1412 }
1413 if (isPerlKeyword(bk, bkend, reWords, styler))
1414 break;
1415 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1416 preferRE = false;
1417 } else if (sc.ch == '*' || sc.ch == '%') {
1418 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1419 preferRE = false;
1420 } else if (sc.ch == '<') {
1421 if (IsASpace(sc.chNext) || sc.chNext == '=')
1422 preferRE = false;
1423 }
1424 break;
1425
1426 // other styles uses the default, preferRE=false
1427 case SCE_PL_POD:
1428 case SCE_PL_HERE_Q:
1429 case SCE_PL_HERE_QQ:
1430 case SCE_PL_HERE_QX:
1431 preferRE = true;
1432 break;
1433 }
1434 }
1435 backFlag = BACK_NONE;
1436 if (isHereDoc) { // handle '<<', HERE doc
1437 if (preferRE) {
1438 sc.SetState(SCE_PL_HERE_DELIM);
1439 HereDoc.State = 0;
1440 } else { // << operator
1441 sc.SetState(SCE_PL_OPERATOR);
1442 sc.Forward();
1443 }
1444 } else if (sc.ch == '*') { // handle '*', typeglob
1445 if (preferRE) {
1446 sc.SetState(SCE_PL_SYMBOLTABLE);
1447 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1448 sc.Forward(2);
1449 } else if (sc.chNext == '{') {
1450 sc.ForwardSetState(SCE_PL_OPERATOR);
1451 } else {
1452 sc.Forward();
1453 }
1454 } else {
1455 sc.SetState(SCE_PL_OPERATOR);
1456 if (sc.chNext == '*') // exponentiation
1457 sc.Forward();
1458 }
1459 } else if (sc.ch == '%') { // handle '%', hash
1460 if (preferRE) {
1461 sc.SetState(SCE_PL_HASH);
1462 if (setHash.Contains(sc.chNext)) {
1463 sc.Forward();
1464 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1465 sc.Forward(2);
1466 } else if (sc.chNext == '{') {
1467 sc.ForwardSetState(SCE_PL_OPERATOR);
1468 } else {
1469 sc.ChangeState(SCE_PL_OPERATOR);
1470 }
1471 } else {
1472 sc.SetState(SCE_PL_OPERATOR);
1473 }
1474 } else if (sc.ch == '<') { // handle '<', inputsymbol
1475 if (preferRE) {
1476 // forward scan
1477 int i = inputsymbolScan(styler, sc.currentPos, endPos);
1478 if (i > 0) {
1479 sc.SetState(SCE_PL_IDENTIFIER);
1480 sc.Forward(i);
1481 } else {
1482 sc.SetState(SCE_PL_OPERATOR);
1483 }
1484 } else {
1485 sc.SetState(SCE_PL_OPERATOR);
1486 }
1487 } else { // handle '/', regexp
1488 if (preferRE) {
1489 sc.SetState(SCE_PL_REGEX);
1490 Quote.New();
1491 Quote.Open(sc.ch);
1492 } else { // / and // operators
1493 sc.SetState(SCE_PL_OPERATOR);
1494 if (sc.chNext == '/') {
1495 sc.Forward();
1496 }
1497 }
1498 }
1499 } else if (sc.ch == '=' // POD
1500 && setPOD.Contains(sc.chNext)
1501 && sc.atLineStart) {
1502 sc.SetState(SCE_PL_POD);
1503 backFlag = BACK_NONE;
1504 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1505 unsigned int bk = sc.currentPos;
1506 unsigned int fw = 2;
1507 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1508 !setWord.Contains(sc.GetRelative(2))) {
1509 sc.SetState(SCE_PL_WORD);
1510 } else {
1511 // nominally a minus and bareword; find extent of bareword
1512 while (setWord.Contains(sc.GetRelative(fw)))
1513 fw++;
1514 sc.SetState(SCE_PL_OPERATOR);
1515 }
1516 // force to bareword for hash key => or {variable literal} cases
1517 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1518 sc.ChangeState(SCE_PL_IDENTIFIER);
1519 }
1520 backFlag = BACK_NONE;
1521 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1522 sc.Complete();
1523 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1524 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1525 backFlag = BACK_NONE;
1526 } else {
1527 sc.SetState(SCE_PL_OPERATOR);
1528 }
1529 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1530 sc.SetState(SCE_PL_OPERATOR);
1531 if (sc.Match('.', '.')) { // .. and ...
1532 sc.Forward();
1533 if (sc.chNext == '.') sc.Forward();
1534 }
1535 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1536 sc.SetState(SCE_PL_DATASECTION);
1537 } else {
1538 // keep colouring defaults
1539 sc.Complete();
1540 }
1541 }
1542 }
1543 sc.Complete();
1544 if (sc.state == SCE_PL_HERE_Q
1545 || sc.state == SCE_PL_HERE_QQ
1546 || sc.state == SCE_PL_HERE_QX
1547 || sc.state == SCE_PL_FORMAT) {
1548 styler.ChangeLexerState(sc.currentPos, styler.Length());
1549 }
1550 sc.Complete();
1551 }
1552
// Encoding of POD "=head" levels inside a fold level, as used by
// LexerPerl::Fold(): the heading value is shifted left by PERL_HEADFOLD_SHIFT
// and occupies the bits selected by PERL_HEADFOLD_MASK (bits 7-4). The mask is
// also used to strip that field again when an "=cut" line is folded.
1553 #define PERL_HEADFOLD_SHIFT 4
1554 #define PERL_HEADFOLD_MASK 0xF0
1555
1556 void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
1557
1558 if (!options.fold)
1559 return;
1560
1561 LexAccessor styler(pAccess);
1562
1563 unsigned int endPos = startPos + length;
1564 int visibleChars = 0;
1565 int lineCurrent = styler.GetLine(startPos);
1566
1567 // Backtrack to previous line in case need to fix its fold status
1568 if (startPos > 0) {
1569 if (lineCurrent > 0) {
1570 lineCurrent--;
1571 startPos = styler.LineStart(lineCurrent);
1572 }
1573 }
1574
1575 int levelPrev = SC_FOLDLEVELBASE;
1576 if (lineCurrent > 0)
1577 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1578 int levelCurrent = levelPrev;
1579 char chNext = styler[startPos];
1580 char chPrev = styler.SafeGetCharAt(startPos - 1);
1581 int styleNext = styler.StyleAt(startPos);
1582 // Used at end of line to determine if the line was a package definition
1583 bool isPackageLine = false;
1584 int podHeading = 0;
1585 for (unsigned int i = startPos; i < endPos; i++) {
1586 char ch = chNext;
1587 chNext = styler.SafeGetCharAt(i + 1);
1588 int style = styleNext;
1589 styleNext = styler.StyleAt(i + 1);
1590 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1591 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1592 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1593 // Comment folding
1594 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1595 if (!IsCommentLine(lineCurrent - 1, styler)
1596 && IsCommentLine(lineCurrent + 1, styler))
1597 levelCurrent++;
1598 else if (IsCommentLine(lineCurrent - 1, styler)
1599 && !IsCommentLine(lineCurrent + 1, styler))
1600 levelCurrent--;
1601 }
1602 // {} [] block folding
1603 if (style == SCE_PL_OPERATOR) {
1604 if (ch == '{') {
1605 if (options.foldAtElse && levelCurrent < levelPrev)
1606 --levelPrev;
1607 levelCurrent++;
1608 } else if (ch == '}') {
1609 levelCurrent--;
1610 }
1611 if (ch == '[') {
1612 if (options.foldAtElse && levelCurrent < levelPrev)
1613 --levelPrev;
1614 levelCurrent++;
1615 } else if (ch == ']') {
1616 levelCurrent--;
1617 }
1618 }
1619 // POD folding
1620 if (options.foldPOD && atLineStart) {
1621 if (style == SCE_PL_POD) {
1622 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1623 levelCurrent++;
1624 else if (styler.Match(i, "=cut"))
1625 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1626 else if (styler.Match(i, "=head"))
1627 podHeading = PodHeadingLevel(i, styler);
1628 } else if (style == SCE_PL_DATASECTION) {
1629 if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1630 levelCurrent++;
1631 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1632 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1633 else if (styler.Match(i, "=head"))
1634 podHeading = PodHeadingLevel(i, styler);
1635 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1636 // reset needed as level test is vs. SC_FOLDLEVELBASE
1637 else if (stylePrevCh != SCE_PL_DATASECTION)
1638 levelCurrent = SC_FOLDLEVELBASE;
1639 }
1640 }
1641 // package folding
1642 if (options.foldPackage && atLineStart) {
1643 if (IsPackageLine(lineCurrent, styler)
1644 && !IsPackageLine(lineCurrent + 1, styler))
1645 isPackageLine = true;
1646 }
1647
1648 //heredoc folding
1649 switch (style) {
1650 case SCE_PL_HERE_QQ :
1651 case SCE_PL_HERE_Q :
1652 case SCE_PL_HERE_QX :
1653 switch (stylePrevCh) {
1654 case SCE_PL_HERE_QQ :
1655 case SCE_PL_HERE_Q :
1656 case SCE_PL_HERE_QX :
1657 //do nothing;
1658 break;
1659 default :
1660 levelCurrent++;
1661 break;
1662 }
1663 break;
1664 default:
1665 switch (stylePrevCh) {
1666 case SCE_PL_HERE_QQ :
1667 case SCE_PL_HERE_Q :
1668 case SCE_PL_HERE_QX :
1669 levelCurrent--;
1670 break;
1671 default :
1672 //do nothing;
1673 break;
1674 }
1675 break;
1676 }
1677
1678 //explicit folding
1679 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1680 if (chNext == '{') {
1681 levelCurrent++;
1682 } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
1683 levelCurrent--;
1684 }
1685 }
1686
1687 if (atEOL) {
1688 int lev = levelPrev;
1689 // POD headings occupy bits 7-4, leaving some breathing room for
1690 // non-standard practice -- POD sections stuck in blocks, etc.
1691 if (podHeading > 0) {
1692 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1693 lev = levelCurrent - 1;
1694 lev |= SC_FOLDLEVELHEADERFLAG;
1695 podHeading = 0;
1696 }
1697 // Check if line was a package declaration
1698 // because packages need "special" treatment
1699 if (isPackageLine) {
1700 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1701 levelCurrent = SC_FOLDLEVELBASE + 1;
1702 isPackageLine = false;
1703 }
1704 lev |= levelCurrent << 16;
1705 if (visibleChars == 0 && options.foldCompact)
1706 lev |= SC_FOLDLEVELWHITEFLAG;
1707 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1708 lev |= SC_FOLDLEVELHEADERFLAG;
1709 if (lev != styler.LevelAt(lineCurrent)) {
1710 styler.SetLevel(lineCurrent, lev);
1711 }
1712 lineCurrent++;
1713 levelPrev = levelCurrent;
1714 visibleChars = 0;
1715 }
1716 if (!isspacechar(ch))
1717 visibleChars++;
1718 chPrev = ch;
1719 }
1720 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1721 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1722 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1723 }
1724
// LexerModule instance for Perl: associates the SCLEX_PERL identifier and the
// name "perl" with the LexerPerl factory function and the Perl word-list
// descriptions.
// NOTE(review): the trailing 8 is presumably the number of style bits the
// lexer uses -- confirm against the LexerModule constructor.
1725 LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc, 8);