]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
0c66036200f5f079b14897424bab554f30437171
[wxWidgets.git] / src / stc / scintilla / src / LexPerl.cxx
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for Perl.
4 **/
5 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
6 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "StyleContext.h"
20 #include "KeyWords.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 #include "CharacterSet.h"
24
25 #ifdef SCI_NAMESPACE
26 using namespace Scintilla;
27 #endif
28
29 // Info for HERE document handling from perldata.pod (reformatted):
30 // ----------------------------------------------------------------
31 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
32 // Following a << you specify a string to terminate the quoted material, and
33 // all lines following the current line down to the terminating string are
34 // the value of the item.
35 // * The terminating string may be either an identifier (a word), or some
36 // quoted text.
37 // * If quoted, the type of quotes you use determines the treatment of the
38 // text, just as in regular quoting.
39 // * An unquoted identifier works like double quotes.
40 // * There must be no space between the << and the identifier.
41 // (If you put a space it will be treated as a null identifier,
42 // which is valid, and matches the first empty line.)
43 // (This is deprecated, -w warns of this syntax)
44 // * The terminating string must appear by itself (unquoted and
45 // with no surrounding whitespace) on the terminating line.
46
47 #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
48
49 #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
50 #define PERLNUM_HEX 2
51 #define PERLNUM_OCTAL 3
52 #define PERLNUM_FLOAT_EXP 4 // exponent part only
53 #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
54 #define PERLNUM_VECTOR 6
55 #define PERLNUM_V_VECTOR 7
56 #define PERLNUM_BAD 8
57
58 #define BACK_NONE 0 // lookback state for bareword disambiguation:
59 #define BACK_OPERATOR 1 // whitespace/comments are insignificant
60 #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
61
62 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler)
63 {
64 // old-style keyword matcher; needed because GetCurrent() needs
65 // current segment to be committed, but we may abandon early...
66 char s[100];
67 unsigned int i, len = end - start;
68 if (len > 30) { len = 30; }
69 for (i = 0; i < len; i++, start++) s[i] = styler[start];
70 s[i] = '\0';
71 return keywords.InList(s);
72 }
73
74 static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw,
75 int backFlag, unsigned int backPos, unsigned int endPos)
76 {
77 // identifiers are recognized by Perl as barewords under some
78 // conditions, the following attempts to do the disambiguation
79 // by looking backward and forward; result in 2 LSB
80 int result = 0;
81 bool moreback = false; // true if passed newline/comments
82 bool brace = false; // true if opening brace found
83 // if BACK_NONE, neither operator nor keyword, so skip test
84 if (backFlag == BACK_NONE)
85 return result;
86 // first look backwards past whitespace/comments to set EOL flag
87 // (some disambiguation patterns must be on a single line)
88 if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
89 moreback = true;
90 // look backwards at last significant lexed item for disambiguation
91 bk = backPos - 1;
92 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
93 if (ch == '{' && !moreback) {
94 // {bareword: possible variable spec
95 brace = true;
96 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
97 // &bareword: subroutine call
98 || styler.Match(bk - 1, "->")
99 // ->bareword: part of variable spec
100 || styler.Match(bk - 2, "sub")) {
101 // sub bareword: subroutine declaration
102 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
103 result |= 1;
104 }
105 // next, scan forward after word past tab/spaces only;
106 // if ch isn't one of '[{(,' we can skip the test
107 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
108 && fw < endPos) {
109 while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
110 IsASpaceOrTab(ch) && fw < endPos) {
111 fw++;
112 }
113 if ((ch == '}' && brace)
114 // {bareword}: variable spec
115 || styler.Match(fw, "=>")) {
116 // [{(, bareword=>: hash literal
117 result |= 2;
118 }
119 }
120 return result;
121 }
122
123 static void skipWhitespaceComment(Accessor &styler, unsigned int &p)
124 {
125 // when backtracking, we need to skip whitespace and comments
126 int style;
127 while ((p > 0) && (style = styler.StyleAt(p),
128 style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
129 p--;
130 }
131
132 static int styleBeforeBracePair(Accessor &styler, unsigned int bk)
133 {
134 // backtrack to find open '{' corresponding to a '}', balanced
135 // return significant style to be tested for '/' disambiguation
136 int braceCount = 1;
137 if (bk == 0)
138 return SCE_PL_DEFAULT;
139 while (--bk > 0) {
140 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
141 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
142 if (bkch == ';') { // early out
143 break;
144 } else if (bkch == '}') {
145 braceCount++;
146 } else if (bkch == '{') {
147 if (--braceCount == 0) break;
148 }
149 }
150 }
151 if (bk > 0 && braceCount == 0) {
152 // balanced { found, bk > 0, skip more whitespace/comments
153 bk--;
154 skipWhitespaceComment(styler, bk);
155 return styler.StyleAt(bk);
156 }
157 return SCE_PL_DEFAULT;
158 }
159
160 static int styleCheckIdentifier(Accessor &styler, unsigned int bk)
161 {
162 // backtrack to classify sub-styles of identifier under test
163 // return sub-style to be tested for '/' disambiguation
164 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
165 return 1;
166 // backtrack to check for possible "->" or "::" before identifier
167 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
168 bk--;
169 }
170 while (bk > 0) {
171 int bkstyle = styler.StyleAt(bk);
172 if (bkstyle == SCE_PL_DEFAULT
173 || bkstyle == SCE_PL_COMMENTLINE) {
174 // skip whitespace, comments
175 } else if (bkstyle == SCE_PL_OPERATOR) {
176 // test for "->" and "::"
177 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
178 return 2;
179 } else
180 return 3; // bare identifier
181 bk--;
182 }
183 return 0;
184 }
185
186 static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos)
187 {
188 // looks forward for matching > on same line; a bit ugly
189 unsigned int fw = pos;
190 while (++fw < endPos) {
191 int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
192 if (fwch == '\r' || fwch == '\n') {
193 return 0;
194 } else if (fwch == '>') {
195 if (styler.Match(fw - 2, "<=>")) // '<=>' case
196 return 0;
197 return fw - pos;
198 }
199 }
200 return 0;
201 }
202
203 static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos)
204 {
205 // forward scan the current line to classify line for POD style
206 int state = -1;
207 while (pos <= endPos) {
208 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
209 if (ch == '\n' || ch == '\r' || pos >= endPos) {
210 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
211 break;
212 }
213 if (IsASpaceOrTab(ch)) { // whitespace, take note
214 if (state == -1)
215 state = SCE_PL_DEFAULT;
216 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
217 state = SCE_PL_POD_VERB;
218 } else if (state != SCE_PL_POD_VERB) { // regular POD line
219 state = SCE_PL_POD;
220 }
221 pos++;
222 }
223 if (state == -1)
224 state = SCE_PL_DEFAULT;
225 return state;
226 }
227
228 static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk)
229 {
230 // backtrack to identify if we're starting a subroutine prototype
231 // we also need to ignore whitespace/comments:
232 // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
233 styler.Flush();
234 skipWhitespaceComment(styler, bk);
235 if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
236 return false;
237 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
238 bk--;
239 }
240 skipWhitespaceComment(styler, bk);
241 if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
242 || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
243 return false;
244 return true;
245 }
246
static bool isMatch(const char *sref, char *s)
{
	// Match a per-line delimiter: returns true when s equals sref.
	// A single trailing CR (left over from a CR LF line end) is removed
	// from s first; note that s is modified in place when that happens.
	// The length is kept as size_t, the type strlen() returns, so there is
	// no narrowing or sign conversion.
	size_t len = strlen(s);
	if (len != 0 && s[len - 1] == '\r')
		s[len - 1] = '\0';
	return (strcmp(sref, s) == 0);
}
255
256 static int actualNumStyle(int numberStyle) {
257 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
258 return SCE_PL_STRING;
259 } else if (numberStyle == PERLNUM_BAD) {
260 return SCE_PL_ERROR;
261 }
262 return SCE_PL_NUMBER;
263 }
264
static int opposite(int ch) {
	// Returns the closing delimiter for an opening bracket character;
	// any other character is returned unchanged.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
272
273 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
274 WordList *keywordlists[], Accessor &styler) {
275
276 WordList &keywords = *keywordlists[0];
277
278 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
279 WordList reWords;
280 reWords.Set("elsif if split while");
281
282 // charset classes
283 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
284 CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
285 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
286 // lexing of "%*</" operators is non-trivial; these are missing in the set below
287 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
288 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
289 CharacterSet setModifiers(CharacterSet::setAlpha);
290 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
291 // setArray and setHash also accepts chars for special vars like $_,
292 // which are then truncated when the next char does not match setVar
293 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
294 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
295 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
296 CharacterSet &setPOD = setModifiers;
297 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
298 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
299 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];");
300 // for format identifiers
301 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
302 CharacterSet &setFormat = setHereDocDelim;
303
304 // Lexer for perl often has to backtrack to start of current style to determine
305 // which characters are being used as quotes, how deeply nested is the
306 // start position and what the termination string is for HERE documents.
307
308 class HereDocCls { // Class to manage HERE doc sequence
309 public:
310 int State; // 0: '<<' encountered
311 // 1: collect the delimiter
312 // 2: here doc text (lines after the delimiter)
313 int Quote; // the char after '<<'
314 bool Quoted; // true if Quote in ('\'','"','`')
315 int DelimiterLength; // strlen(Delimiter)
316 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
317 HereDocCls() {
318 State = 0;
319 Quote = 0;
320 Quoted = false;
321 DelimiterLength = 0;
322 Delimiter = new char[HERE_DELIM_MAX];
323 Delimiter[0] = '\0';
324 }
325 void Append(int ch) {
326 Delimiter[DelimiterLength++] = static_cast<char>(ch);
327 Delimiter[DelimiterLength] = '\0';
328 }
329 ~HereDocCls() {
330 delete []Delimiter;
331 }
332 };
333 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
334
335 class QuoteCls { // Class to manage quote pairs
336 public:
337 int Rep;
338 int Count;
339 int Up, Down;
340 QuoteCls() {
341 this->New(1);
342 }
343 void New(int r = 1) {
344 Rep = r;
345 Count = 0;
346 Up = '\0';
347 Down = '\0';
348 }
349 void Open(int u) {
350 Count++;
351 Up = u;
352 Down = opposite(Up);
353 }
354 };
355 QuoteCls Quote;
356
357 // additional state for number lexing
358 int numState = PERLNUM_DECIMAL;
359 int dotCount = 0;
360
361 unsigned int endPos = startPos + length;
362
363 // Backtrack to beginning of style if required...
364 // If in a long distance lexical state, backtrack to find quote characters.
365 // Includes strings (may be multi-line), numbers (additional state), format
366 // bodies, as well as POD sections.
367 if (initStyle == SCE_PL_HERE_Q
368 || initStyle == SCE_PL_HERE_QQ
369 || initStyle == SCE_PL_HERE_QX
370 || initStyle == SCE_PL_FORMAT
371 ) {
372 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
373 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
374 startPos--;
375 }
376 startPos = styler.LineStart(styler.GetLine(startPos));
377 initStyle = styler.StyleAt(startPos - 1);
378 }
379 if (initStyle == SCE_PL_STRING_Q
380 || initStyle == SCE_PL_STRING_QQ
381 || initStyle == SCE_PL_STRING_QX
382 || initStyle == SCE_PL_STRING_QR
383 || initStyle == SCE_PL_STRING_QW
384 || initStyle == SCE_PL_REGEX
385 || initStyle == SCE_PL_REGSUBST
386 || initStyle == SCE_PL_STRING
387 || initStyle == SCE_PL_BACKTICKS
388 || initStyle == SCE_PL_CHARACTER
389 || initStyle == SCE_PL_NUMBER
390 || initStyle == SCE_PL_IDENTIFIER
391 || initStyle == SCE_PL_ERROR
392 || initStyle == SCE_PL_SUB_PROTOTYPE
393 ) {
394 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
395 startPos--;
396 }
397 initStyle = SCE_PL_DEFAULT;
398 } else if (initStyle == SCE_PL_POD
399 || initStyle == SCE_PL_POD_VERB
400 ) {
401 // POD backtracking finds preceeding blank lines and goes back past them
402 int ln = styler.GetLine(startPos);
403 if (ln > 0) {
404 initStyle = styler.StyleAt(styler.LineStart(--ln));
405 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
406 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
407 ln--;
408 }
409 startPos = styler.LineStart(++ln);
410 initStyle = styler.StyleAt(startPos - 1);
411 } else {
412 startPos = 0;
413 initStyle = SCE_PL_DEFAULT;
414 }
415 }
416
417 // backFlag, backPos are additional state to aid identifier corner cases.
418 // Look backwards past whitespace and comments in order to detect either
419 // operator or keyword. Later updated as we go along.
420 int backFlag = BACK_NONE;
421 unsigned int backPos = startPos;
422 if (backPos > 0) {
423 backPos--;
424 skipWhitespaceComment(styler, backPos);
425 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
426 backFlag = BACK_OPERATOR;
427 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
428 backFlag = BACK_KEYWORD;
429 backPos++;
430 }
431
432 StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
433
434 for (; sc.More(); sc.Forward()) {
435
436 // Determine if the current state should terminate.
437 switch (sc.state) {
438 case SCE_PL_OPERATOR:
439 sc.SetState(SCE_PL_DEFAULT);
440 backFlag = BACK_OPERATOR;
441 backPos = sc.currentPos;
442 break;
443 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
444 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
445 || sc.Match('.', '.')
446 || sc.chPrev == '>') { // end of inputsymbol
447 sc.SetState(SCE_PL_DEFAULT);
448 }
449 break;
450 case SCE_PL_WORD: // keyword, plus special cases
451 if (!setWord.Contains(sc.ch)) {
452 char s[100];
453 sc.GetCurrent(s, sizeof(s));
454 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
455 sc.ChangeState(SCE_PL_DATASECTION);
456 } else {
457 if ((strcmp(s, "format") == 0)) {
458 sc.SetState(SCE_PL_FORMAT_IDENT);
459 HereDoc.State = 0;
460 } else {
461 sc.SetState(SCE_PL_DEFAULT);
462 }
463 backFlag = BACK_KEYWORD;
464 backPos = sc.currentPos;
465 }
466 }
467 break;
468 case SCE_PL_SCALAR:
469 case SCE_PL_ARRAY:
470 case SCE_PL_HASH:
471 case SCE_PL_SYMBOLTABLE:
472 if (sc.Match(':', ':')) { // skip ::
473 sc.Forward();
474 } else if (!setVar.Contains(sc.ch)) {
475 if (sc.LengthCurrent() == 1) {
476 // Special variable: $(, $_ etc.
477 sc.Forward();
478 }
479 sc.SetState(SCE_PL_DEFAULT);
480 }
481 break;
482 case SCE_PL_NUMBER:
483 // if no early break, number style is terminated at "(go through)"
484 if (sc.ch == '.') {
485 if (sc.chNext == '.') {
486 // double dot is always an operator (go through)
487 } else if (numState <= PERLNUM_FLOAT_EXP) {
488 // non-decimal number or float exponent, consume next dot
489 sc.SetState(SCE_PL_OPERATOR);
490 break;
491 } else { // decimal or vectors allows dots
492 dotCount++;
493 if (numState == PERLNUM_DECIMAL) {
494 if (dotCount <= 1) // number with one dot in it
495 break;
496 if (IsADigit(sc.chNext)) { // really a vector
497 numState = PERLNUM_VECTOR;
498 break;
499 }
500 // number then dot (go through)
501 } else if (IsADigit(sc.chNext)) // vectors
502 break;
503 // vector then dot (go through)
504 }
505 } else if (sc.ch == '_') {
506 // permissive underscoring for number and vector literals
507 break;
508 } else if (numState == PERLNUM_DECIMAL) {
509 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
510 numState = PERLNUM_FLOAT_EXP;
511 if (sc.chNext == '+' || sc.chNext == '-') {
512 sc.Forward();
513 }
514 break;
515 } else if (IsADigit(sc.ch))
516 break;
517 // number then word (go through)
518 } else if (numState == PERLNUM_HEX) {
519 if (IsADigit(sc.ch, 16))
520 break;
521 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
522 if (IsADigit(sc.ch)) // vector
523 break;
524 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
525 sc.ChangeState(SCE_PL_IDENTIFIER);
526 break;
527 }
528 // vector then word (go through)
529 } else if (IsADigit(sc.ch)) {
530 if (numState == PERLNUM_FLOAT_EXP) {
531 break;
532 } else if (numState == PERLNUM_OCTAL) {
533 if (sc.ch <= '7') break;
534 } else if (numState == PERLNUM_BINARY) {
535 if (sc.ch <= '1') break;
536 }
537 // mark invalid octal, binary numbers (go through)
538 numState = PERLNUM_BAD;
539 break;
540 }
541 // complete current number or vector
542 sc.ChangeState(actualNumStyle(numState));
543 sc.SetState(SCE_PL_DEFAULT);
544 break;
545 case SCE_PL_COMMENTLINE:
546 if (sc.atLineEnd) {
547 sc.SetState(SCE_PL_DEFAULT);
548 }
549 break;
550 case SCE_PL_HERE_DELIM:
551 if (HereDoc.State == 0) { // '<<' encountered
552 int delim_ch = sc.chNext;
553 int ws_skip = 0;
554 HereDoc.State = 1; // pre-init HERE doc class
555 HereDoc.Quote = sc.chNext;
556 HereDoc.Quoted = false;
557 HereDoc.DelimiterLength = 0;
558 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
559 if (IsASpaceOrTab(delim_ch)) {
560 // skip whitespace; legal only for quoted delimiters
561 unsigned int i = sc.currentPos + 1;
562 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
563 i++;
564 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
565 }
566 ws_skip = i - sc.currentPos - 1;
567 }
568 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
569 // a quoted here-doc delimiter; skip any whitespace
570 sc.Forward(ws_skip + 1);
571 HereDoc.Quote = delim_ch;
572 HereDoc.Quoted = true;
573 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
574 || ws_skip > 0) {
575 // left shift << or <<= operator cases
576 // restore position if operator
577 sc.ChangeState(SCE_PL_OPERATOR);
578 sc.ForwardSetState(SCE_PL_DEFAULT);
579 backFlag = BACK_OPERATOR;
580 backPos = sc.currentPos;
581 HereDoc.State = 0;
582 } else {
583 // specially handle initial '\' for identifier
584 if (ws_skip == 0 && HereDoc.Quote == '\\')
585 sc.Forward();
586 // an unquoted here-doc delimiter, no special handling
587 // (cannot be prefixed by spaces/tabs), or
588 // symbols terminates; deprecated zero-length delimiter
589 }
590 } else if (HereDoc.State == 1) { // collect the delimiter
591 backFlag = BACK_NONE;
592 if (HereDoc.Quoted) { // a quoted here-doc delimiter
593 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
594 sc.ForwardSetState(SCE_PL_DEFAULT);
595 } else if (!sc.atLineEnd) {
596 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
597 sc.Forward();
598 }
599 if (sc.ch != '\r') { // skip CR if CRLF
600 HereDoc.Append(sc.ch);
601 }
602 }
603 } else { // an unquoted here-doc delimiter
604 if (setHereDocDelim.Contains(sc.ch)) {
605 HereDoc.Append(sc.ch);
606 } else {
607 sc.SetState(SCE_PL_DEFAULT);
608 }
609 }
610 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
611 sc.SetState(SCE_PL_ERROR);
612 HereDoc.State = 0;
613 }
614 }
615 break;
616 case SCE_PL_HERE_Q:
617 case SCE_PL_HERE_QQ:
618 case SCE_PL_HERE_QX: {
619 // also implies HereDoc.State == 2
620 sc.Complete();
621 while (!sc.atLineEnd)
622 sc.Forward();
623 char s[HERE_DELIM_MAX];
624 sc.GetCurrent(s, sizeof(s));
625 if (isMatch(HereDoc.Delimiter, s)) {
626 sc.SetState(SCE_PL_DEFAULT);
627 backFlag = BACK_NONE;
628 HereDoc.State = 0;
629 }
630 } break;
631 case SCE_PL_POD:
632 case SCE_PL_POD_VERB: {
633 unsigned int fw = sc.currentPos;
634 int ln = styler.GetLine(fw);
635 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
636 sc.SetState(SCE_PL_POD);
637 sc.Forward(4);
638 sc.SetState(SCE_PL_DEFAULT);
639 styler.SetLineState(ln, SCE_PL_POD);
640 break;
641 }
642 int pod = podLineScan(styler, fw, endPos); // classify POD line
643 styler.SetLineState(ln, pod);
644 if (pod == SCE_PL_DEFAULT) {
645 if (sc.state == SCE_PL_POD_VERB) {
646 unsigned int fw2 = fw;
647 while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
648 fw = fw2++; // penultimate line (last blank line)
649 pod = podLineScan(styler, fw2, endPos);
650 styler.SetLineState(styler.GetLine(fw2), pod);
651 }
652 if (pod == SCE_PL_POD) { // truncate verbatim POD early
653 sc.SetState(SCE_PL_POD);
654 } else
655 fw = fw2;
656 } else
657 pod = SCE_PL_POD;
658 } else {
659 if (pod == SCE_PL_POD_VERB // still part of current paragraph
660 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
661 pod = SCE_PL_POD;
662 styler.SetLineState(ln, pod);
663 } else if (pod == SCE_PL_POD
664 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
665 pod = SCE_PL_POD_VERB;
666 styler.SetLineState(ln, pod);
667 }
668 sc.SetState(pod);
669 }
670 sc.Forward(fw - sc.currentPos); // commit style
671 } break;
672 case SCE_PL_REGEX:
673 case SCE_PL_STRING_QR:
674 if (Quote.Rep <= 0) {
675 if (!setModifiers.Contains(sc.ch))
676 sc.SetState(SCE_PL_DEFAULT);
677 } else if (!Quote.Up && !IsASpace(sc.ch)) {
678 Quote.Open(sc.ch);
679 } else if (sc.ch == '\\' && Quote.Up != '\\') {
680 sc.Forward();
681 } else if (sc.ch == Quote.Down) {
682 Quote.Count--;
683 if (Quote.Count == 0)
684 Quote.Rep--;
685 } else if (sc.ch == Quote.Up) {
686 Quote.Count++;
687 }
688 break;
689 case SCE_PL_REGSUBST:
690 if (Quote.Rep <= 0) {
691 if (!setModifiers.Contains(sc.ch))
692 sc.SetState(SCE_PL_DEFAULT);
693 } else if (!Quote.Up && !IsASpace(sc.ch)) {
694 Quote.Open(sc.ch);
695 } else if (sc.ch == '\\' && Quote.Up != '\\') {
696 sc.Forward();
697 } else if (Quote.Count == 0 && Quote.Rep == 1) {
698 // We matched something like s(...) or tr{...}, Perl 5.10
699 // appears to allow almost any character for use as the
700 // next delimiters. Whitespace and comments are accepted in
701 // between, but we'll limit to whitespace here.
702 // For '#', if no whitespace in between, it's a delimiter.
703 if (IsASpace(sc.ch)) {
704 // Keep going
705 } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
706 sc.SetState(SCE_PL_DEFAULT);
707 } else {
708 Quote.Open(sc.ch);
709 }
710 } else if (sc.ch == Quote.Down) {
711 Quote.Count--;
712 if (Quote.Count == 0)
713 Quote.Rep--;
714 if (Quote.Up == Quote.Down)
715 Quote.Count++;
716 } else if (sc.ch == Quote.Up) {
717 Quote.Count++;
718 }
719 break;
720 case SCE_PL_STRING_Q:
721 case SCE_PL_STRING_QQ:
722 case SCE_PL_STRING_QX:
723 case SCE_PL_STRING_QW:
724 case SCE_PL_STRING:
725 case SCE_PL_CHARACTER:
726 case SCE_PL_BACKTICKS:
727 if (!Quote.Down && !IsASpace(sc.ch)) {
728 Quote.Open(sc.ch);
729 } else if (sc.ch == '\\' && Quote.Up != '\\') {
730 sc.Forward();
731 } else if (sc.ch == Quote.Down) {
732 Quote.Count--;
733 if (Quote.Count == 0)
734 sc.ForwardSetState(SCE_PL_DEFAULT);
735 } else if (sc.ch == Quote.Up) {
736 Quote.Count++;
737 }
738 break;
739 case SCE_PL_SUB_PROTOTYPE: {
740 int i = 0;
741 // forward scan; must all be valid proto characters
742 while (setSubPrototype.Contains(sc.GetRelative(i)))
743 i++;
744 if (sc.GetRelative(i) == ')') { // valid sub prototype
745 sc.Forward(i);
746 sc.ForwardSetState(SCE_PL_DEFAULT);
747 } else {
748 // abandon prototype, restart from '('
749 sc.ChangeState(SCE_PL_OPERATOR);
750 sc.SetState(SCE_PL_DEFAULT);
751 }
752 } break;
753 case SCE_PL_FORMAT: {
754 sc.Complete();
755 while (!sc.atLineEnd)
756 sc.Forward();
757 char s[10];
758 sc.GetCurrent(s, sizeof(s));
759 if (isMatch(".", s))
760 sc.SetState(SCE_PL_DEFAULT);
761 } break;
762 case SCE_PL_ERROR:
763 break;
764 }
765 // Needed for specific continuation styles (one follows the other)
766 switch (sc.state) {
767 // continued from SCE_PL_WORD
768 case SCE_PL_FORMAT_IDENT:
769 // occupies HereDoc state 3 to avoid clashing with HERE docs
770 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
771 sc.ChangeState(SCE_PL_DEFAULT);
772 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
773 sc.Forward();
774 sc.SetState(SCE_PL_FORMAT_IDENT);
775 }
776 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
777 if (sc.ch != '=') {
778 do {
779 sc.Forward();
780 } while (setFormat.Contains(sc.ch));
781 }
782 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
783 sc.Forward();
784 if (sc.ch == '=') {
785 sc.ForwardSetState(SCE_PL_DEFAULT);
786 HereDoc.State = 3;
787 } else {
788 // invalid indentifier; inexact fallback, but hey
789 sc.ChangeState(SCE_PL_IDENTIFIER);
790 sc.SetState(SCE_PL_DEFAULT);
791 }
792 } else {
793 sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
794 }
795 backFlag = BACK_NONE;
796 break;
797 }
798
799 // Must check end of HereDoc states here before default state is handled
800 if (HereDoc.State == 1 && sc.atLineEnd) {
801 // Begin of here-doc (the line after the here-doc delimiter):
802 // Lexically, the here-doc starts from the next line after the >>, but the
803 // first line of here-doc seem to follow the style of the last EOL sequence
804 int st_new = SCE_PL_HERE_QQ;
805 HereDoc.State = 2;
806 if (HereDoc.Quoted) {
807 if (sc.state == SCE_PL_HERE_DELIM) {
808 // Missing quote at end of string! We are stricter than perl.
809 // Colour here-doc anyway while marking this bit as an error.
810 sc.ChangeState(SCE_PL_ERROR);
811 }
812 switch (HereDoc.Quote) {
813 case '\'': st_new = SCE_PL_HERE_Q ; break;
814 case '"' : st_new = SCE_PL_HERE_QQ; break;
815 case '`' : st_new = SCE_PL_HERE_QX; break;
816 }
817 } else {
818 if (HereDoc.Quote == '\\')
819 st_new = SCE_PL_HERE_Q;
820 }
821 sc.SetState(st_new);
822 }
823 if (HereDoc.State == 3 && sc.atLineEnd) {
824 // Start of format body.
825 HereDoc.State = 0;
826 sc.SetState(SCE_PL_FORMAT);
827 }
828
829 // Determine if a new state should be entered.
830 if (sc.state == SCE_PL_DEFAULT) {
831 if (IsADigit(sc.ch) ||
832 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
833 sc.SetState(SCE_PL_NUMBER);
834 backFlag = BACK_NONE;
835 numState = PERLNUM_DECIMAL;
836 dotCount = 0;
837 if (sc.ch == '0') { // hex,bin,octal
838 if (sc.chNext == 'x') {
839 numState = PERLNUM_HEX;
840 } else if (sc.chNext == 'b') {
841 numState = PERLNUM_BINARY;
842 } else if (IsADigit(sc.chNext)) {
843 numState = PERLNUM_OCTAL;
844 }
845 if (numState != PERLNUM_DECIMAL) {
846 sc.Forward();
847 }
848 } else if (sc.ch == 'v') { // vector
849 numState = PERLNUM_V_VECTOR;
850 }
851 } else if (setWord.Contains(sc.ch)) {
852 // if immediately prefixed by '::', always a bareword
853 sc.SetState(SCE_PL_WORD);
854 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
855 sc.ChangeState(SCE_PL_IDENTIFIER);
856 }
857 unsigned int bk = sc.currentPos;
858 unsigned int fw = sc.currentPos + 1;
859 // first check for possible quote-like delimiter
860 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
861 sc.ChangeState(SCE_PL_REGSUBST);
862 Quote.New(2);
863 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
864 sc.ChangeState(SCE_PL_REGEX);
865 Quote.New();
866 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
867 sc.ChangeState(SCE_PL_STRING_Q);
868 Quote.New();
869 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
870 sc.ChangeState(SCE_PL_REGSUBST);
871 Quote.New(2);
872 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
873 sc.ChangeState(SCE_PL_REGSUBST);
874 Quote.New(2);
875 sc.Forward();
876 fw++;
877 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
878 && !setWord.Contains(sc.GetRelative(2))) {
879 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
880 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
881 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
882 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
883 Quote.New();
884 sc.Forward();
885 fw++;
886 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
887 !setWord.Contains(sc.chNext) ||
888 (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
889 sc.ChangeState(SCE_PL_OPERATOR);
890 }
891 // if potentially a keyword, scan forward and grab word, then check
892 // if it's really one; if yes, disambiguation test is performed
893 // otherwise it is always a bareword and we skip a lot of scanning
894 if (sc.state == SCE_PL_WORD) {
895 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
896 fw++;
897 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
898 sc.ChangeState(SCE_PL_IDENTIFIER);
899 }
900 }
901 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
902 // for quote-like delimiters/keywords, attempt to disambiguate
903 // to select for bareword, change state -> SCE_PL_IDENTIFIER
904 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
905 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
906 sc.ChangeState(SCE_PL_IDENTIFIER);
907 }
908 backFlag = BACK_NONE;
909 } else if (sc.ch == '#') {
910 sc.SetState(SCE_PL_COMMENTLINE);
911 } else if (sc.ch == '\"') {
912 sc.SetState(SCE_PL_STRING);
913 Quote.New();
914 Quote.Open(sc.ch);
915 backFlag = BACK_NONE;
916 } else if (sc.ch == '\'') {
917 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
918 // Archaic call
919 sc.SetState(SCE_PL_IDENTIFIER);
920 } else {
921 sc.SetState(SCE_PL_CHARACTER);
922 Quote.New();
923 Quote.Open(sc.ch);
924 }
925 backFlag = BACK_NONE;
926 } else if (sc.ch == '`') {
927 sc.SetState(SCE_PL_BACKTICKS);
928 Quote.New();
929 Quote.Open(sc.ch);
930 backFlag = BACK_NONE;
931 } else if (sc.ch == '$') {
932 sc.SetState(SCE_PL_SCALAR);
933 if (sc.chNext == '{') {
934 sc.ForwardSetState(SCE_PL_OPERATOR);
935 } else if (IsASpace(sc.chNext)) {
936 sc.ForwardSetState(SCE_PL_DEFAULT);
937 } else {
938 sc.Forward();
939 if (sc.Match('`', '`') || sc.Match(':', ':')) {
940 sc.Forward();
941 }
942 }
943 backFlag = BACK_NONE;
944 } else if (sc.ch == '@') {
945 sc.SetState(SCE_PL_ARRAY);
946 if (setArray.Contains(sc.chNext)) {
947 // no special treatment
948 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
949 sc.Forward(2);
950 } else if (sc.chNext == '{' || sc.chNext == '[') {
951 sc.ForwardSetState(SCE_PL_OPERATOR);
952 } else {
953 sc.ChangeState(SCE_PL_OPERATOR);
954 }
955 backFlag = BACK_NONE;
956 } else if (setPreferRE.Contains(sc.ch)) {
957 // Explicit backward peeking to set a consistent preferRE for
958 // any slash found, so no longer need to track preferRE state.
959 // Find first previous significant lexed element and interpret.
960 // A few symbols shares this code for disambiguation.
961 bool preferRE = false;
962 bool isHereDoc = sc.Match('<', '<');
963 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
964 unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
965 unsigned int bkend;
966 sc.Complete();
967 styler.Flush();
968 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
969 hereDocSpace = true;
970 skipWhitespaceComment(styler, bk);
971 if (bk == 0) {
972 // avoid backward scanning breakage
973 preferRE = true;
974 } else {
975 int bkstyle = styler.StyleAt(bk);
976 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
977 switch(bkstyle) {
978 case SCE_PL_OPERATOR:
979 preferRE = true;
980 if (bkch == ')' || bkch == ']') {
981 preferRE = false;
982 } else if (bkch == '}') {
983 // backtrack by counting balanced brace pairs
984 // needed to test for variables like ${}, @{} etc.
985 bkstyle = styleBeforeBracePair(styler, bk);
986 if (bkstyle == SCE_PL_SCALAR
987 || bkstyle == SCE_PL_ARRAY
988 || bkstyle == SCE_PL_HASH
989 || bkstyle == SCE_PL_SYMBOLTABLE
990 || bkstyle == SCE_PL_OPERATOR) {
991 preferRE = false;
992 }
993 } else if (bkch == '+' || bkch == '-') {
994 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
995 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
996 // exceptions for operators: unary suffixes ++, --
997 preferRE = false;
998 }
999 break;
1000 case SCE_PL_IDENTIFIER:
1001 preferRE = true;
1002 bkstyle = styleCheckIdentifier(styler, bk);
1003 if ((bkstyle == 1) || (bkstyle == 2)) {
1004 // inputsymbol or var with "->" or "::" before identifier
1005 preferRE = false;
1006 } else if (bkstyle == 3) {
1007 // bare identifier, test cases follows:
1008 if (sc.ch == '/') {
1009 // if '/', /PATTERN/ unless digit/space immediately after '/'
1010 // if '//', always expect defined-or operator to follow identifier
1011 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1012 preferRE = false;
1013 } else if (sc.ch == '*' || sc.ch == '%') {
1014 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1015 preferRE = false;
1016 } else if (sc.ch == '<') {
1017 if (IsASpace(sc.chNext) || sc.chNext == '=')
1018 preferRE = false;
1019 }
1020 }
1021 break;
1022 case SCE_PL_SCALAR: // for $var<< case:
1023 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1024 preferRE = true;
1025 break;
1026 case SCE_PL_WORD:
1027 preferRE = true;
1028 // for HERE docs, always true
1029 if (sc.ch == '/') {
1030 // adopt heuristics similar to vim-style rules:
1031 // keywords always forced as /PATTERN/: split, if, elsif, while
1032 // everything else /PATTERN/ unless digit/space immediately after '/'
1033 // for '//', defined-or favoured unless special keywords
1034 bkend = bk + 1;
1035 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1036 bk--;
1037 }
1038 if (isPerlKeyword(bk, bkend, reWords, styler))
1039 break;
1040 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1041 preferRE = false;
1042 } else if (sc.ch == '*' || sc.ch == '%') {
1043 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1044 preferRE = false;
1045 } else if (sc.ch == '<') {
1046 if (IsASpace(sc.chNext) || sc.chNext == '=')
1047 preferRE = false;
1048 }
1049 break;
1050 // other styles uses the default, preferRE=false
1051 case SCE_PL_POD:
1052 case SCE_PL_HERE_Q:
1053 case SCE_PL_HERE_QQ:
1054 case SCE_PL_HERE_QX:
1055 preferRE = true;
1056 break;
1057 }
1058 }
1059 backFlag = BACK_NONE;
1060 if (isHereDoc) { // handle '<<', HERE doc
1061 if (preferRE) {
1062 sc.SetState(SCE_PL_HERE_DELIM);
1063 HereDoc.State = 0;
1064 } else { // << operator
1065 sc.SetState(SCE_PL_OPERATOR);
1066 sc.Forward();
1067 }
1068 } else if (sc.ch == '*') { // handle '*', typeglob
1069 if (preferRE) {
1070 sc.SetState(SCE_PL_SYMBOLTABLE);
1071 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1072 sc.Forward(2);
1073 } else if (sc.chNext == '{') {
1074 sc.ForwardSetState(SCE_PL_OPERATOR);
1075 } else {
1076 sc.Forward();
1077 }
1078 } else {
1079 sc.SetState(SCE_PL_OPERATOR);
1080 if (sc.chNext == '*') // exponentiation
1081 sc.Forward();
1082 }
1083 } else if (sc.ch == '%') { // handle '%', hash
1084 if (preferRE) {
1085 sc.SetState(SCE_PL_HASH);
1086 if (setHash.Contains(sc.chNext)) {
1087 sc.Forward();
1088 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1089 sc.Forward(2);
1090 } else if (sc.chNext == '{') {
1091 sc.ForwardSetState(SCE_PL_OPERATOR);
1092 } else {
1093 sc.ChangeState(SCE_PL_OPERATOR);
1094 }
1095 } else {
1096 sc.SetState(SCE_PL_OPERATOR);
1097 }
1098 } else if (sc.ch == '<') { // handle '<', inputsymbol
1099 if (preferRE) {
1100 // forward scan
1101 int i = inputsymbolScan(styler, sc.currentPos, endPos);
1102 if (i > 0) {
1103 sc.SetState(SCE_PL_IDENTIFIER);
1104 sc.Forward(i);
1105 } else {
1106 sc.SetState(SCE_PL_OPERATOR);
1107 }
1108 } else {
1109 sc.SetState(SCE_PL_OPERATOR);
1110 }
1111 } else { // handle '/', regexp
1112 if (preferRE) {
1113 sc.SetState(SCE_PL_REGEX);
1114 Quote.New();
1115 Quote.Open(sc.ch);
1116 } else { // / and // operators
1117 sc.SetState(SCE_PL_OPERATOR);
1118 if (sc.chNext == '/') {
1119 sc.Forward();
1120 }
1121 }
1122 }
1123 } else if (sc.ch == '=' // POD
1124 && setPOD.Contains(sc.chNext)
1125 && sc.atLineStart) {
1126 sc.SetState(SCE_PL_POD);
1127 backFlag = BACK_NONE;
1128 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1129 unsigned int bk = sc.currentPos;
1130 unsigned int fw = 2;
1131 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1132 !setWord.Contains(sc.GetRelative(2))) {
1133 sc.SetState(SCE_PL_WORD);
1134 } else {
1135 // nominally a minus and bareword; find extent of bareword
1136 while (setWord.Contains(sc.GetRelative(fw)))
1137 fw++;
1138 sc.SetState(SCE_PL_OPERATOR);
1139 }
1140 // force to bareword for hash key => or {variable literal} cases
1141 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1142 sc.ChangeState(SCE_PL_IDENTIFIER);
1143 }
1144 backFlag = BACK_NONE;
1145 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1146 sc.Complete();
1147 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1148 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1149 backFlag = BACK_NONE;
1150 } else {
1151 sc.SetState(SCE_PL_OPERATOR);
1152 }
1153 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1154 sc.SetState(SCE_PL_OPERATOR);
1155 if (sc.Match('.', '.')) { // .. and ...
1156 sc.Forward();
1157 if (sc.chNext == '.') sc.Forward();
1158 }
1159 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1160 sc.SetState(SCE_PL_DATASECTION);
1161 } else {
1162 // keep colouring defaults
1163 sc.Complete();
1164 }
1165 }
1166 }
1167 sc.Complete();
1168 }
1169
1170 static bool IsCommentLine(int line, Accessor &styler) {
1171 int pos = styler.LineStart(line);
1172 int eol_pos = styler.LineStart(line + 1) - 1;
1173 for (int i = pos; i < eol_pos; i++) {
1174 char ch = styler[i];
1175 int style = styler.StyleAt(i);
1176 if (ch == '#' && style == SCE_PL_COMMENTLINE)
1177 return true;
1178 else if (!IsASpaceOrTab(ch))
1179 return false;
1180 }
1181 return false;
1182 }
1183
1184 static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1185 Accessor &styler) {
1186 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1187 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1188 // Custom folding of POD and packages
1189
1190 // property fold.perl.pod
1191 // Enable folding Pod blocks when using the Perl lexer.
1192 bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1193
1194 // property fold.perl.package
1195 // Enable folding packages when using the Perl lexer.
1196 bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1197
1198 unsigned int endPos = startPos + length;
1199 int visibleChars = 0;
1200 int lineCurrent = styler.GetLine(startPos);
1201 int levelPrev = SC_FOLDLEVELBASE;
1202 if (lineCurrent > 0)
1203 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1204 int levelCurrent = levelPrev;
1205 char chNext = styler[startPos];
1206 char chPrev = styler.SafeGetCharAt(startPos - 1);
1207 int styleNext = styler.StyleAt(startPos);
1208 // Used at end of line to determine if the line was a package definition
1209 bool isPackageLine = false;
1210 bool isPodHeading = false;
1211 for (unsigned int i = startPos; i < endPos; i++) {
1212 char ch = chNext;
1213 chNext = styler.SafeGetCharAt(i + 1);
1214 int style = styleNext;
1215 styleNext = styler.StyleAt(i + 1);
1216 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1217 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1218 // Comment folding
1219 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1220 {
1221 if (!IsCommentLine(lineCurrent - 1, styler)
1222 && IsCommentLine(lineCurrent + 1, styler))
1223 levelCurrent++;
1224 else if (IsCommentLine(lineCurrent - 1, styler)
1225 && !IsCommentLine(lineCurrent+1, styler))
1226 levelCurrent--;
1227 }
1228 if (style == SCE_PL_OPERATOR) {
1229 if (ch == '{') {
1230 levelCurrent++;
1231 } else if (ch == '}') {
1232 levelCurrent--;
1233 }
1234 }
1235 // Custom POD folding
1236 if (foldPOD && atLineStart) {
1237 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1238 if (style == SCE_PL_POD) {
1239 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1240 levelCurrent++;
1241 else if (styler.Match(i, "=cut"))
1242 levelCurrent--;
1243 else if (styler.Match(i, "=head"))
1244 isPodHeading = true;
1245 } else if (style == SCE_PL_DATASECTION) {
1246 if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1247 levelCurrent++;
1248 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1249 levelCurrent--;
1250 else if (styler.Match(i, "=head"))
1251 isPodHeading = true;
1252 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1253 // reset needed as level test is vs. SC_FOLDLEVELBASE
1254 else if (styler.Match(i, "__END__"))
1255 levelCurrent = SC_FOLDLEVELBASE;
1256 }
1257 }
1258 // Custom package folding
1259 if (foldPackage && atLineStart) {
1260 if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1261 isPackageLine = true;
1262 }
1263 }
1264
1265 if (atEOL) {
1266 int lev = levelPrev;
1267 if (isPodHeading) {
1268 lev = levelPrev - 1;
1269 lev |= SC_FOLDLEVELHEADERFLAG;
1270 isPodHeading = false;
1271 }
1272 // Check if line was a package declaration
1273 // because packages need "special" treatment
1274 if (isPackageLine) {
1275 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1276 levelCurrent = SC_FOLDLEVELBASE + 1;
1277 isPackageLine = false;
1278 }
1279 lev |= levelCurrent << 16;
1280 if (visibleChars == 0 && foldCompact)
1281 lev |= SC_FOLDLEVELWHITEFLAG;
1282 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1283 lev |= SC_FOLDLEVELHEADERFLAG;
1284 if (lev != styler.LevelAt(lineCurrent)) {
1285 styler.SetLevel(lineCurrent, lev);
1286 }
1287 lineCurrent++;
1288 levelPrev = levelCurrent;
1289 visibleChars = 0;
1290 }
1291 if (!isspacechar(ch))
1292 visibleChars++;
1293 chPrev = ch;
1294 }
1295 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1296 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1297 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1298 }
1299
// Descriptions of the keyword lists used by the Perl lexer.  There is a
// single list, "Keywords"; the trailing null entry terminates the array.
static const char * const perlWordListDesc[] = { "Keywords", 0 };
1304
1305 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);