]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
1 // Scintilla source code edit control
3 ** Lexer for subset of Perl.
5 // Copyright 1998-2007 by Neil Hodgson <neilh@scintilla.org>
6 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7 // The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
24 using namespace Scintilla
;
27 #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
29 #define PERLNUM_OCTAL 3
30 #define PERLNUM_FLOAT 4 // actually exponent part
31 #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
32 #define PERLNUM_VECTOR 6
33 #define PERLNUM_V_VECTOR 7
36 #define BACK_NONE 0 // lookback state for bareword disambiguation:
37 #define BACK_OPERATOR 1 // whitespace/comments are insignificant
38 #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
40 #define HERE_DELIM_MAX 256
// Returns true when ch is one of the two end-of-line characters
// recognised by the lexer: carriage return ('\r') or line feed ('\n').
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}
46 static bool isSingleCharOp(char ch
) {
50 return (NULL
!= strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet
));
53 static inline bool isPerlOperator(char ch
) {
54 if (ch
== '^' || ch
== '&' || ch
== '\\' ||
55 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
56 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
57 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
58 ch
== '>' || ch
== ',' ||
59 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~')
61 // these chars are already tested before this call
62 // ch == '%' || ch == '*' || ch == '<' || ch == '/' ||
66 static bool isPerlKeyword(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
68 unsigned int i
, len
= end
- start
;
69 if (len
> 30) { len
= 30; }
70 for (i
= 0; i
< len
; i
++, start
++) s
[i
] = styler
[start
];
72 return keywords
.InList(s
);
75 // Note: as lexer uses chars, UTF-8 bytes are considered as <0 values
76 // Note: iswordchar() was used in only one place in LexPerl, it is
77 // unnecessary as '.' is processed as the concatenation operator, so
78 // only isWordStart() is used in LexPerl
// Returns true when ch can begin (or continue) a Perl word: any byte
// outside the 7-bit ASCII range, an ASCII letter or digit, or '_'.
// Bytes >= 0x80 (negative when char is signed) are accepted so that
// UTF-8 encoded identifiers are treated as word characters.
static inline bool isWordStart(char ch) {
	const unsigned char uch = static_cast<unsigned char>(ch);
	// Portable replacement for the non-standard isascii(): 7-bit range test.
	if (uch >= 0x80)
		return true;
	// uch is ASCII here, so the <ctype.h> classifier gets a valid argument.
	return isalnum(uch) || ch == '_';
}
// Returns true when ch terminates a variable name: it must be a 7-bit
// ASCII character that is not a letter or digit and is not one of
// '#', '$', '_' or '\''. Bytes >= 0x80 never end a variable name.
static inline bool isEndVar(char ch) {
	const unsigned char uch = static_cast<unsigned char>(ch);
	// Portable replacement for the non-standard isascii(): 7-bit range test.
	if (uch >= 0x80)
		return false;
	// uch is ASCII here, so the <ctype.h> classifier gets a valid argument.
	if (isalnum(uch))
		return false;
	return ch != '#' && ch != '$' && ch != '_' && ch != '\'';
}
// Returns true when ch is one of the characters this predicate rejects as
// a quote-like delimiter: any byte outside 7-bit ASCII, an ASCII letter
// or digit, or '_'. Callers negate the result to detect a delimiter.
static inline bool isNonQuote(char ch) {
	const unsigned char uch = static_cast<unsigned char>(ch);
	// Portable replacement for the non-standard isascii(): 7-bit range test.
	if (uch >= 0x80)
		return true;
	// uch is ASCII here, so the <ctype.h> classifier gets a valid argument.
	return isalnum(uch) || ch == '_';
}
93 static inline char actualNumStyle(int numberStyle
) {
94 if (numberStyle
== PERLNUM_VECTOR
|| numberStyle
== PERLNUM_V_VECTOR
) {
96 } else if (numberStyle
== PERLNUM_BAD
) {
102 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
103 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
107 if (*val
!= styler
[pos
++]) {
115 static char opposite(char ch
) {
127 static void ColourisePerlDoc(unsigned int startPos
, int length
, int initStyle
,
128 WordList
*keywordlists
[], Accessor
&styler
) {
130 // Lexer for perl often has to backtrack to start of current style to determine
131 // which characters are being used as quotes, how deeply nested is the
132 // start position and what the termination string is for here documents
134 WordList
&keywords
= *keywordlists
[0];
136 // keywords that forces /PATTERN/ at all times
138 reWords
.Set("elsif if split while");
142 int State
; // 0: '<<' encountered
143 // 1: collect the delimiter
144 // 2: here doc text (lines after the delimiter)
145 char Quote
; // the char after '<<'
146 bool Quoted
; // true if Quote in ('\'','"','`')
147 int DelimiterLength
; // strlen(Delimiter)
148 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
154 Delimiter
= new char[HERE_DELIM_MAX
];
161 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
186 int state
= initStyle
;
187 char numState
= PERLNUM_DECIMAL
;
189 unsigned int lengthDoc
= startPos
+ length
;
190 //int sookedpos = 0; // these have no apparent use, see POD state
192 //sooked[sookedpos] = '\0';
194 styler
.StartAt(startPos
, static_cast<char>(STYLE_MAX
));
195 // If in a long distance lexical state, seek to the beginning to find quote characters
196 // Perl strings can be multi-line with embedded newlines, so backtrack.
197 // Perl numbers have additional state during lexing, so backtrack too.
198 if (state
== SCE_PL_HERE_Q
|| state
== SCE_PL_HERE_QQ
|| state
== SCE_PL_HERE_QX
) {
199 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_PL_HERE_DELIM
)) {
202 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
203 state
= styler
.StyleAt(startPos
- 1);
205 // Backtrack for format body.
206 if (state
== SCE_PL_FORMAT
) {
207 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_PL_FORMAT_IDENT
)) {
210 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
211 state
= styler
.StyleAt(startPos
- 1);
213 if ( state
== SCE_PL_STRING_Q
214 || state
== SCE_PL_STRING_QQ
215 || state
== SCE_PL_STRING_QX
216 || state
== SCE_PL_STRING_QR
217 || state
== SCE_PL_STRING_QW
218 || state
== SCE_PL_REGEX
219 || state
== SCE_PL_REGSUBST
220 || state
== SCE_PL_STRING
221 || state
== SCE_PL_BACKTICKS
222 || state
== SCE_PL_CHARACTER
223 || state
== SCE_PL_NUMBER
224 || state
== SCE_PL_IDENTIFIER
225 || state
== SCE_PL_ERROR
226 || state
== SCE_PL_SUB_PROTOTYPE
228 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
231 state
= SCE_PL_DEFAULT
;
234 // lookback at start of lexing to set proper state for backflag
235 // after this, they are updated when elements are lexed
236 int backflag
= BACK_NONE
;
237 unsigned int backPos
= startPos
;
240 int sty
= SCE_PL_DEFAULT
;
241 while ((backPos
> 0) && (sty
= styler
.StyleAt(backPos
),
242 sty
== SCE_PL_DEFAULT
|| sty
== SCE_PL_COMMENTLINE
))
244 if (sty
== SCE_PL_OPERATOR
)
245 backflag
= BACK_OPERATOR
;
246 else if (sty
== SCE_PL_WORD
)
247 backflag
= BACK_KEYWORD
;
250 styler
.StartAt(startPos
, static_cast<char>(STYLE_MAX
));
251 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
254 char chNext
= styler
[startPos
];
255 styler
.StartSegment(startPos
);
257 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
259 // if the current character is not consumed due to the completion of an
260 // earlier style, lexing can be restarted via a simple goto
262 chNext
= styler
.SafeGetCharAt(i
+ 1);
263 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
265 if (styler
.IsLeadByte(ch
)) {
266 chNext
= styler
.SafeGetCharAt(i
+ 2);
271 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
272 styler
.ColourTo(i
, state
);
277 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
278 // Begin of here-doc (the line after the here-doc delimiter):
279 // Lexically, the here-doc starts from the next line after the <<, but the
280 // first line of here-doc seems to follow the style of the last EOL sequence
282 if (HereDoc
.Quoted
) {
283 if (state
== SCE_PL_HERE_DELIM
) {
284 // Missing quote at end of string! We are stricter than perl.
285 // Colour here-doc anyway while marking this bit as an error.
286 state
= SCE_PL_ERROR
;
288 styler
.ColourTo(i
- 1, state
);
289 switch (HereDoc
.Quote
) {
291 state
= SCE_PL_HERE_Q
;
294 state
= SCE_PL_HERE_QQ
;
297 state
= SCE_PL_HERE_QX
;
301 styler
.ColourTo(i
- 1, state
);
302 switch (HereDoc
.Quote
) {
304 state
= SCE_PL_HERE_Q
;
307 state
= SCE_PL_HERE_QQ
;
311 if (HereDoc
.State
== 4 && isEOLChar(ch
)) {
312 // Start of format body.
314 styler
.ColourTo(i
- 1, state
);
315 state
= SCE_PL_FORMAT
;
318 if (state
== SCE_PL_DEFAULT
) {
319 if ((isascii(ch
) && isdigit(ch
)) || (isascii(chNext
) && isdigit(chNext
) &&
320 (ch
== '.' || ch
== 'v'))) {
321 state
= SCE_PL_NUMBER
;
322 backflag
= BACK_NONE
;
323 numState
= PERLNUM_DECIMAL
;
325 if (ch
== '0') { // hex,bin,octal
327 numState
= PERLNUM_HEX
;
328 } else if (chNext
== 'b') {
329 numState
= PERLNUM_BINARY
;
330 } else if (isascii(chNext
) && isdigit(chNext
)) {
331 numState
= PERLNUM_OCTAL
;
333 if (numState
!= PERLNUM_DECIMAL
) {
338 } else if (ch
== 'v') { // vector
339 numState
= PERLNUM_V_VECTOR
;
341 } else if (isWordStart(ch
)) {
342 // if immediately prefixed by '::', always a bareword
344 if (chPrev
== ':' && styler
.SafeGetCharAt(i
- 2) == ':') {
345 state
= SCE_PL_IDENTIFIER
;
347 unsigned int kw
= i
+ 1;
348 // first check for possible quote-like delimiter
349 if (ch
== 's' && !isNonQuote(chNext
)) {
350 state
= SCE_PL_REGSUBST
;
352 } else if (ch
== 'm' && !isNonQuote(chNext
)) {
353 state
= SCE_PL_REGEX
;
355 } else if (ch
== 'q' && !isNonQuote(chNext
)) {
356 state
= SCE_PL_STRING_Q
;
358 } else if (ch
== 'y' && !isNonQuote(chNext
)) {
359 state
= SCE_PL_REGSUBST
;
361 } else if (ch
== 't' && chNext
== 'r' && !isNonQuote(chNext2
)) {
362 state
= SCE_PL_REGSUBST
;
365 } else if (ch
== 'q' && (chNext
== 'q' || chNext
== 'r' || chNext
== 'w' || chNext
== 'x') && !isNonQuote(chNext2
)) {
366 if (chNext
== 'q') state
= SCE_PL_STRING_QQ
;
367 else if (chNext
== 'x') state
= SCE_PL_STRING_QX
;
368 else if (chNext
== 'r') state
= SCE_PL_STRING_QR
;
369 else if (chNext
== 'w') state
= SCE_PL_STRING_QW
;
372 } else if (ch
== 'x' && (chNext
== '=' || // repetition
373 !isWordStart(chNext
) ||
374 (isdigit(chPrev
) && isdigit(chNext
)))) {
375 state
= SCE_PL_OPERATOR
;
377 // if potentially a keyword, scan forward and grab word, then check
378 // if it's really one; if yes, disambiguation test is performed
379 // otherwise it is always a bareword and we skip a lot of scanning
380 // note: keywords assumed to be limited to [_a-zA-Z] only
381 if (state
== SCE_PL_WORD
) {
382 while (isWordStart(styler
.SafeGetCharAt(kw
))) kw
++;
383 if (!isPerlKeyword(styler
.GetStartSegment(), kw
, keywords
, styler
)) {
384 state
= SCE_PL_IDENTIFIER
;
387 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
388 // for quote-like delimiters/keywords, attempt to disambiguate
389 // to select for bareword, change state -> SCE_PL_IDENTIFIER
390 if (state
!= SCE_PL_IDENTIFIER
&& i
> 0) {
392 bool moreback
= false; // true if passed newline/comments
393 bool brace
= false; // true if opening brace found
395 // first look backwards past whitespace/comments for EOLs
396 // if BACK_NONE, neither operator nor keyword, so skip test
397 if (backflag
!= BACK_NONE
) {
398 while (--j
> backPos
) {
399 if (isEOLChar(styler
.SafeGetCharAt(j
)))
402 ch2
= styler
.SafeGetCharAt(j
);
403 if (ch2
== '{' && !moreback
) {
404 // {bareword: possible variable spec
406 } else if ((ch2
== '&' && styler
.SafeGetCharAt(j
- 1) != '&')
407 // &bareword: subroutine call
408 || (ch2
== '>' && styler
.SafeGetCharAt(j
- 1) == '-')
409 // ->bareword: part of variable spec
410 || (ch2
== 'b' && styler
.Match(j
- 2, "su"))) {
411 // sub bareword: subroutine declaration
412 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
413 state
= SCE_PL_IDENTIFIER
;
415 // if status still ambiguous, look forward after word past
416 // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
417 // match anything, so skip the whole thing
419 if (state
!= SCE_PL_IDENTIFIER
420 && (ch2
== '{' || ch2
== '(' || ch2
== '['|| ch2
== ',')
422 while (ch2
= styler
.SafeGetCharAt(j
),
423 (ch2
== ' ' || ch2
== '\t') && j
< lengthDoc
) {
426 if ((ch2
== '}' && brace
)
427 // {bareword}: variable spec
428 || (ch2
== '=' && styler
.SafeGetCharAt(j
+ 1) == '>')) {
429 // [{(, bareword=>: hash literal
430 state
= SCE_PL_IDENTIFIER
;
435 backflag
= BACK_NONE
;
436 // an identifier or bareword
437 if (state
== SCE_PL_IDENTIFIER
) {
438 if ((!isWordStart(chNext
) && chNext
!= '\'')
439 || (chNext
== '.' && chNext2
== '.')) {
440 // We need that if length of word == 1!
441 // This test is copied from the SCE_PL_WORD handler.
442 styler
.ColourTo(i
, SCE_PL_IDENTIFIER
);
443 state
= SCE_PL_DEFAULT
;
446 } else if (state
== SCE_PL_WORD
) {
448 if (ch
== '_' && chNext
== '_' &&
449 (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__DATA__")
450 || isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__END__"))) {
451 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
452 state
= SCE_PL_DATASECTION
;
454 if (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "format")) {
455 state
= SCE_PL_FORMAT_IDENT
;
458 state
= SCE_PL_DEFAULT
;
460 styler
.ColourTo(i
, SCE_PL_WORD
);
461 backflag
= BACK_KEYWORD
;
464 ch
= styler
.SafeGetCharAt(i
);
465 chNext
= styler
.SafeGetCharAt(i
+ 1);
466 // a repetition operator 'x'
467 } else if (state
== SCE_PL_OPERATOR
) {
468 state
= SCE_PL_DEFAULT
;
470 // quote-like delimiter, skip one char if double-char delimiter
473 chNext
= styler
.SafeGetCharAt(i
+ 1);
475 } else if (ch
== '#') {
476 state
= SCE_PL_COMMENTLINE
;
477 } else if (ch
== '\"') {
478 state
= SCE_PL_STRING
;
481 backflag
= BACK_NONE
;
482 } else if (ch
== '\'') {
485 styler
.ColourTo(i
, state
);
487 state
= SCE_PL_CHARACTER
;
491 backflag
= BACK_NONE
;
492 } else if (ch
== '`') {
493 state
= SCE_PL_BACKTICKS
;
496 backflag
= BACK_NONE
;
497 } else if (ch
== '$') {
498 if ((chNext
== '{') || isspacechar(chNext
)) {
499 styler
.ColourTo(i
, SCE_PL_SCALAR
);
501 state
= SCE_PL_SCALAR
;
502 if ((chNext
== '`' && chNext2
== '`')
503 || (chNext
== ':' && chNext2
== ':')) {
505 ch
= styler
.SafeGetCharAt(i
);
506 chNext
= styler
.SafeGetCharAt(i
+ 1);
513 backflag
= BACK_NONE
;
514 } else if (ch
== '@') {
515 if (!isascii(chNext
) || isalpha(chNext
) || chNext
== '#' || chNext
== '$'
516 || chNext
== '_' || chNext
== '+' || chNext
== '-') {
517 state
= SCE_PL_ARRAY
;
518 } else if (chNext
== ':' && chNext2
== ':') {
519 state
= SCE_PL_ARRAY
;
521 ch
= styler
.SafeGetCharAt(i
);
522 chNext
= styler
.SafeGetCharAt(i
+ 1);
523 } else if (chNext
!= '{' && chNext
!= '[') {
524 styler
.ColourTo(i
, SCE_PL_ARRAY
);
526 styler
.ColourTo(i
, SCE_PL_ARRAY
);
528 backflag
= BACK_NONE
;
529 } else if (ch
== '%') {
530 backflag
= BACK_NONE
;
531 if (!isascii(chNext
) || isalpha(chNext
) || chNext
== '#' || chNext
== '$'
532 || chNext
== '_' || chNext
== '!' || chNext
== '^') {
537 } else if (chNext
== ':' && chNext2
== ':') {
540 ch
= styler
.SafeGetCharAt(i
);
541 chNext
= styler
.SafeGetCharAt(i
+ 1);
542 } else if (chNext
== '{') {
543 styler
.ColourTo(i
, SCE_PL_HASH
);
547 } else if (ch
== '*') {
548 backflag
= BACK_NONE
;
552 if (chNext
== ':' && chNext2
== ':') {
553 state
= SCE_PL_SYMBOLTABLE
;
555 ch
= styler
.SafeGetCharAt(i
);
556 chNext
= styler
.SafeGetCharAt(i
+ 1);
557 } else if (!isascii(chNext
) || isalpha(chNext
) || chNext
== '_'
558 || NULL
!= strstr("^/|,\\\";#%^:?<>)[]", strch
)) {
559 state
= SCE_PL_SYMBOLTABLE
;
563 } else if (chNext
== '{') {
564 styler
.ColourTo(i
, SCE_PL_SYMBOLTABLE
);
566 if (chNext
== '*') { // exponentiation
573 } else if (ch
== '/' || (ch
== '<' && chNext
== '<')) {
574 // Explicit backward peeking to set a consistent preferRE for
575 // any slash found, so no longer need to track preferRE state.
576 // Find first previous significant lexed element and interpret.
577 // Test for HERE doc start '<<' shares this code, helps to
578 // determine if it should be an operator.
579 bool preferRE
= false;
580 bool isHereDoc
= (ch
== '<');
581 bool hereDocSpace
= false; // these are for corner case:
582 bool hereDocScalar
= false; // SCALAR [whitespace] '<<'
583 unsigned int bk
= (i
> 0)? i
- 1: 0;
587 if (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
)
589 while ((bk
> 0) && (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
||
590 styler
.StyleAt(bk
) == SCE_PL_COMMENTLINE
)) {
594 // position 0 won't really be checked; rarely happens
595 // hard to fix due to an unsigned index i
598 int bkstyle
= styler
.StyleAt(bk
);
599 bkch
= styler
.SafeGetCharAt(bk
);
601 case SCE_PL_OPERATOR
:
603 if (bkch
== ')' || bkch
== ']') {
605 } else if (bkch
== '}') {
606 // backtrack further, count balanced brace pairs
607 // if a brace pair found, see if it's a variable
610 bkstyle
= styler
.StyleAt(bk
);
611 if (bkstyle
== SCE_PL_OPERATOR
) {
612 bkch
= styler
.SafeGetCharAt(bk
);
613 if (bkch
== ';') { // early out
615 } else if (bkch
== '}') {
617 } else if (bkch
== '{') {
618 if (--braceCount
== 0)
624 // at beginning, true
625 } else if (braceCount
== 0) {
626 // balanced { found, bk>0, skip more whitespace
627 if (styler
.StyleAt(--bk
) == SCE_PL_DEFAULT
) {
629 bkstyle
= styler
.StyleAt(--bk
);
630 if (bkstyle
!= SCE_PL_DEFAULT
)
634 bkstyle
= styler
.StyleAt(bk
);
635 if (bkstyle
== SCE_PL_SCALAR
636 || bkstyle
== SCE_PL_ARRAY
637 || bkstyle
== SCE_PL_HASH
638 || bkstyle
== SCE_PL_SYMBOLTABLE
639 || bkstyle
== SCE_PL_OPERATOR
) {
645 case SCE_PL_IDENTIFIER
:
647 if (bkch
== '>') { // inputsymbol
651 // backtrack to find "->" or "::" before identifier
652 while (bk
> 0 && styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
) {
656 bkstyle
= styler
.StyleAt(bk
);
657 if (bkstyle
== SCE_PL_DEFAULT
||
658 bkstyle
== SCE_PL_COMMENTLINE
) {
659 } else if (bkstyle
== SCE_PL_OPERATOR
) {
660 bkch
= styler
.SafeGetCharAt(bk
);
661 // test for "->" and "::"
662 if ((bkch
== '>' && styler
.SafeGetCharAt(bk
- 1) == '-')
663 || (bkch
== ':' && styler
.SafeGetCharAt(bk
- 1) == ':')) {
668 // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/'
670 (isspacechar(chNext
) || isdigit(chNext
)))
672 // HERE docs cannot have a space after the <<
673 if (isspacechar(chNext
))
680 case SCE_PL_SCALAR
: // for $var<< case
681 hereDocScalar
= true;
683 // for HERE docs, always true for preferRE
688 // adopt heuristics similar to vim-style rules:
689 // keywords always forced as /PATTERN/: split, if, elsif, while
690 // everything else /PATTERN/ unless digit/space immediately after '/'
692 while (bk
> 0 && styler
.StyleAt(bk
-1) == SCE_PL_WORD
) {
695 if (isPerlKeyword(bk
, bkend
, reWords
, styler
))
697 if (isspacechar(chNext
) || isdigit(chNext
))
700 // other styles uses the default, preferRE=false
702 case SCE_PL_POD_VERB
:
710 backflag
= BACK_NONE
;
711 if (isHereDoc
) { // handle HERE doc
712 // if SCALAR whitespace '<<', *always* a HERE doc
713 if (preferRE
|| (hereDocSpace
&& hereDocScalar
)) {
714 state
= SCE_PL_HERE_DELIM
;
716 } else { // << operator
722 } else { // handle regexp
724 state
= SCE_PL_REGEX
;
727 } else { // / operator
731 } else if (ch
== '<') {
732 // looks forward for matching > on same line
733 unsigned int fw
= i
+ 1;
734 while (fw
< lengthDoc
) {
735 char fwch
= styler
.SafeGetCharAt(fw
);
737 if (styler
.SafeGetCharAt(fw
-1) != '\\' ||
738 styler
.SafeGetCharAt(fw
-2) != '\\')
740 } else if (isEOLChar(fwch
) || isspacechar(fwch
)) {
742 } else if (fwch
== '>') {
743 if ((fw
- i
) == 2 && // '<=>' case
744 styler
.SafeGetCharAt(fw
-1) == '=') {
747 styler
.ColourTo(fw
, SCE_PL_IDENTIFIER
);
750 chNext
= styler
.SafeGetCharAt(i
+1);
756 } else if (ch
== '=' // POD
758 && (isEOLChar(chPrev
))) {
760 backflag
= BACK_NONE
;
762 //sooked[sookedpos] = '\0';
763 } else if (ch
== '-' // file test operators
764 && isSingleCharOp(chNext
)
765 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
766 styler
.ColourTo(i
+ 1, SCE_PL_WORD
);
767 state
= SCE_PL_DEFAULT
;
771 backflag
= BACK_NONE
;
772 } else if (ch
== '-' // bareword promotion (-FOO cases)
773 && ((isascii(chNext
) && isalpha(chNext
)) || chNext
== '_')
774 && backflag
!= BACK_NONE
) {
775 state
= SCE_PL_IDENTIFIER
;
776 backflag
= BACK_NONE
;
777 } else if (ch
== '(' && i
> 0) {
778 // backtrack to identify if we're starting a sub prototype
779 // for generality, we need to ignore whitespace/comments
780 unsigned int bk
= i
- 1; // i > 0 tested above
782 while (bk
> 0 && (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
||
783 styler
.StyleAt(bk
) == SCE_PL_COMMENTLINE
)) {
786 if (bk
== 0 || styler
.StyleAt(bk
) != SCE_PL_IDENTIFIER
) // check identifier
788 while (bk
> 0 && (styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
)) {
791 while (bk
> 0 && (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
||
792 styler
.StyleAt(bk
) == SCE_PL_COMMENTLINE
)) {
795 if (bk
< 2 || styler
.StyleAt(bk
) != SCE_PL_WORD
// check "sub" keyword
796 || !styler
.Match(bk
- 2, "sub")) // assume suffix is unique!
798 state
= SCE_PL_SUB_PROTOTYPE
;
799 backflag
= BACK_NONE
;
800 backPos
= i
; // needed for restart
801 } else if (isPerlOperator(ch
)) {
802 if (ch
== '.' && chNext
== '.') { // .. and ...
804 if (chNext2
== '.') { i
++; }
805 state
= SCE_PL_DEFAULT
;
806 ch
= styler
.SafeGetCharAt(i
);
807 chNext
= styler
.SafeGetCharAt(i
+ 1);
810 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
811 backflag
= BACK_OPERATOR
;
813 } else if (ch
== 4 || ch
== 26) { // ^D and ^Z ends valid perl source
814 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
815 state
= SCE_PL_DATASECTION
;
817 // keep colouring defaults to make restart easier
818 styler
.ColourTo(i
, SCE_PL_DEFAULT
);
820 } else if (state
== SCE_PL_NUMBER
) {
823 // double dot is always an operator
825 } else if (numState
<= PERLNUM_FLOAT
) {
826 // non-decimal number or float exponent, consume next dot
827 styler
.ColourTo(i
- 1, SCE_PL_NUMBER
);
828 state
= SCE_PL_DEFAULT
;
830 } else { // decimal or vectors allows dots
832 if (numState
== PERLNUM_DECIMAL
) {
834 if (isdigit(chNext
)) { // really a vector
835 numState
= PERLNUM_VECTOR
;
836 } else // number then dot
840 if (!isdigit(chNext
)) // vector then dot
844 } else if (ch
== '_') {
845 // permissive underscoring for number and vector literals
846 } else if (!isascii(ch
) || isalnum(ch
)) {
847 if (numState
== PERLNUM_VECTOR
|| numState
== PERLNUM_V_VECTOR
) {
848 if (!isascii(ch
) || isalpha(ch
)) {
849 if (dotCount
== 0) { // change to word
850 state
= SCE_PL_IDENTIFIER
;
851 } else { // vector then word
855 } else if (numState
== PERLNUM_DECIMAL
) {
856 if (ch
== 'E' || ch
== 'e') { // exponent
857 numState
= PERLNUM_FLOAT
;
858 if (chNext
== '+' || chNext
== '-') {
863 } else if (!isascii(ch
) || !isdigit(ch
)) { // number then word
866 } else if (numState
== PERLNUM_FLOAT
) {
867 if (!isdigit(ch
)) { // float then word
870 } else if (numState
== PERLNUM_OCTAL
) {
874 numState
= PERLNUM_BAD
;
875 } else if (numState
== PERLNUM_BINARY
) {
879 numState
= PERLNUM_BAD
;
880 } else if (numState
== PERLNUM_HEX
) {
881 int ch2
= toupper(ch
);
882 if (!isdigit(ch
) && !(ch2
>= 'A' && ch2
<= 'F'))
884 } else {//(numState == PERLNUM_BAD) {
889 // complete current number or vector
891 styler
.ColourTo(i
- 1, actualNumStyle(numState
));
892 state
= SCE_PL_DEFAULT
;
895 } else if (state
== SCE_PL_IDENTIFIER
) {
896 if (!isWordStart(chNext
) && chNext
!= '\'') {
897 styler
.ColourTo(i
, SCE_PL_IDENTIFIER
);
898 state
= SCE_PL_DEFAULT
;
902 if (state
== SCE_PL_COMMENTLINE
) {
904 styler
.ColourTo(i
- 1, state
);
905 state
= SCE_PL_DEFAULT
;
907 } else if (isEOLChar(chNext
)) {
908 styler
.ColourTo(i
, state
);
909 state
= SCE_PL_DEFAULT
;
911 } else if (state
== SCE_PL_HERE_DELIM
) {
913 // From perldata.pod:
914 // ------------------
915 // A line-oriented form of quoting is based on the shell ``here-doc''
917 // Following a << you specify a string to terminate the quoted material,
918 // and all lines following the current line down to the terminating
919 // string are the value of the item.
920 // The terminating string may be either an identifier (a word),
921 // or some quoted text.
922 // If quoted, the type of quotes you use determines the treatment of
923 // the text, just as in regular quoting.
924 // An unquoted identifier works like double quotes.
925 // There must be no space between the << and the identifier.
926 // (If you put a space it will be treated as a null identifier,
927 // which is valid, and matches the first empty line.)
928 // (This is deprecated, -w warns of this syntax)
929 // The terminating string must appear by itself (unquoted and with no
930 // surrounding whitespace) on the terminating line.
934 // Specifier format is: <<[-]WORD
935 // Optional '-' is for removal of leading tabs from here-doc.
936 // Whitespace acceptable after <<[-] operator.
938 if (HereDoc
.State
== 0) { // '<<' encountered
939 bool gotspace
= false;
940 unsigned int oldi
= i
;
941 if (chNext
== ' ' || chNext
== '\t') {
942 // skip whitespace; legal for quoted delimiters
946 chNext
= styler
.SafeGetCharAt(i
+ 1);
947 } while ((i
+ 1 < lengthDoc
) && (chNext
== ' ' || chNext
== '\t'));
948 chNext2
= styler
.SafeGetCharAt(i
+ 2);
951 HereDoc
.Quote
= chNext
;
952 HereDoc
.Quoted
= false;
953 HereDoc
.DelimiterLength
= 0;
954 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
955 if (chNext
== '\'' || chNext
== '"' || chNext
== '`') {
956 // a quoted here-doc delimiter
960 HereDoc
.Quoted
= true;
961 } else if (isspacechar(chNext
) || isdigit(chNext
) || chNext
== '\\'
962 || chNext
== '=' || chNext
== '$' || chNext
== '@'
963 || ((isalpha(chNext
) || chNext
== '_') && gotspace
)) {
964 // left shift << or <<= operator cases
965 // restore position if operator
967 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
968 state
= SCE_PL_DEFAULT
;
969 backflag
= BACK_OPERATOR
;
974 // an unquoted here-doc delimiter, no special handling
975 // (cannot be prefixed by spaces/tabs), or
976 // symbols terminates; deprecated zero-length delimiter
979 } else if (HereDoc
.State
== 1) { // collect the delimiter
980 backflag
= BACK_NONE
;
981 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
982 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
983 styler
.ColourTo(i
, state
);
984 state
= SCE_PL_DEFAULT
;
986 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
991 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
992 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
994 } else { // an unquoted here-doc delimiter
995 if (isalnum(ch
) || ch
== '_') {
996 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
997 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
999 styler
.ColourTo(i
- 1, state
);
1000 state
= SCE_PL_DEFAULT
;
1004 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
1005 styler
.ColourTo(i
- 1, state
);
1006 state
= SCE_PL_ERROR
;
1010 } else if (HereDoc
.State
== 2) {
1011 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
1012 if (isEOLChar(chPrev
) && isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
1013 i
+= HereDoc
.DelimiterLength
;
1014 chPrev
= styler
.SafeGetCharAt(i
- 1);
1015 ch
= styler
.SafeGetCharAt(i
);
1016 if (isEOLChar(ch
)) {
1017 styler
.ColourTo(i
- 1, state
);
1018 state
= SCE_PL_DEFAULT
;
1019 backflag
= BACK_NONE
;
1023 chNext
= styler
.SafeGetCharAt(i
+ 1);
1025 } else if (state
== SCE_PL_POD
1026 || state
== SCE_PL_POD_VERB
) {
1027 if (isEOLChar(chPrev
)) {
1028 if (ch
== ' ' || ch
== '\t') {
1029 styler
.ColourTo(i
- 1, state
);
1030 state
= SCE_PL_POD_VERB
;
1032 styler
.ColourTo(i
- 1, state
);
1035 if (isMatch(styler
, lengthDoc
, i
, "=cut")) {
1036 styler
.ColourTo(i
- 1 + 4, state
);
1038 state
= SCE_PL_DEFAULT
;
1039 ch
= styler
.SafeGetCharAt(i
);
1040 //chNext = styler.SafeGetCharAt(i + 1);
1046 } else if (state
== SCE_PL_SCALAR
// variable names
1047 || state
== SCE_PL_ARRAY
1048 || state
== SCE_PL_HASH
1049 || state
== SCE_PL_SYMBOLTABLE
) {
1050 if (ch
== ':' && chNext
== ':') { // skip ::
1055 else if (isEndVar(ch
)) {
1056 if (i
== (styler
.GetStartSegment() + 1)) {
1057 // Special variable: $(, $_ etc.
1058 styler
.ColourTo(i
, state
);
1059 state
= SCE_PL_DEFAULT
;
1061 styler
.ColourTo(i
- 1, state
);
1062 state
= SCE_PL_DEFAULT
;
1066 } else if (state
== SCE_PL_REGEX
1067 || state
== SCE_PL_STRING_QR
1069 if (!Quote
.Up
&& !isspacechar(ch
)) {
1071 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1072 // SG: Is it safe to skip *every* escaped char?
1075 chNext
= styler
.SafeGetCharAt(i
+ 1);
1077 if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
1079 if (Quote
.Count
== 0) {
1081 if (Quote
.Up
== Quote
.Down
) {
1085 if (!isalpha(chNext
)) {
1086 if (Quote
.Rep
<= 0) {
1087 styler
.ColourTo(i
, state
);
1088 state
= SCE_PL_DEFAULT
;
1092 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
1094 } else if (!isascii(chNext
) || !isalpha(chNext
)) {
1095 if (Quote
.Rep
<= 0) {
1096 styler
.ColourTo(i
, state
);
1097 state
= SCE_PL_DEFAULT
;
1102 } else if (state
== SCE_PL_REGSUBST
) {
1103 if (!Quote
.Up
&& !isspacechar(ch
)) {
1105 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1106 // SG: Is it safe to skip *every* escaped char?
1109 chNext
= styler
.SafeGetCharAt(i
+ 1);
1111 if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
1112 /* We matched something like s(...) or tr{...}
1113 * and are looking for the next matcher characters,
1114 * which could be either bracketed ({...}) or non-bracketed
1117 * Number-signs are problematic. If they occur after
1118 * the close of the first part, treat them like
1119 * a Quote.Up char, even if they actually start comments.
1121 * If we find an alnum, we end the regsubst, and punt.
1123 * Eric Promislow ericp@activestate.com Aug 9,2000
1125 if (isspacechar(ch
)) {
1128 else if (!isascii(ch
) || isalnum(ch
)) {
1129 styler
.ColourTo(i
, state
);
1130 state
= SCE_PL_DEFAULT
;
1135 } else if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
1137 if (Quote
.Count
== 0) {
1140 if (!isascii(chNext
) || !isalpha(chNext
)) {
1141 if (Quote
.Rep
<= 0) {
1142 styler
.ColourTo(i
, state
);
1143 state
= SCE_PL_DEFAULT
;
1147 if (Quote
.Up
== Quote
.Down
) {
1150 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
1152 } else if (!isascii(chNext
) || !isalpha(chNext
)) {
1153 if (Quote
.Rep
<= 0) {
1154 styler
.ColourTo(i
, state
);
1155 state
= SCE_PL_DEFAULT
;
1160 } else if (state
== SCE_PL_STRING_Q
1161 || state
== SCE_PL_STRING_QQ
1162 || state
== SCE_PL_STRING_QX
1163 || state
== SCE_PL_STRING_QW
1164 || state
== SCE_PL_STRING
1165 || state
== SCE_PL_CHARACTER
1166 || state
== SCE_PL_BACKTICKS
1168 if (!Quote
.Down
&& !isspacechar(ch
)) {
1170 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1173 chNext
= styler
.SafeGetCharAt(i
+ 1);
1174 } else if (ch
== Quote
.Down
) {
1176 if (Quote
.Count
== 0) {
1178 if (Quote
.Rep
<= 0) {
1179 styler
.ColourTo(i
, state
);
1180 state
= SCE_PL_DEFAULT
;
1183 if (Quote
.Up
== Quote
.Down
) {
1187 } else if (ch
== Quote
.Up
) {
1190 } else if (state
== SCE_PL_SUB_PROTOTYPE
) {
1194 if (NULL
!= strstr("\\[$@%&*];", strch
)) {
1196 } else if (ch
== ')') {
1197 styler
.ColourTo(i
, state
);
1198 state
= SCE_PL_DEFAULT
;
1200 // abandon prototype, restart from '('
1202 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
1203 ch
= styler
.SafeGetCharAt(i
);
1204 chNext
= styler
.SafeGetCharAt(i
+ 1);
1205 state
= SCE_PL_DEFAULT
;
1207 } else if (state
== SCE_PL_FORMAT_IDENT
) {
1208 // occupies different HereDoc states to avoid clashing with HERE docs
1209 if (HereDoc
.State
== 0) {
1210 if ((isascii(ch
) && isalpha(ch
)) || ch
== '_' // probable identifier
1211 || ch
== '=') { // no identifier
1213 HereDoc
.Quoted
= false; // whitespace flag
1214 } else if (ch
== ' ' || ch
== '\t') {
1215 styler
.ColourTo(i
, SCE_PL_DEFAULT
);
1217 state
= SCE_PL_DEFAULT
;
1222 if (HereDoc
.State
== 3) { // with just a '=', state goes 0->3->4
1224 styler
.ColourTo(i
, SCE_PL_FORMAT_IDENT
);
1225 state
= SCE_PL_DEFAULT
;
1227 } else if (ch
== ' ' || ch
== '\t') {
1228 HereDoc
.Quoted
= true;
1229 } else if (isEOLChar(ch
) || (HereDoc
.Quoted
&& ch
!= '=')) {
1230 // abandon format, restart from after 'format'
1232 ch
= styler
.SafeGetCharAt(i
);
1233 chNext
= styler
.SafeGetCharAt(i
+ 1);
1234 state
= SCE_PL_DEFAULT
;
1238 } else if (state
== SCE_PL_FORMAT
) {
1239 if (isEOLChar(chPrev
)) {
1240 styler
.ColourTo(i
- 1, state
);
1241 if (ch
== '.' && isEOLChar(chNext
)) {
1242 styler
.ColourTo(i
, state
);
1243 state
= SCE_PL_DEFAULT
;
1248 if (state
== SCE_PL_ERROR
) {
1253 styler
.ColourTo(lengthDoc
- 1, state
);
1256 static bool IsCommentLine(int line
, Accessor
&styler
) {
1257 int pos
= styler
.LineStart(line
);
1258 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
1259 for (int i
= pos
; i
< eol_pos
; i
++) {
1260 char ch
= styler
[i
];
1261 int style
= styler
.StyleAt(i
);
1262 if (ch
== '#' && style
== SCE_PL_COMMENTLINE
)
1264 else if (ch
!= ' ' && ch
!= '\t')
1270 static void FoldPerlDoc(unsigned int startPos
, int length
, int, WordList
*[],
1272 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
1273 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
1274 // Custom folding of POD and packages
1275 bool foldPOD
= styler
.GetPropertyInt("fold.perl.pod", 1) != 0;
1276 bool foldPackage
= styler
.GetPropertyInt("fold.perl.package", 1) != 0;
1277 unsigned int endPos
= startPos
+ length
;
1278 int visibleChars
= 0;
1279 int lineCurrent
= styler
.GetLine(startPos
);
1280 int levelPrev
= SC_FOLDLEVELBASE
;
1281 if (lineCurrent
> 0)
1282 levelPrev
= styler
.LevelAt(lineCurrent
- 1) >> 16;
1283 int levelCurrent
= levelPrev
;
1284 char chNext
= styler
[startPos
];
1285 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
1286 int styleNext
= styler
.StyleAt(startPos
);
1287 // Used at end of line to determine if the line was a package definition
1288 bool isPackageLine
= false;
1289 bool isPodHeading
= false;
1290 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
1292 chNext
= styler
.SafeGetCharAt(i
+ 1);
1293 int style
= styleNext
;
1294 styleNext
= styler
.StyleAt(i
+ 1);
1295 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
1296 bool atLineStart
= isEOLChar(chPrev
) || i
== 0;
1298 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
1300 if (!IsCommentLine(lineCurrent
- 1, styler
)
1301 && IsCommentLine(lineCurrent
+ 1, styler
))
1303 else if (IsCommentLine(lineCurrent
- 1, styler
)
1304 && !IsCommentLine(lineCurrent
+1, styler
))
1307 if (style
== SCE_C_OPERATOR
) {
1310 } else if (ch
== '}') {
1314 // Custom POD folding
1315 if (foldPOD
&& atLineStart
) {
1316 int stylePrevCh
= (i
) ? styler
.StyleAt(i
- 1):SCE_PL_DEFAULT
;
1317 if (style
== SCE_PL_POD
) {
1318 if (stylePrevCh
!= SCE_PL_POD
&& stylePrevCh
!= SCE_PL_POD_VERB
)
1320 else if (styler
.Match(i
, "=cut"))
1322 else if (styler
.Match(i
, "=head"))
1323 isPodHeading
= true;
1324 } else if (style
== SCE_PL_DATASECTION
) {
1325 if (ch
== '=' && isalpha(chNext
) && levelCurrent
== SC_FOLDLEVELBASE
)
1327 else if (styler
.Match(i
, "=cut") && levelCurrent
> SC_FOLDLEVELBASE
)
1329 else if (styler
.Match(i
, "=head"))
1330 isPodHeading
= true;
1331 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1332 // reset needed as level test is vs. SC_FOLDLEVELBASE
1333 else if (styler
.Match(i
, "__END__"))
1334 levelCurrent
= SC_FOLDLEVELBASE
;
1337 // Custom package folding
1338 if (foldPackage
&& atLineStart
) {
1339 if (style
== SCE_PL_WORD
&& styler
.Match(i
, "package")) {
1340 isPackageLine
= true;
1345 int lev
= levelPrev
;
1347 lev
= levelPrev
- 1;
1348 lev
|= SC_FOLDLEVELHEADERFLAG
;
1349 isPodHeading
= false;
1351 // Check if line was a package declaration
1352 // because packages need "special" treatment
1353 if (isPackageLine
) {
1354 lev
= SC_FOLDLEVELBASE
| SC_FOLDLEVELHEADERFLAG
;
1355 levelCurrent
= SC_FOLDLEVELBASE
+ 1;
1356 isPackageLine
= false;
1358 lev
|= levelCurrent
<< 16;
1359 if (visibleChars
== 0 && foldCompact
)
1360 lev
|= SC_FOLDLEVELWHITEFLAG
;
1361 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1362 lev
|= SC_FOLDLEVELHEADERFLAG
;
1363 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1364 styler
.SetLevel(lineCurrent
, lev
);
1367 levelPrev
= levelCurrent
;
1370 if (!isspacechar(ch
))
1374 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1375 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
1376 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
1379 static const char * const perlWordListDesc
[] = {
1384 LexerModule
lmPerl(SCLEX_PERL
, ColourisePerlDoc
, "perl", FoldPerlDoc
, perlWordListDesc
, 8);