]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
// Scintilla source code edit control
/** @file LexPerl.cxx
 ** Lexer for subset of Perl.
 **/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
// The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
// Sub-states used while lexing a numeric literal (held in numState).
#define PERLNUM_BINARY 1    // order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX 2       // NOTE(review): definition was missing here although the
                            // number lexer refers to it; 2 fills the gap in the
                            // 1-4 "no dot" range -- confirm against upstream
#define PERLNUM_OCTAL 3
#define PERLNUM_FLOAT 4     // actually exponent part
#define PERLNUM_DECIMAL 5   // 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8       // NOTE(review): definition was missing here although the
                            // number lexer refers to it; any value outside 1-7
                            // keeps the range tests valid -- confirm against upstream

#define BACK_NONE 0         // lookback state for bareword disambiguation:
#define BACK_OPERATOR 1     // whitespace/comments are insignificant
#define BACK_KEYWORD 2      // operators/keywords are needed for disambiguation

// Size of the buffer allocated for a here-doc delimiter.
#define HERE_DELIM_MAX 256
// Returns true when ch is a line-terminator character, i.e. a carriage
// return or a line feed.
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}
// Returns true when ch is one of the single letters accepted after '-' as a
// file test operator (the caller passes the character following the '-').
static bool isSingleCharOp(char ch) {
	// strchr() also matches the terminating NUL of the set, so a NUL
	// character must be rejected explicitly.
	return ch != '\0' && NULL != strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch);
}
// Returns true when ch is one of the punctuation characters this lexer
// colours as an operator.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		// these chars are already tested before this call
		// ch == '%' || ch == '*' || ch == '<' || ch == '/' ||
		return false;
	}
}
62 static bool isPerlKeyword(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
64 unsigned int i
, len
= end
- start
;
65 if (len
> 30) { len
= 30; }
66 for (i
= 0; i
< len
; i
++, start
++) s
[i
] = styler
[start
];
68 return keywords
.InList(s
);
// Returns true when ch cannot be part of a variable name: anything that is
// not alphanumeric and is not one of '#', '$', '_' or '\''.
static inline bool isEndVar(char ch) {
	// isalnum() is only defined for EOF and unsigned char values; plain char
	// may be negative, so convert before the call.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
// Returns true when ch is alphanumeric or '_'. The lexer only treats the
// character after a quote-like operator letter (s, m, q, y, tr, ...) as a
// delimiter when this returns false for it.
static inline bool isNonQuote(char ch) {
	// isalnum() is only defined for EOF and unsigned char values; plain char
	// may be negative, so convert before the call.
	return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
}
81 static inline char actualNumStyle(int numberStyle
) {
82 if (numberStyle
== PERLNUM_VECTOR
|| numberStyle
== PERLNUM_V_VECTOR
) {
84 } else if (numberStyle
== PERLNUM_BAD
) {
90 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
91 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
95 if (*val
!= styler
[pos
++]) {
103 static char opposite(char ch
) {
115 static void ColourisePerlDoc(unsigned int startPos
, int length
, int initStyle
,
116 WordList
*keywordlists
[], Accessor
&styler
) {
118 // Lexer for perl often has to backtrack to start of current style to determine
119 // which characters are being used as quotes, how deeply nested is the
120 // start position and what the termination string is for here documents
122 WordList
&keywords
= *keywordlists
[0];
126 int State
; // 0: '<<' encountered
127 // 1: collect the delimiter
128 // 2: here doc text (lines after the delimiter)
129 char Quote
; // the char after '<<'
130 bool Quoted
; // true if Quote in ('\'','"','`')
131 int DelimiterLength
; // strlen(Delimiter)
132 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
138 Delimiter
= new char[HERE_DELIM_MAX
];
145 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
170 int state
= initStyle
;
171 char numState
= PERLNUM_DECIMAL
;
173 unsigned int lengthDoc
= startPos
+ length
;
174 //int sookedpos = 0; // these have no apparent use, see POD state
176 //sooked[sookedpos] = '\0';
178 // If in a long distance lexical state, seek to the beginning to find quote characters
179 // Perl strings can be multi-line with embedded newlines, so backtrack.
180 // Perl numbers have additional state during lexing, so backtrack too.
181 if (state
== SCE_PL_HERE_Q
|| state
== SCE_PL_HERE_QQ
|| state
== SCE_PL_HERE_QX
) {
182 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_PL_HERE_DELIM
)) {
185 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
186 state
= styler
.StyleAt(startPos
- 1);
188 if ( state
== SCE_PL_STRING_Q
189 || state
== SCE_PL_STRING_QQ
190 || state
== SCE_PL_STRING_QX
191 || state
== SCE_PL_STRING_QR
192 || state
== SCE_PL_STRING_QW
193 || state
== SCE_PL_REGEX
194 || state
== SCE_PL_REGSUBST
195 || state
== SCE_PL_STRING
196 || state
== SCE_PL_BACKTICKS
197 || state
== SCE_PL_CHARACTER
198 || state
== SCE_PL_NUMBER
199 || state
== SCE_PL_IDENTIFIER
200 || state
== SCE_PL_ERROR
202 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
205 state
= SCE_PL_DEFAULT
;
208 // lookback at start of lexing to set proper state for backflag
209 // after this, they are updated when elements are lexed
210 int backflag
= BACK_NONE
;
211 unsigned int backPos
= startPos
;
214 int sty
= SCE_PL_DEFAULT
;
215 while ((backPos
> 0) && (sty
= styler
.StyleAt(backPos
),
216 sty
== SCE_PL_DEFAULT
|| sty
== SCE_PL_COMMENTLINE
))
218 if (sty
== SCE_PL_OPERATOR
)
219 backflag
= BACK_OPERATOR
;
220 else if (sty
== SCE_PL_WORD
)
221 backflag
= BACK_KEYWORD
;
224 styler
.StartAt(startPos
);
225 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
228 char chNext
= styler
[startPos
];
229 styler
.StartSegment(startPos
);
231 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
233 // if the current character is not consumed due to the completion of an
234 // earlier style, lexing can be restarted via a simple goto
236 chNext
= styler
.SafeGetCharAt(i
+ 1);
237 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
239 if (styler
.IsLeadByte(ch
)) {
240 chNext
= styler
.SafeGetCharAt(i
+ 2);
245 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
246 styler
.ColourTo(i
, state
);
251 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
252 // Begin of here-doc (the line after the here-doc delimiter):
253 // Lexically, the here-doc starts from the next line after the >>, but the
254 // first line of here-doc seem to follow the style of the last EOL sequence
256 if (HereDoc
.Quoted
) {
257 if (state
== SCE_PL_HERE_DELIM
) {
258 // Missing quote at end of string! We are stricter than perl.
259 // Colour here-doc anyway while marking this bit as an error.
260 state
= SCE_PL_ERROR
;
262 styler
.ColourTo(i
- 1, state
);
263 switch (HereDoc
.Quote
) {
265 state
= SCE_PL_HERE_Q
;
268 state
= SCE_PL_HERE_QQ
;
271 state
= SCE_PL_HERE_QX
;
275 styler
.ColourTo(i
- 1, state
);
276 switch (HereDoc
.Quote
) {
278 state
= SCE_PL_HERE_Q
;
281 state
= SCE_PL_HERE_QQ
;
286 if (state
== SCE_PL_DEFAULT
) {
287 if (isdigit(ch
) || (isdigit(chNext
) &&
288 (ch
== '.' || ch
== 'v'))) {
289 state
= SCE_PL_NUMBER
;
290 backflag
= BACK_NONE
;
291 numState
= PERLNUM_DECIMAL
;
293 if (ch
== '0') { // hex,bin,octal
295 numState
= PERLNUM_HEX
;
296 } else if (chNext
== 'b') {
297 numState
= PERLNUM_BINARY
;
298 } else if (isdigit(chNext
)) {
299 numState
= PERLNUM_OCTAL
;
301 if (numState
!= PERLNUM_DECIMAL
) {
306 } else if (ch
== 'v') { // vector
307 numState
= PERLNUM_V_VECTOR
;
309 } else if (iswordstart(ch
)) {
310 // if immediately prefixed by '::', always a bareword
312 if (chPrev
== ':' && styler
.SafeGetCharAt(i
- 2) == ':') {
313 state
= SCE_PL_IDENTIFIER
;
315 unsigned int kw
= i
+ 1;
316 // first check for possible quote-like delimiter
317 if (ch
== 's' && !isNonQuote(chNext
)) {
318 state
= SCE_PL_REGSUBST
;
320 } else if (ch
== 'm' && !isNonQuote(chNext
)) {
321 state
= SCE_PL_REGEX
;
323 } else if (ch
== 'q' && !isNonQuote(chNext
)) {
324 state
= SCE_PL_STRING_Q
;
326 } else if (ch
== 'y' && !isNonQuote(chNext
)) {
327 state
= SCE_PL_REGSUBST
;
329 } else if (ch
== 't' && chNext
== 'r' && !isNonQuote(chNext2
)) {
330 state
= SCE_PL_REGSUBST
;
333 } else if (ch
== 'q' && (chNext
== 'q' || chNext
== 'r' || chNext
== 'w' || chNext
== 'x') && !isNonQuote(chNext2
)) {
334 if (chNext
== 'q') state
= SCE_PL_STRING_QQ
;
335 else if (chNext
== 'x') state
= SCE_PL_STRING_QX
;
336 else if (chNext
== 'r') state
= SCE_PL_STRING_QR
;
337 else if (chNext
== 'w') state
= SCE_PL_STRING_QW
;
340 } else if (ch
== 'x' && (chNext
== '=' || // repetition
341 (chNext
!= '_' && !isalnum(chNext
)) ||
342 (isdigit(chPrev
) && isdigit(chNext
)))) {
343 state
= SCE_PL_OPERATOR
;
345 // if potentially a keyword, scan forward and grab word, then check
346 // if it's really one; if yes, disambiguation test is performed
347 // otherwise it is always a bareword and we skip a lot of scanning
348 // note: keywords assumed to be limited to [_a-zA-Z] only
349 if (state
== SCE_PL_WORD
) {
350 while (iswordstart(styler
.SafeGetCharAt(kw
))) kw
++;
351 if (!isPerlKeyword(styler
.GetStartSegment(), kw
, keywords
, styler
)) {
352 state
= SCE_PL_IDENTIFIER
;
355 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
356 // for quote-like delimiters/keywords, attempt to disambiguate
357 // to select for bareword, change state -> SCE_PL_IDENTIFIER
358 if (state
!= SCE_PL_IDENTIFIER
&& i
> 0) {
360 bool moreback
= false; // true if passed newline/comments
361 bool brace
= false; // true if opening brace found
363 // first look backwards past whitespace/comments for EOLs
364 // if BACK_NONE, neither operator nor keyword, so skip test
365 if (backflag
!= BACK_NONE
) {
366 while (--j
> backPos
) {
367 if (isEOLChar(styler
.SafeGetCharAt(j
)))
370 ch2
= styler
.SafeGetCharAt(j
);
371 if (ch2
== '{' && !moreback
) {
372 // {bareword: possible variable spec
374 } else if ((ch2
== '&')
375 // &bareword: subroutine call
376 || (ch2
== '>' && styler
.SafeGetCharAt(j
- 1) == '-')
377 // ->bareword: part of variable spec
378 || (ch2
== 'b' && styler
.Match(j
- 2, "su"))) {
379 // sub bareword: subroutine declaration
380 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
381 state
= SCE_PL_IDENTIFIER
;
383 // if status still ambiguous, look forward after word past
384 // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
385 // match anything, so skip the whole thing
387 if (state
!= SCE_PL_IDENTIFIER
388 && (ch2
== '{' || ch2
== '(' || ch2
== '['|| ch2
== ',')
390 while (ch2
= styler
.SafeGetCharAt(j
),
391 (ch2
== ' ' || ch2
== '\t') && j
< lengthDoc
) {
394 if ((ch2
== '}' && brace
)
395 // {bareword}: variable spec
396 || (ch2
== '=' && styler
.SafeGetCharAt(j
+ 1) == '>')) {
397 // [{(, bareword=>: hash literal
398 state
= SCE_PL_IDENTIFIER
;
403 backflag
= BACK_NONE
;
404 // an identifier or bareword
405 if (state
== SCE_PL_IDENTIFIER
) {
406 if ((!iswordchar(chNext
) && chNext
!= '\'')
407 || (chNext
== '.' && chNext2
== '.')) {
408 // We need that if length of word == 1!
409 // This test is copied from the SCE_PL_WORD handler.
410 styler
.ColourTo(i
, SCE_PL_IDENTIFIER
);
411 state
= SCE_PL_DEFAULT
;
414 } else if (state
== SCE_PL_WORD
) {
416 if (ch
== '_' && chNext
== '_' &&
417 (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__DATA__")
418 || isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__END__"))) {
419 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
420 state
= SCE_PL_DATASECTION
;
422 styler
.ColourTo(i
, SCE_PL_WORD
);
423 state
= SCE_PL_DEFAULT
;
424 backflag
= BACK_KEYWORD
;
427 ch
= styler
.SafeGetCharAt(i
);
428 chNext
= styler
.SafeGetCharAt(i
+ 1);
429 // a repetition operator 'x'
430 } else if (state
== SCE_PL_OPERATOR
) {
431 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
432 state
= SCE_PL_DEFAULT
;
433 // quote-like delimiter, skip one char if double-char delimiter
436 chNext
= styler
.SafeGetCharAt(i
+ 1);
438 } else if (ch
== '#') {
439 state
= SCE_PL_COMMENTLINE
;
440 } else if (ch
== '\"') {
441 state
= SCE_PL_STRING
;
444 backflag
= BACK_NONE
;
445 } else if (ch
== '\'') {
448 styler
.ColourTo(i
, state
);
450 state
= SCE_PL_CHARACTER
;
454 backflag
= BACK_NONE
;
455 } else if (ch
== '`') {
456 state
= SCE_PL_BACKTICKS
;
459 backflag
= BACK_NONE
;
460 } else if (ch
== '$') {
461 if ((chNext
== '{') || isspacechar(chNext
)) {
462 styler
.ColourTo(i
, SCE_PL_SCALAR
);
464 state
= SCE_PL_SCALAR
;
465 if (chNext
== '`' && chNext2
== '`') {
467 ch
= styler
.SafeGetCharAt(i
);
468 chNext
= styler
.SafeGetCharAt(i
+ 1);
475 backflag
= BACK_NONE
;
476 } else if (ch
== '@') {
477 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$'
478 || chNext
== '_' || chNext
== '+' || chNext
== '-') {
479 state
= SCE_PL_ARRAY
;
480 } else if (chNext
!= '{' && chNext
!= '[') {
481 styler
.ColourTo(i
, SCE_PL_ARRAY
);
483 styler
.ColourTo(i
, SCE_PL_ARRAY
);
485 backflag
= BACK_NONE
;
486 } else if (ch
== '%') {
487 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$'
488 || chNext
== '_' || chNext
== '!' || chNext
== '^') {
493 } else if (chNext
== '{') {
494 styler
.ColourTo(i
, SCE_PL_HASH
);
496 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
498 backflag
= BACK_NONE
;
499 } else if (ch
== '*') {
503 if (isalpha(chNext
) || chNext
== '_' ||
504 NULL
!= strstr("^/|,\\\";#%^:?<>)[]", strch
)) {
505 state
= SCE_PL_SYMBOLTABLE
;
509 } else if (chNext
== '{') {
510 styler
.ColourTo(i
, SCE_PL_SYMBOLTABLE
);
512 if (chNext
== '*') { // exponentiation
517 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
519 backflag
= BACK_NONE
;
520 } else if (ch
== '/' || (ch
== '<' && chNext
== '<')) {
521 // Explicit backward peeking to set a consistent preferRE for
522 // any slash found, so no longer need to track preferRE state.
523 // Find first previous significant lexed element and interpret.
524 // Test for HERE doc start '<<' shares this code, helps to
525 // determine if it should be an operator.
526 bool preferRE
= false;
527 bool isHereDoc
= (ch
== '<');
528 bool hereDocSpace
= false; // these are for corner case:
529 bool hereDocScalar
= false; // SCALAR [whitespace] '<<'
530 unsigned int bk
= (i
> 0)? i
- 1: 0;
533 if (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
)
535 while ((bk
> 0) && (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
||
536 styler
.StyleAt(bk
) == SCE_PL_COMMENTLINE
)) {
540 // position 0 won't really be checked; rarely happens
541 // hard to fix due to an unsigned index i
544 int bkstyle
= styler
.StyleAt(bk
);
545 bkch
= styler
.SafeGetCharAt(bk
);
547 case SCE_PL_OPERATOR
:
549 if (bkch
== ')' || bkch
== ']') {
551 } else if (bkch
== '}') {
552 // backtrack further, count balanced brace pairs
553 // if a brace pair found, see if it's a variable
556 bkstyle
= styler
.StyleAt(bk
);
557 if (bkstyle
== SCE_PL_OPERATOR
) {
558 bkch
= styler
.SafeGetCharAt(bk
);
559 if (bkch
== ';') { // early out
561 } else if (bkch
== '}') {
563 } else if (bkch
== '{') {
564 if (--braceCount
== 0)
570 // at beginning, true
571 } else if (braceCount
== 0) {
572 // balanced { found, bk>0, skip more whitespace
573 if (styler
.StyleAt(--bk
) == SCE_PL_DEFAULT
) {
575 bkstyle
= styler
.StyleAt(--bk
);
576 if (bkstyle
!= SCE_PL_DEFAULT
)
580 bkstyle
= styler
.StyleAt(bk
);
581 if (bkstyle
== SCE_PL_SCALAR
582 || bkstyle
== SCE_PL_ARRAY
583 || bkstyle
== SCE_PL_HASH
584 || bkstyle
== SCE_PL_SYMBOLTABLE
585 || bkstyle
== SCE_PL_OPERATOR
) {
591 case SCE_PL_IDENTIFIER
:
593 if (bkch
== '>') { // inputsymbol
597 // backtrack to find "->" or "::" before identifier
598 while (bk
> 0 && styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
) {
602 bkstyle
= styler
.StyleAt(bk
);
603 if (bkstyle
== SCE_PL_DEFAULT
||
604 bkstyle
== SCE_PL_COMMENTLINE
) {
605 } else if (bkstyle
== SCE_PL_OPERATOR
) {
606 // gcc 3.2.3 bloats if more compact form used
607 bkch
= styler
.SafeGetCharAt(bk
);
608 if (bkch
== '>') { // "->"
609 if (styler
.SafeGetCharAt(bk
- 1) == '-') {
613 } else if (bkch
== ':') { // "::"
614 if (styler
.SafeGetCharAt(bk
- 1) == ':') {
619 } else {// bare identifier, usually a function call but Perl
620 // optimizes them as pseudo-constants, then the next
621 // '/' will be a divide; favour divide over regex
622 // if there is a whitespace after the '/'
623 if (isspacechar(chNext
)) {
631 case SCE_PL_SCALAR
: // for $var<< case
632 hereDocScalar
= true;
634 // other styles uses the default, preferRE=false
637 case SCE_PL_POD_VERB
:
645 if (isHereDoc
) { // handle HERE doc
646 // if SCALAR whitespace '<<', *always* a HERE doc
647 if (preferRE
|| (hereDocSpace
&& hereDocScalar
)) {
648 state
= SCE_PL_HERE_DELIM
;
650 } else { // << operator
654 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
656 } else { // handle regexp
658 state
= SCE_PL_REGEX
;
661 } else { // / operator
662 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
665 backflag
= BACK_NONE
;
666 } else if (ch
== '<') {
667 // looks forward for matching > on same line
668 unsigned int fw
= i
+ 1;
669 while (fw
< lengthDoc
) {
670 char fwch
= styler
.SafeGetCharAt(fw
);
672 if (styler
.SafeGetCharAt(fw
-1) != '\\' ||
673 styler
.SafeGetCharAt(fw
-2) != '\\')
675 } else if (isEOLChar(fwch
) || isspacechar(fwch
)) {
677 } else if (fwch
== '>') {
678 if ((fw
- i
) == 2 && // '<=>' case
679 styler
.SafeGetCharAt(fw
-1) == '=') {
680 styler
.ColourTo(fw
, SCE_PL_OPERATOR
);
682 styler
.ColourTo(fw
, SCE_PL_IDENTIFIER
);
686 chNext
= styler
.SafeGetCharAt(i
+1);
690 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
691 backflag
= BACK_NONE
;
692 } else if (ch
== '=' // POD
694 && (isEOLChar(chPrev
))) {
696 backflag
= BACK_NONE
;
698 //sooked[sookedpos] = '\0';
699 } else if (ch
== '-' // file test operators
700 && isSingleCharOp(chNext
)
701 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
702 styler
.ColourTo(i
+ 1, SCE_PL_WORD
);
703 state
= SCE_PL_DEFAULT
;
707 backflag
= BACK_NONE
;
708 } else if (isPerlOperator(ch
)) {
709 if (ch
== '.' && chNext
== '.') { // .. and ...
711 if (chNext2
== '.') { i
++; }
712 state
= SCE_PL_DEFAULT
;
713 ch
= styler
.SafeGetCharAt(i
);
714 chNext
= styler
.SafeGetCharAt(i
+ 1);
716 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
717 backflag
= BACK_OPERATOR
;
720 // keep colouring defaults to make restart easier
721 styler
.ColourTo(i
, SCE_PL_DEFAULT
);
723 } else if (state
== SCE_PL_NUMBER
) {
726 // double dot is always an operator
728 } else if (numState
<= PERLNUM_FLOAT
) {
729 // non-decimal number or float exponent, consume next dot
730 styler
.ColourTo(i
- 1, SCE_PL_NUMBER
);
731 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
732 state
= SCE_PL_DEFAULT
;
733 } else { // decimal or vectors allows dots
735 if (numState
== PERLNUM_DECIMAL
) {
737 if (isdigit(chNext
)) { // really a vector
738 numState
= PERLNUM_VECTOR
;
739 } else // number then dot
743 if (!isdigit(chNext
)) // vector then dot
747 } else if (ch
== '_' && numState
== PERLNUM_DECIMAL
) {
748 if (!isdigit(chNext
)) {
751 } else if (isalnum(ch
)) {
752 if (numState
== PERLNUM_VECTOR
|| numState
== PERLNUM_V_VECTOR
) {
754 if (dotCount
== 0) { // change to word
755 state
= SCE_PL_IDENTIFIER
;
756 } else { // vector then word
760 } else if (numState
== PERLNUM_DECIMAL
) {
761 if (ch
== 'E' || ch
== 'e') { // exponent
762 numState
= PERLNUM_FLOAT
;
763 if (chNext
== '+' || chNext
== '-') {
768 } else if (!isdigit(ch
)) { // number then word
771 } else if (numState
== PERLNUM_FLOAT
) {
772 if (!isdigit(ch
)) { // float then word
775 } else if (numState
== PERLNUM_OCTAL
) {
779 numState
= PERLNUM_BAD
;
780 } else if (numState
== PERLNUM_BINARY
) {
784 numState
= PERLNUM_BAD
;
785 } else if (numState
== PERLNUM_HEX
) {
786 int ch2
= toupper(ch
);
787 if (!isdigit(ch
) && !(ch2
>= 'A' && ch2
<= 'F'))
789 } else {//(numState == PERLNUM_BAD) {
794 // complete current number or vector
796 styler
.ColourTo(i
- 1, actualNumStyle(numState
));
797 state
= SCE_PL_DEFAULT
;
800 } else if (state
== SCE_PL_IDENTIFIER
) {
801 if (!iswordstart(chNext
) && chNext
!= '\'') {
802 styler
.ColourTo(i
, SCE_PL_IDENTIFIER
);
803 state
= SCE_PL_DEFAULT
;
807 if (state
== SCE_PL_COMMENTLINE
) {
809 styler
.ColourTo(i
- 1, state
);
810 state
= SCE_PL_DEFAULT
;
812 } else if (isEOLChar(chNext
)) {
813 styler
.ColourTo(i
, state
);
814 state
= SCE_PL_DEFAULT
;
816 } else if (state
== SCE_PL_HERE_DELIM
) {
818 // From perldata.pod:
819 // ------------------
820 // A line-oriented form of quoting is based on the shell ``here-doc''
822 // Following a << you specify a string to terminate the quoted material,
823 // and all lines following the current line down to the terminating
824 // string are the value of the item.
825 // The terminating string may be either an identifier (a word),
826 // or some quoted text.
827 // If quoted, the type of quotes you use determines the treatment of
828 // the text, just as in regular quoting.
829 // An unquoted identifier works like double quotes.
830 // There must be no space between the << and the identifier.
831 // (If you put a space it will be treated as a null identifier,
832 // which is valid, and matches the first empty line.)
833 // (This is deprecated, -w warns of this syntax)
834 // The terminating string must appear by itself (unquoted and with no
835 // surrounding whitespace) on the terminating line.
839 // Specifier format is: <<[-]WORD
840 // Optional '-' is for removal of leading tabs from here-doc.
841 // Whitespace acceptable after <<[-] operator.
843 if (HereDoc
.State
== 0) { // '<<' encountered
844 bool gotspace
= false;
845 unsigned int oldi
= i
;
846 if (chNext
== ' ' || chNext
== '\t') {
847 // skip whitespace; legal for quoted delimiters
851 chNext
= styler
.SafeGetCharAt(i
+ 1);
852 } while ((i
+ 1 < lengthDoc
) && (chNext
== ' ' || chNext
== '\t'));
853 chNext2
= styler
.SafeGetCharAt(i
+ 2);
856 HereDoc
.Quote
= chNext
;
857 HereDoc
.Quoted
= false;
858 HereDoc
.DelimiterLength
= 0;
859 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
860 if (chNext
== '\'' || chNext
== '"' || chNext
== '`') {
861 // a quoted here-doc delimiter
865 HereDoc
.Quoted
= true;
866 } else if (isspacechar(chNext
) || isdigit(chNext
) || chNext
== '\\'
867 || chNext
== '=' || chNext
== '$' || chNext
== '@'
868 || ((isalpha(chNext
) || chNext
== '_') && gotspace
)) {
869 // left shift << or <<= operator cases
870 // restore position if operator
872 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
873 state
= SCE_PL_DEFAULT
;
877 // an unquoted here-doc delimiter, no special handling
878 // (cannot be prefixed by spaces/tabs), or
879 // symbols terminates; deprecated zero-length delimiter
882 } else if (HereDoc
.State
== 1) { // collect the delimiter
883 backflag
= BACK_NONE
;
884 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
885 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
886 styler
.ColourTo(i
, state
);
887 state
= SCE_PL_DEFAULT
;
889 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
894 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
895 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
897 } else { // an unquoted here-doc delimiter
898 if (isalnum(ch
) || ch
== '_') {
899 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
900 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
902 styler
.ColourTo(i
- 1, state
);
903 state
= SCE_PL_DEFAULT
;
907 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
908 styler
.ColourTo(i
- 1, state
);
909 state
= SCE_PL_ERROR
;
913 } else if (HereDoc
.State
== 2) {
914 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
915 if (isEOLChar(chPrev
) && isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
916 i
+= HereDoc
.DelimiterLength
;
917 chPrev
= styler
.SafeGetCharAt(i
- 1);
918 ch
= styler
.SafeGetCharAt(i
);
920 styler
.ColourTo(i
- 1, state
);
921 state
= SCE_PL_DEFAULT
;
922 backflag
= BACK_NONE
;
926 chNext
= styler
.SafeGetCharAt(i
+ 1);
928 } else if (state
== SCE_PL_POD
929 || state
== SCE_PL_POD_VERB
) {
930 if (isEOLChar(chPrev
)) {
931 if (ch
== ' ' || ch
== '\t') {
932 styler
.ColourTo(i
- 1, state
);
933 state
= SCE_PL_POD_VERB
;
935 styler
.ColourTo(i
- 1, state
);
938 if (isMatch(styler
, lengthDoc
, i
, "=cut")) {
939 styler
.ColourTo(i
- 1 + 4, state
);
941 state
= SCE_PL_DEFAULT
;
942 ch
= styler
.SafeGetCharAt(i
);
943 //chNext = styler.SafeGetCharAt(i + 1);
949 } else if (state
== SCE_PL_SCALAR
// variable names
950 || state
== SCE_PL_ARRAY
951 || state
== SCE_PL_HASH
952 || state
== SCE_PL_SYMBOLTABLE
) {
953 if (ch
== ':' && chNext
== ':') { // skip ::
958 else if (isEndVar(ch
)) {
959 if (i
== (styler
.GetStartSegment() + 1)) {
960 // Special variable: $(, $_ etc.
961 styler
.ColourTo(i
, state
);
962 state
= SCE_PL_DEFAULT
;
964 styler
.ColourTo(i
- 1, state
);
965 state
= SCE_PL_DEFAULT
;
969 } else if (state
== SCE_PL_REGEX
970 || state
== SCE_PL_STRING_QR
972 if (!Quote
.Up
&& !isspacechar(ch
)) {
974 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
975 // SG: Is it save to skip *every* escaped char?
978 chNext
= styler
.SafeGetCharAt(i
+ 1);
980 if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
982 if (Quote
.Count
== 0) {
984 if (Quote
.Up
== Quote
.Down
) {
988 if (!isalpha(chNext
)) {
989 if (Quote
.Rep
<= 0) {
990 styler
.ColourTo(i
, state
);
991 state
= SCE_PL_DEFAULT
;
995 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
997 } else if (!isalpha(chNext
)) {
998 if (Quote
.Rep
<= 0) {
999 styler
.ColourTo(i
, state
);
1000 state
= SCE_PL_DEFAULT
;
1005 } else if (state
== SCE_PL_REGSUBST
) {
1006 if (!Quote
.Up
&& !isspacechar(ch
)) {
1008 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1009 // SG: Is it save to skip *every* escaped char?
1012 chNext
= styler
.SafeGetCharAt(i
+ 1);
1014 if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
1015 /* We matched something like s(...) or tr{...}
1016 * and are looking for the next matcher characters,
1017 * which could be either bracketed ({...}) or non-bracketed
1020 * Number-signs are problematic. If they occur after
1021 * the close of the first part, treat them like
1022 * a Quote.Up char, even if they actually start comments.
1024 * If we find an alnum, we end the regsubst, and punt.
1026 * Eric Promislow ericp@activestate.com Aug 9,2000
1028 if (isspacechar(ch
)) {
1031 else if (isalnum(ch
)) {
1032 styler
.ColourTo(i
, state
);
1033 state
= SCE_PL_DEFAULT
;
1038 } else if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
1040 if (Quote
.Count
== 0) {
1043 if (!isalpha(chNext
)) {
1044 if (Quote
.Rep
<= 0) {
1045 styler
.ColourTo(i
, state
);
1046 state
= SCE_PL_DEFAULT
;
1050 if (Quote
.Up
== Quote
.Down
) {
1053 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
1055 } else if (!isalpha(chNext
)) {
1056 if (Quote
.Rep
<= 0) {
1057 styler
.ColourTo(i
, state
);
1058 state
= SCE_PL_DEFAULT
;
1063 } else if (state
== SCE_PL_STRING_Q
1064 || state
== SCE_PL_STRING_QQ
1065 || state
== SCE_PL_STRING_QX
1066 || state
== SCE_PL_STRING_QW
1067 || state
== SCE_PL_STRING
1068 || state
== SCE_PL_CHARACTER
1069 || state
== SCE_PL_BACKTICKS
1071 if (!Quote
.Down
&& !isspacechar(ch
)) {
1073 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
1076 chNext
= styler
.SafeGetCharAt(i
+ 1);
1077 } else if (ch
== Quote
.Down
) {
1079 if (Quote
.Count
== 0) {
1081 if (Quote
.Rep
<= 0) {
1082 styler
.ColourTo(i
, state
);
1083 state
= SCE_PL_DEFAULT
;
1086 if (Quote
.Up
== Quote
.Down
) {
1090 } else if (ch
== Quote
.Up
) {
1095 if (state
== SCE_PL_ERROR
) {
1100 styler
.ColourTo(lengthDoc
- 1, state
);
1103 static bool IsCommentLine(int line
, Accessor
&styler
) {
1104 int pos
= styler
.LineStart(line
);
1105 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
1106 for (int i
= pos
; i
< eol_pos
; i
++) {
1107 char ch
= styler
[i
];
1108 int style
= styler
.StyleAt(i
);
1109 if (ch
== '#' && style
== SCE_PL_COMMENTLINE
)
1111 else if (ch
!= ' ' && ch
!= '\t')
1117 static void FoldPerlDoc(unsigned int startPos
, int length
, int, WordList
*[],
1119 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
1120 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
1121 // Custom folding of POD and packages
1122 bool foldPOD
= styler
.GetPropertyInt("fold.perl.pod", 1) != 0;
1123 bool foldPackage
= styler
.GetPropertyInt("fold.perl.package", 1) != 0;
1124 unsigned int endPos
= startPos
+ length
;
1125 int visibleChars
= 0;
1126 int lineCurrent
= styler
.GetLine(startPos
);
1127 int levelPrev
= SC_FOLDLEVELBASE
;
1128 if (lineCurrent
> 0)
1129 levelPrev
= styler
.LevelAt(lineCurrent
- 1) >> 16;
1130 int levelCurrent
= levelPrev
;
1131 char chNext
= styler
[startPos
];
1132 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
1133 int styleNext
= styler
.StyleAt(startPos
);
1134 // Used at end of line to determine if the line was a package definition
1135 bool isPackageLine
= false;
1136 bool isPodHeading
= false;
1137 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
1139 chNext
= styler
.SafeGetCharAt(i
+ 1);
1140 int style
= styleNext
;
1141 styleNext
= styler
.StyleAt(i
+ 1);
1142 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
1143 bool atLineStart
= isEOLChar(chPrev
) || i
== 0;
1145 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
1147 if (!IsCommentLine(lineCurrent
- 1, styler
)
1148 && IsCommentLine(lineCurrent
+ 1, styler
))
1150 else if (IsCommentLine(lineCurrent
- 1, styler
)
1151 && !IsCommentLine(lineCurrent
+1, styler
))
1154 if (style
== SCE_C_OPERATOR
) {
1157 } else if (ch
== '}') {
1161 // Custom POD folding
1162 if (foldPOD
&& atLineStart
) {
1163 int stylePrevCh
= (i
) ? styler
.StyleAt(i
- 1):SCE_PL_DEFAULT
;
1164 if (style
== SCE_PL_POD
) {
1165 if (stylePrevCh
!= SCE_PL_POD
&& stylePrevCh
!= SCE_PL_POD_VERB
)
1167 else if (styler
.Match(i
, "=cut"))
1169 else if (styler
.Match(i
, "=head"))
1170 isPodHeading
= true;
1171 } else if (style
== SCE_PL_DATASECTION
) {
1172 if (ch
== '=' && isalpha(chNext
) && levelCurrent
== SC_FOLDLEVELBASE
)
1174 else if (styler
.Match(i
, "=cut") && levelCurrent
> SC_FOLDLEVELBASE
)
1176 else if (styler
.Match(i
, "=head"))
1177 isPodHeading
= true;
1178 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1179 // reset needed as level test is vs. SC_FOLDLEVELBASE
1180 else if (styler
.Match(i
, "__END__"))
1181 levelCurrent
= SC_FOLDLEVELBASE
;
1184 // Custom package folding
1185 if (foldPackage
&& atLineStart
) {
1186 if (style
== SCE_PL_WORD
&& styler
.Match(i
, "package")) {
1187 isPackageLine
= true;
1192 int lev
= levelPrev
;
1194 lev
= levelPrev
- 1;
1195 lev
|= SC_FOLDLEVELHEADERFLAG
;
1196 isPodHeading
= false;
1198 // Check if line was a package declaration
1199 // because packages need "special" treatment
1200 if (isPackageLine
) {
1201 lev
= SC_FOLDLEVELBASE
| SC_FOLDLEVELHEADERFLAG
;
1202 levelCurrent
= SC_FOLDLEVELBASE
+ 1;
1203 isPackageLine
= false;
1205 lev
|= levelCurrent
<< 16;
1206 if (visibleChars
== 0 && foldCompact
)
1207 lev
|= SC_FOLDLEVELWHITEFLAG
;
1208 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1209 lev
|= SC_FOLDLEVELHEADERFLAG
;
1210 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1211 styler
.SetLevel(lineCurrent
, lev
);
1214 levelPrev
= levelCurrent
;
1217 if (!isspacechar(ch
))
1221 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1222 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
1223 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
1226 static const char * const perlWordListDesc
[] = {
1231 LexerModule
lmPerl(SCLEX_PERL
, ColourisePerlDoc
, "perl", FoldPerlDoc
, perlWordListDesc
);