]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
1 // Scintilla source code edit control
3 ** Lexer for subset of Perl.
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
19 #include "Scintilla.h"
22 static inline bool isEOLChar(char ch
) {
23 return (ch
== '\r') || (ch
== '\n');
26 static bool isSingleCharOp(char ch
) {
30 return (NULL
!= strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet
));
33 static inline bool isPerlOperator(char ch
) {
36 // '.' left out as it is used to make up numbers
37 if (ch
== '%' || ch
== '^' || ch
== '&' || ch
== '*' || ch
== '\\' ||
38 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
39 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
40 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
41 ch
== '<' || ch
== '>' || ch
== ',' || ch
== '/' ||
42 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~')
47 static int classifyWordPerl(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
49 bool wordIsNumber
= isdigit(styler
[start
]) || (styler
[start
] == '.');
50 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
51 s
[i
] = styler
[start
+ i
];
54 char chAttr
= SCE_PL_IDENTIFIER
;
56 chAttr
= SCE_PL_NUMBER
;
58 if (keywords
.InList(s
))
61 styler
.ColourTo(end
, chAttr
);
65 static inline bool isEndVar(char ch
) {
66 return !isalnum(ch
) && ch
!= '#' && ch
!= '$' &&
67 ch
!= '_' && ch
!= '\'';
70 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
71 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
75 if (*val
!= styler
[pos
++]) {
83 static char opposite(char ch
) {
95 static void ColourisePerlDoc(unsigned int startPos
, int length
, int initStyle
,
96 WordList
*keywordlists
[], Accessor
&styler
) {
98 // Lexer for perl often has to backtrack to start of current style to determine
99 // which characters are being used as quotes, how deeply nested is the
100 // start position and what the termination string is for here documents
102 WordList
&keywords
= *keywordlists
[0];
106 int State
; // 0: '<<' encountered
107 // 1: collect the delimiter
108 // 2: here doc text (lines after the delimiter)
109 char Quote
; // the char after '<<'
110 bool Quoted
; // true if Quote in ('\'','"','`')
111 int DelimiterLength
; // strlen(Delimiter)
112 char Delimiter
[256]; // the Delimiter, 256: sizeof PL_tokenbuf
119 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
146 bool preferRE
= true;
147 sooked
[sookedpos
] = '\0';
148 int state
= initStyle
;
149 unsigned int lengthDoc
= startPos
+ length
;
151 // If in a long distance lexical state, seek to the beginning to find quote characters
152 if (state
== SCE_PL_HERE_Q
|| state
== SCE_PL_HERE_QQ
|| state
== SCE_PL_HERE_QX
) {
153 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_PL_HERE_DELIM
)) {
156 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
157 state
= styler
.StyleAt(startPos
- 1);
159 if ( state
== SCE_PL_STRING_Q
160 || state
== SCE_PL_STRING_QQ
161 || state
== SCE_PL_STRING_QX
162 || state
== SCE_PL_STRING_QR
163 || state
== SCE_PL_STRING_QW
164 || state
== SCE_PL_REGEX
165 || state
== SCE_PL_REGSUBST
167 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
170 state
= SCE_PL_DEFAULT
;
173 styler
.StartAt(startPos
);
174 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
177 char chNext
= styler
[startPos
];
178 styler
.StartSegment(startPos
);
180 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
182 chNext
= styler
.SafeGetCharAt(i
+ 1);
183 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
185 if (styler
.IsLeadByte(ch
)) {
186 chNext
= styler
.SafeGetCharAt(i
+ 2);
191 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
196 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
197 // Begin of here-doc (the line after the here-doc delimiter):
199 styler
.ColourTo(i
- 1, state
);
200 if (HereDoc
.Quoted
) {
201 if (state
== SCE_PL_HERE_DELIM
) {
202 // Missing quote at end of string! We are stricter than perl.
203 state
= SCE_PL_ERROR
;
205 switch (HereDoc
.Quote
) {
207 state
= SCE_PL_HERE_Q
;
210 state
= SCE_PL_HERE_QQ
;
213 state
= SCE_PL_HERE_QX
;
218 switch (HereDoc
.Quote
) {
220 state
= SCE_PL_HERE_Q
;
223 state
= SCE_PL_HERE_QQ
;
228 if (state
== SCE_PL_DEFAULT
) {
229 if (iswordstart(ch
)) {
230 styler
.ColourTo(i
- 1, state
);
231 if (ch
== 's' && !isalnum(chNext
)) {
232 state
= SCE_PL_REGSUBST
;
234 } else if (ch
== 'm' && !isalnum(chNext
)) {
235 state
= SCE_PL_REGEX
;
237 } else if (ch
== 'q' && !isalnum(chNext
)) {
238 state
= SCE_PL_STRING_Q
;
240 } else if (ch
== 'y' && !isalnum(chNext
)) {
241 state
= SCE_PL_REGSUBST
;
243 } else if (ch
== 't' && chNext
== 'r' && !isalnum(chNext2
)) {
244 state
= SCE_PL_REGSUBST
;
248 } else if (ch
== 'q' && (chNext
== 'q' || chNext
== 'r' || chNext
== 'w' || chNext
== 'x') && !isalnum(chNext2
)) {
249 if (chNext
== 'q') state
= SCE_PL_STRING_QQ
;
250 else if (chNext
== 'x') state
= SCE_PL_STRING_QX
;
251 else if (chNext
== 'r') state
= SCE_PL_STRING_QR
;
252 else if (chNext
== 'w') state
= SCE_PL_STRING_QW
;
259 if ((!iswordchar(chNext
) && chNext
!= '\'')
260 || (chNext
== '.' && chNext2
== '.')) {
261 // We need that if length of word == 1!
262 // This test is copied from the SCE_PL_WORD handler.
263 classifyWordPerl(styler
.GetStartSegment(), i
, keywords
, styler
);
264 state
= SCE_PL_DEFAULT
;
267 } else if (ch
== '#') {
268 styler
.ColourTo(i
- 1, state
);
269 state
= SCE_PL_COMMENTLINE
;
270 } else if (ch
== '\"') {
271 styler
.ColourTo(i
- 1, state
);
272 state
= SCE_PL_STRING
;
275 } else if (ch
== '\'') {
278 styler
.ColourTo(i
, state
);
280 styler
.ColourTo(i
- 1, state
);
281 state
= SCE_PL_CHARACTER
;
285 } else if (ch
== '`') {
286 styler
.ColourTo(i
- 1, state
);
287 state
= SCE_PL_BACKTICKS
;
290 } else if (ch
== '$') {
292 styler
.ColourTo(i
- 1, state
);
293 if ((chNext
== '{') || isspacechar(chNext
)) {
294 styler
.ColourTo(i
, SCE_PL_SCALAR
);
296 state
= SCE_PL_SCALAR
;
301 } else if (ch
== '@') {
303 styler
.ColourTo(i
- 1, state
);
304 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$' || chNext
== '_') {
305 state
= SCE_PL_ARRAY
;
306 } else if (chNext
!= '{' && chNext
!= '[') {
307 styler
.ColourTo(i
, SCE_PL_ARRAY
);
311 styler
.ColourTo(i
, SCE_PL_ARRAY
);
313 } else if (ch
== '%') {
315 styler
.ColourTo(i
- 1, state
);
316 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$' || chNext
== '_') {
318 } else if (chNext
== '{') {
319 styler
.ColourTo(i
, SCE_PL_HASH
);
321 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
323 } else if (ch
== '*') {
324 styler
.ColourTo(i
- 1, state
);
325 state
= SCE_PL_SYMBOLTABLE
;
326 } else if (ch
== '/' && preferRE
) {
327 styler
.ColourTo(i
- 1, state
);
328 state
= SCE_PL_REGEX
;
331 } else if (ch
== '<' && chNext
== '<') {
332 styler
.ColourTo(i
- 1, state
);
333 state
= SCE_PL_HERE_DELIM
;
337 && (isEOLChar(chPrev
))) {
338 styler
.ColourTo(i
- 1, state
);
341 sooked
[sookedpos
] = '\0';
343 && isSingleCharOp(chNext
)
344 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
345 styler
.ColourTo(i
- 1, state
);
346 styler
.ColourTo(i
+ 1, SCE_PL_WORD
);
347 state
= SCE_PL_DEFAULT
;
351 chNext
= chNext2
= styler
.SafeGetCharAt(i
+ 1);
352 } else if (isPerlOperator(ch
)) {
353 if (ch
== ')' || ch
== ']')
357 styler
.ColourTo(i
- 1, state
);
358 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
360 } else if (state
== SCE_PL_WORD
) {
361 if ((!iswordchar(chNext
) && chNext
!= '\'')
362 || (chNext
== '.' && chNext2
== '.')) {
363 // ".." is always an operator if preceded by a SCE_PL_WORD.
364 // Archaic Perl has quotes inside names
365 if (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__DATA__")) {
366 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
367 state
= SCE_PL_DATASECTION
;
368 } else if (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__END__")) {
369 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
370 state
= SCE_PL_DATASECTION
;
372 if (classifyWordPerl(styler
.GetStartSegment(), i
, keywords
, styler
) == SCE_PL_WORD
)
374 state
= SCE_PL_DEFAULT
;
379 if (state
== SCE_PL_COMMENTLINE
) {
381 styler
.ColourTo(i
- 1, state
);
382 state
= SCE_PL_DEFAULT
;
384 } else if (state
== SCE_PL_HERE_DELIM
) {
386 // From perldata.pod:
387 // ------------------
388 // A line-oriented form of quoting is based on the shell ``here-doc''
390 // Following a << you specify a string to terminate the quoted material,
391 // and all lines following the current line down to the terminating
392 // string are the value of the item.
393 // The terminating string may be either an identifier (a word),
394 // or some quoted text.
395 // If quoted, the type of quotes you use determines the treatment of
396 // the text, just as in regular quoting.
397 // An unquoted identifier works like double quotes.
398 // There must be no space between the << and the identifier.
399 // (If you put a space it will be treated as a null identifier,
400 // which is valid, and matches the first empty line.)
401 // The terminating string must appear by itself (unquoted and with no
402 // surrounding whitespace) on the terminating line.
404 if (HereDoc
.State
== 0) { // '<<' encountered
406 HereDoc
.Quote
= chNext
;
407 HereDoc
.Quoted
= false;
408 HereDoc
.DelimiterLength
= 0;
409 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
410 if (chNext
== '\'' || chNext
== '"' || chNext
== '`') { // a quoted here-doc delimiter
414 HereDoc
.Quoted
= true;
415 } else if (chNext
== '\\') { // ref?
419 } else if (isalnum(chNext
) || chNext
== '_') { // an unquoted here-doc delimiter
421 else if (isspacechar(chNext
)) { // deprecated here-doc delimiter || TODO: left shift operator
426 } else if (HereDoc
.State
== 1) { // collect the delimiter
427 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
428 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
429 styler
.ColourTo(i
, state
);
430 state
= SCE_PL_DEFAULT
;
435 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
440 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
441 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
443 } else { // an unquoted here-doc delimiter
444 if (isalnum(ch
) || ch
== '_') {
445 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
446 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
448 styler
.ColourTo(i
- 1, state
);
449 state
= SCE_PL_DEFAULT
;
452 if (HereDoc
.DelimiterLength
>= static_cast<int>(sizeof(HereDoc
.Delimiter
)) - 1) {
453 styler
.ColourTo(i
- 1, state
);
454 state
= SCE_PL_ERROR
;
457 } else if (HereDoc
.State
== 2) {
458 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
459 if (isEOLChar(chPrev
) && isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
460 i
+= HereDoc
.DelimiterLength
;
461 chNext
= styler
.SafeGetCharAt(i
);
462 if (isEOLChar(chNext
)) {
463 styler
.ColourTo(i
- 1, state
);
464 state
= SCE_PL_DEFAULT
;
468 chNext
= styler
.SafeGetCharAt(i
+ 1);
470 } else if (state
== SCE_PL_POD
) {
471 if (ch
== '=' && isEOLChar(chPrev
)) {
472 if (isMatch(styler
, lengthDoc
, i
, "=cut")) {
473 styler
.ColourTo(i
- 1 + 4, state
);
475 state
= SCE_PL_DEFAULT
;
476 ch
= styler
.SafeGetCharAt(i
);
477 chNext
= styler
.SafeGetCharAt(i
+ 1);
480 } else if (state
== SCE_PL_SCALAR
) {
482 if (i
== (styler
.GetStartSegment() + 1)) {
483 // Special variable: $(, $_ etc.
484 styler
.ColourTo(i
, state
);
486 styler
.ColourTo(i
- 1, state
);
488 state
= SCE_PL_DEFAULT
;
490 } else if (state
== SCE_PL_ARRAY
) {
492 styler
.ColourTo(i
- 1, state
);
493 state
= SCE_PL_DEFAULT
;
495 } else if (state
== SCE_PL_HASH
) {
497 styler
.ColourTo(i
- 1, state
);
498 state
= SCE_PL_DEFAULT
;
500 } else if (state
== SCE_PL_SYMBOLTABLE
) {
502 styler
.ColourTo(i
- 1, state
);
503 state
= SCE_PL_DEFAULT
;
505 } else if (state
== SCE_PL_REGEX
506 || state
== SCE_PL_STRING_QR
508 if (!Quote
.Up
&& !isspacechar(ch
)) {
510 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
511 // SG: Is it save to skip *every* escaped char?
514 chNext
= styler
.SafeGetCharAt(i
+ 1);
516 if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
518 if (Quote
.Count
== 0) {
520 if (Quote
.Up
== Quote
.Down
) {
524 if (!isalpha(chNext
)) {
525 if (Quote
.Rep
<= 0) {
526 styler
.ColourTo(i
, state
);
527 state
= SCE_PL_DEFAULT
;
531 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
533 } else if (!isalpha(chNext
)) {
534 if (Quote
.Rep
<= 0) {
535 styler
.ColourTo(i
, state
);
536 state
= SCE_PL_DEFAULT
;
541 } else if (state
== SCE_PL_REGSUBST
) {
542 if (!Quote
.Up
&& !isspacechar(ch
)) {
544 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
545 // SG: Is it save to skip *every* escaped char?
548 chNext
= styler
.SafeGetCharAt(i
+ 1);
550 if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
551 /* We matched something like s(...) or tr{...}
552 * and are looking for the next matcher characters,
553 * which could be either bracketed ({...}) or non-bracketed
556 * Number-signs are problematic. If they occur after
557 * the close of the first part, treat them like
558 * a Quote.Up char, even if they actually start comments.
560 * If we find an alnum, we end the regsubst, and punt.
562 * Eric Promislow ericp@activestate.com Aug 9,2000
564 if (isspacechar(ch
)) {
567 else if (isalnum(ch
)) {
568 styler
.ColourTo(i
, state
);
569 state
= SCE_PL_DEFAULT
;
574 } else if (ch
== Quote
.Down
/*&& chPrev != '\\'*/) {
576 if (Quote
.Count
== 0) {
579 if (!isalpha(chNext
)) {
580 if (Quote
.Rep
<= 0) {
581 styler
.ColourTo(i
, state
);
582 state
= SCE_PL_DEFAULT
;
586 if (Quote
.Up
== Quote
.Down
) {
589 } else if (ch
== Quote
.Up
/*&& chPrev != '\\'*/) {
591 } else if (!isalpha(chNext
)) {
592 if (Quote
.Rep
<= 0) {
593 styler
.ColourTo(i
, state
);
594 state
= SCE_PL_DEFAULT
;
599 } else if (state
== SCE_PL_STRING_Q
600 || state
== SCE_PL_STRING_QQ
601 || state
== SCE_PL_STRING_QX
602 || state
== SCE_PL_STRING_QW
603 || state
== SCE_PL_STRING
604 || state
== SCE_PL_CHARACTER
605 || state
== SCE_PL_BACKTICKS
607 if (!Quote
.Down
&& !isspacechar(ch
)) {
609 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
612 chNext
= styler
.SafeGetCharAt(i
+ 1);
613 } else if (ch
== Quote
.Down
) {
615 if (Quote
.Count
== 0) {
617 if (Quote
.Rep
<= 0) {
618 styler
.ColourTo(i
, state
);
619 state
= SCE_PL_DEFAULT
;
622 if (Quote
.Up
== Quote
.Down
) {
626 } else if (ch
== Quote
.Up
) {
631 if (state
== SCE_PL_DEFAULT
) { // One of the above succeeded
633 state
= SCE_PL_COMMENTLINE
;
634 } else if (ch
== '\"') {
635 state
= SCE_PL_STRING
;
638 } else if (ch
== '\'') {
639 state
= SCE_PL_CHARACTER
;
642 } else if (iswordstart(ch
)) {
645 } else if (isPerlOperator(ch
)) {
646 if (ch
== ')' || ch
== ']')
650 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
654 if (state
== SCE_PL_ERROR
) {
659 styler
.ColourTo(lengthDoc
- 1, state
);
662 static void FoldPerlDoc(unsigned int startPos
, int length
, int, WordList
*[],
664 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
665 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
666 unsigned int endPos
= startPos
+ length
;
667 int visibleChars
= 0;
668 int lineCurrent
= styler
.GetLine(startPos
);
669 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
670 int levelCurrent
= levelPrev
;
671 char chNext
= styler
[startPos
];
672 int styleNext
= styler
.StyleAt(startPos
);
673 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
675 chNext
= styler
.SafeGetCharAt(i
+ 1);
676 int style
= styleNext
;
677 styleNext
= styler
.StyleAt(i
+ 1);
678 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
679 if (foldComment
&& (style
== SCE_PL_COMMENTLINE
)) {
680 if ((ch
== '/') && (chNext
== '/')) {
681 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
682 if (chNext2
== '{') {
684 } else if (chNext2
== '}') {
689 if (style
== SCE_C_OPERATOR
) {
692 } else if (ch
== '}') {
698 if (visibleChars
== 0 && foldCompact
)
699 lev
|= SC_FOLDLEVELWHITEFLAG
;
700 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
701 lev
|= SC_FOLDLEVELHEADERFLAG
;
702 if (lev
!= styler
.LevelAt(lineCurrent
)) {
703 styler
.SetLevel(lineCurrent
, lev
);
706 levelPrev
= levelCurrent
;
709 if (!isspacechar(ch
))
712 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
713 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
714 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
717 static const char * const perlWordListDesc
[] = {
722 LexerModule
lmPerl(SCLEX_PERL
, ColourisePerlDoc
, "perl", FoldPerlDoc
, perlWordListDesc
);