]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
Update Scintilla to version 1.75
[wxWidgets.git] / src / stc / scintilla / src / LexPerl.cxx
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for subset of Perl.
4 **/
5 // Copyright 1998-2007 by Neil Hodgson <neilh@scintilla.org>
6 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
23 #ifdef SCI_NAMESPACE
24 using namespace Scintilla;
25 #endif
26
27 #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
28 #define PERLNUM_HEX 2
29 #define PERLNUM_OCTAL 3
30 #define PERLNUM_FLOAT 4 // actually exponent part
31 #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
32 #define PERLNUM_VECTOR 6
33 #define PERLNUM_V_VECTOR 7
34 #define PERLNUM_BAD 8
35
36 #define BACK_NONE 0 // lookback state for bareword disambiguation:
37 #define BACK_OPERATOR 1 // whitespace/comments are insignificant
38 #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
39
40 #define HERE_DELIM_MAX 256
41
// Returns true when ch is one of the two end-of-line characters (CR or LF).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
45
// Returns true when ch is one of the letters accepted after '-' as a
// single-character file test operator (-r, -w, -x, ... -A, -C).
static bool isSingleCharOp(char ch) {
	// A NUL byte must be rejected explicitly: both strstr() with an empty
	// needle and strchr() looking for '\0' report a match, which would
	// wrongly classify "-\0" as a file test operator.
	if (ch == '\0')
		return false;
	return NULL != strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch);
}
52
// Returns true when ch is a character that is always lexed as an operator.
// '%', '*', '<' and '/' are deliberately absent: the caller has already
// tested for them before this function is reached.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
65
66 static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
67 char s[100];
68 unsigned int i, len = end - start;
69 if (len > 30) { len = 30; }
70 for (i = 0; i < len; i++, start++) s[i] = styler[start];
71 s[i] = '\0';
72 return keywords.InList(s);
73 }
74
// Note: the lexer works on chars, so the bytes of UTF-8 multi-byte sequences
// are seen as negative (non-ASCII) values.
// Note: iswordchar() was used in only one place in LexPerl and is not needed,
// because '.' is processed as the concatenation operator; only isWordStart()
// is used in LexPerl.
79
// Returns true when ch can be part of a word: an underscore, an ASCII letter
// or digit, or any non-ASCII byte (e.g. part of a UTF-8 sequence).
static inline bool isWordStart(char ch) {
	if (ch == '_')
		return true;
	// non-ASCII bytes are accepted without consulting isalnum()
	if (!isascii(ch))
		return true;
	return isalnum(ch) != 0;
}
83
// Returns true when ch terminates a variable name: an ASCII character that
// is neither alphanumeric nor one of '#', '$', '_' or '\''.
// Non-ASCII bytes never end a variable name.
static inline bool isEndVar(char ch) {
	if (!isascii(ch) || isalnum(ch))
		return false;
	switch (ch) {
	case '#':
	case '$':
	case '_':
	case '\'':
		return false;
	default:
		return true;
	}
}
88
// Returns true when ch cannot serve as the delimiter of a quote-like
// operator: any non-ASCII byte, an ASCII letter or digit, or an underscore.
static inline bool isNonQuote(char ch) {
	const bool plainAscii = isascii(ch) != 0;
	return !plainAscii || ch == '_' || isalnum(ch) != 0;
}
92
93 static inline char actualNumStyle(int numberStyle) {
94 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
95 return SCE_PL_STRING;
96 } else if (numberStyle == PERLNUM_BAD) {
97 return SCE_PL_ERROR;
98 }
99 return SCE_PL_NUMBER;
100 }
101
102 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
103 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
104 return false;
105 }
106 while (*val) {
107 if (*val != styler[pos++]) {
108 return false;
109 }
110 val++;
111 }
112 return true;
113 }
114
// Returns the closing delimiter that pairs with an opening bracket
// character; any other character is its own closing delimiter.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
126
127 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
128 WordList *keywordlists[], Accessor &styler) {
129
130 // Lexer for perl often has to backtrack to start of current style to determine
131 // which characters are being used as quotes, how deeply nested is the
132 // start position and what the termination string is for here documents
133
134 WordList &keywords = *keywordlists[0];
135
136 // keywords that forces /PATTERN/ at all times
137 WordList reWords;
138 reWords.Set("elsif if split while");
139
140 class HereDocCls {
141 public:
142 int State; // 0: '<<' encountered
143 // 1: collect the delimiter
144 // 2: here doc text (lines after the delimiter)
145 char Quote; // the char after '<<'
146 bool Quoted; // true if Quote in ('\'','"','`')
147 int DelimiterLength; // strlen(Delimiter)
148 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
149 HereDocCls() {
150 State = 0;
151 Quote = 0;
152 Quoted = false;
153 DelimiterLength = 0;
154 Delimiter = new char[HERE_DELIM_MAX];
155 Delimiter[0] = '\0';
156 }
157 ~HereDocCls() {
158 delete []Delimiter;
159 }
160 };
161 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
162
163 class QuoteCls {
164 public:
165 int Rep;
166 int Count;
167 char Up;
168 char Down;
169 QuoteCls() {
170 this->New(1);
171 }
172 void New(int r) {
173 Rep = r;
174 Count = 0;
175 Up = '\0';
176 Down = '\0';
177 }
178 void Open(char u) {
179 Count++;
180 Up = u;
181 Down = opposite(Up);
182 }
183 };
184 QuoteCls Quote;
185
186 int state = initStyle;
187 char numState = PERLNUM_DECIMAL;
188 int dotCount = 0;
189 unsigned int lengthDoc = startPos + length;
190 //int sookedpos = 0; // these have no apparent use, see POD state
191 //char sooked[100];
192 //sooked[sookedpos] = '\0';
193
194 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
195 // If in a long distance lexical state, seek to the beginning to find quote characters
196 // Perl strings can be multi-line with embedded newlines, so backtrack.
197 // Perl numbers have additional state during lexing, so backtrack too.
198 if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
199 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
200 startPos--;
201 }
202 startPos = styler.LineStart(styler.GetLine(startPos));
203 state = styler.StyleAt(startPos - 1);
204 }
205 // Backtrack for format body.
206 if (state == SCE_PL_FORMAT) {
207 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) {
208 startPos--;
209 }
210 startPos = styler.LineStart(styler.GetLine(startPos));
211 state = styler.StyleAt(startPos - 1);
212 }
213 if ( state == SCE_PL_STRING_Q
214 || state == SCE_PL_STRING_QQ
215 || state == SCE_PL_STRING_QX
216 || state == SCE_PL_STRING_QR
217 || state == SCE_PL_STRING_QW
218 || state == SCE_PL_REGEX
219 || state == SCE_PL_REGSUBST
220 || state == SCE_PL_STRING
221 || state == SCE_PL_BACKTICKS
222 || state == SCE_PL_CHARACTER
223 || state == SCE_PL_NUMBER
224 || state == SCE_PL_IDENTIFIER
225 || state == SCE_PL_ERROR
226 || state == SCE_PL_SUB_PROTOTYPE
227 ) {
228 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
229 startPos--;
230 }
231 state = SCE_PL_DEFAULT;
232 }
233
234 // lookback at start of lexing to set proper state for backflag
235 // after this, they are updated when elements are lexed
236 int backflag = BACK_NONE;
237 unsigned int backPos = startPos;
238 if (backPos > 0) {
239 backPos--;
240 int sty = SCE_PL_DEFAULT;
241 while ((backPos > 0) && (sty = styler.StyleAt(backPos),
242 sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
243 backPos--;
244 if (sty == SCE_PL_OPERATOR)
245 backflag = BACK_OPERATOR;
246 else if (sty == SCE_PL_WORD)
247 backflag = BACK_KEYWORD;
248 }
249
250 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
251 char chPrev = styler.SafeGetCharAt(startPos - 1);
252 if (startPos == 0)
253 chPrev = '\n';
254 char chNext = styler[startPos];
255 styler.StartSegment(startPos);
256
257 for (unsigned int i = startPos; i < lengthDoc; i++) {
258 char ch = chNext;
259 // if the current character is not consumed due to the completion of an
260 // earlier style, lexing can be restarted via a simple goto
261 restartLexer:
262 chNext = styler.SafeGetCharAt(i + 1);
263 char chNext2 = styler.SafeGetCharAt(i + 2);
264
265 if (styler.IsLeadByte(ch)) {
266 chNext = styler.SafeGetCharAt(i + 2);
267 chPrev = ' ';
268 i += 1;
269 continue;
270 }
271 if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
272 styler.ColourTo(i, state);
273 chPrev = ch;
274 continue;
275 }
276
277 if (HereDoc.State == 1 && isEOLChar(ch)) {
278 // Begin of here-doc (the line after the here-doc delimiter):
279 // Lexically, the here-doc starts from the next line after the >>, but the
280 // first line of here-doc seem to follow the style of the last EOL sequence
281 HereDoc.State = 2;
282 if (HereDoc.Quoted) {
283 if (state == SCE_PL_HERE_DELIM) {
284 // Missing quote at end of string! We are stricter than perl.
285 // Colour here-doc anyway while marking this bit as an error.
286 state = SCE_PL_ERROR;
287 }
288 styler.ColourTo(i - 1, state);
289 switch (HereDoc.Quote) {
290 case '\'':
291 state = SCE_PL_HERE_Q ;
292 break;
293 case '"':
294 state = SCE_PL_HERE_QQ;
295 break;
296 case '`':
297 state = SCE_PL_HERE_QX;
298 break;
299 }
300 } else {
301 styler.ColourTo(i - 1, state);
302 switch (HereDoc.Quote) {
303 case '\\':
304 state = SCE_PL_HERE_Q ;
305 break;
306 default :
307 state = SCE_PL_HERE_QQ;
308 }
309 }
310 }
311 if (HereDoc.State == 4 && isEOLChar(ch)) {
312 // Start of format body.
313 HereDoc.State = 0;
314 styler.ColourTo(i - 1, state);
315 state = SCE_PL_FORMAT;
316 }
317
318 if (state == SCE_PL_DEFAULT) {
319 if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) &&
320 (ch == '.' || ch == 'v'))) {
321 state = SCE_PL_NUMBER;
322 backflag = BACK_NONE;
323 numState = PERLNUM_DECIMAL;
324 dotCount = 0;
325 if (ch == '0') { // hex,bin,octal
326 if (chNext == 'x') {
327 numState = PERLNUM_HEX;
328 } else if (chNext == 'b') {
329 numState = PERLNUM_BINARY;
330 } else if (isascii(chNext) && isdigit(chNext)) {
331 numState = PERLNUM_OCTAL;
332 }
333 if (numState != PERLNUM_DECIMAL) {
334 i++;
335 ch = chNext;
336 chNext = chNext2;
337 }
338 } else if (ch == 'v') { // vector
339 numState = PERLNUM_V_VECTOR;
340 }
341 } else if (isWordStart(ch)) {
342 // if immediately prefixed by '::', always a bareword
343 state = SCE_PL_WORD;
344 if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
345 state = SCE_PL_IDENTIFIER;
346 }
347 unsigned int kw = i + 1;
348 // first check for possible quote-like delimiter
349 if (ch == 's' && !isNonQuote(chNext)) {
350 state = SCE_PL_REGSUBST;
351 Quote.New(2);
352 } else if (ch == 'm' && !isNonQuote(chNext)) {
353 state = SCE_PL_REGEX;
354 Quote.New(1);
355 } else if (ch == 'q' && !isNonQuote(chNext)) {
356 state = SCE_PL_STRING_Q;
357 Quote.New(1);
358 } else if (ch == 'y' && !isNonQuote(chNext)) {
359 state = SCE_PL_REGSUBST;
360 Quote.New(2);
361 } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
362 state = SCE_PL_REGSUBST;
363 Quote.New(2);
364 kw++;
365 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
366 if (chNext == 'q') state = SCE_PL_STRING_QQ;
367 else if (chNext == 'x') state = SCE_PL_STRING_QX;
368 else if (chNext == 'r') state = SCE_PL_STRING_QR;
369 else if (chNext == 'w') state = SCE_PL_STRING_QW;
370 Quote.New(1);
371 kw++;
372 } else if (ch == 'x' && (chNext == '=' || // repetition
373 !isWordStart(chNext) ||
374 (isdigit(chPrev) && isdigit(chNext)))) {
375 state = SCE_PL_OPERATOR;
376 }
377 // if potentially a keyword, scan forward and grab word, then check
378 // if it's really one; if yes, disambiguation test is performed
379 // otherwise it is always a bareword and we skip a lot of scanning
380 // note: keywords assumed to be limited to [_a-zA-Z] only
381 if (state == SCE_PL_WORD) {
382 while (isWordStart(styler.SafeGetCharAt(kw))) kw++;
383 if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
384 state = SCE_PL_IDENTIFIER;
385 }
386 }
387 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
388 // for quote-like delimiters/keywords, attempt to disambiguate
389 // to select for bareword, change state -> SCE_PL_IDENTIFIER
390 if (state != SCE_PL_IDENTIFIER && i > 0) {
391 unsigned int j = i;
392 bool moreback = false; // true if passed newline/comments
393 bool brace = false; // true if opening brace found
394 char ch2;
395 // first look backwards past whitespace/comments for EOLs
396 // if BACK_NONE, neither operator nor keyword, so skip test
397 if (backflag != BACK_NONE) {
398 while (--j > backPos) {
399 if (isEOLChar(styler.SafeGetCharAt(j)))
400 moreback = true;
401 }
402 ch2 = styler.SafeGetCharAt(j);
403 if (ch2 == '{' && !moreback) {
404 // {bareword: possible variable spec
405 brace = true;
406 } else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&')
407 // &bareword: subroutine call
408 || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
409 // ->bareword: part of variable spec
410 || (ch2 == 'b' && styler.Match(j - 2, "su"))) {
411 // sub bareword: subroutine declaration
412 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
413 state = SCE_PL_IDENTIFIER;
414 }
415 // if status still ambiguous, look forward after word past
416 // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
417 // match anything, so skip the whole thing
418 j = kw;
419 if (state != SCE_PL_IDENTIFIER
420 && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
421 && kw < lengthDoc) {
422 while (ch2 = styler.SafeGetCharAt(j),
423 (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
424 j++;
425 }
426 if ((ch2 == '}' && brace)
427 // {bareword}: variable spec
428 || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
429 // [{(, bareword=>: hash literal
430 state = SCE_PL_IDENTIFIER;
431 }
432 }
433 }
434 }
435 backflag = BACK_NONE;
436 // an identifier or bareword
437 if (state == SCE_PL_IDENTIFIER) {
438 if ((!isWordStart(chNext) && chNext != '\'')
439 || (chNext == '.' && chNext2 == '.')) {
440 // We need that if length of word == 1!
441 // This test is copied from the SCE_PL_WORD handler.
442 styler.ColourTo(i, SCE_PL_IDENTIFIER);
443 state = SCE_PL_DEFAULT;
444 }
445 // a keyword
446 } else if (state == SCE_PL_WORD) {
447 i = kw - 1;
448 if (ch == '_' && chNext == '_' &&
449 (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
450 || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
451 styler.ColourTo(i, SCE_PL_DATASECTION);
452 state = SCE_PL_DATASECTION;
453 } else {
454 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) {
455 state = SCE_PL_FORMAT_IDENT;
456 HereDoc.State = 0;
457 } else {
458 state = SCE_PL_DEFAULT;
459 }
460 styler.ColourTo(i, SCE_PL_WORD);
461 backflag = BACK_KEYWORD;
462 backPos = i;
463 }
464 ch = styler.SafeGetCharAt(i);
465 chNext = styler.SafeGetCharAt(i + 1);
466 // a repetition operator 'x'
467 } else if (state == SCE_PL_OPERATOR) {
468 state = SCE_PL_DEFAULT;
469 goto handleOperator;
470 // quote-like delimiter, skip one char if double-char delimiter
471 } else {
472 i = kw - 1;
473 chNext = styler.SafeGetCharAt(i + 1);
474 }
475 } else if (ch == '#') {
476 state = SCE_PL_COMMENTLINE;
477 } else if (ch == '\"') {
478 state = SCE_PL_STRING;
479 Quote.New(1);
480 Quote.Open(ch);
481 backflag = BACK_NONE;
482 } else if (ch == '\'') {
483 if (chPrev == '&') {
484 // Archaic call
485 styler.ColourTo(i, state);
486 } else {
487 state = SCE_PL_CHARACTER;
488 Quote.New(1);
489 Quote.Open(ch);
490 }
491 backflag = BACK_NONE;
492 } else if (ch == '`') {
493 state = SCE_PL_BACKTICKS;
494 Quote.New(1);
495 Quote.Open(ch);
496 backflag = BACK_NONE;
497 } else if (ch == '$') {
498 if ((chNext == '{') || isspacechar(chNext)) {
499 styler.ColourTo(i, SCE_PL_SCALAR);
500 } else {
501 state = SCE_PL_SCALAR;
502 if ((chNext == '`' && chNext2 == '`')
503 || (chNext == ':' && chNext2 == ':')) {
504 i += 2;
505 ch = styler.SafeGetCharAt(i);
506 chNext = styler.SafeGetCharAt(i + 1);
507 } else {
508 i++;
509 ch = chNext;
510 chNext = chNext2;
511 }
512 }
513 backflag = BACK_NONE;
514 } else if (ch == '@') {
515 if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
516 || chNext == '_' || chNext == '+' || chNext == '-') {
517 state = SCE_PL_ARRAY;
518 } else if (chNext == ':' && chNext2 == ':') {
519 state = SCE_PL_ARRAY;
520 i += 2;
521 ch = styler.SafeGetCharAt(i);
522 chNext = styler.SafeGetCharAt(i + 1);
523 } else if (chNext != '{' && chNext != '[') {
524 styler.ColourTo(i, SCE_PL_ARRAY);
525 } else {
526 styler.ColourTo(i, SCE_PL_ARRAY);
527 }
528 backflag = BACK_NONE;
529 } else if (ch == '%') {
530 backflag = BACK_NONE;
531 if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
532 || chNext == '_' || chNext == '!' || chNext == '^') {
533 state = SCE_PL_HASH;
534 i++;
535 ch = chNext;
536 chNext = chNext2;
537 } else if (chNext == ':' && chNext2 == ':') {
538 state = SCE_PL_HASH;
539 i += 2;
540 ch = styler.SafeGetCharAt(i);
541 chNext = styler.SafeGetCharAt(i + 1);
542 } else if (chNext == '{') {
543 styler.ColourTo(i, SCE_PL_HASH);
544 } else {
545 goto handleOperator;
546 }
547 } else if (ch == '*') {
548 backflag = BACK_NONE;
549 char strch[2];
550 strch[0] = chNext;
551 strch[1] = '\0';
552 if (chNext == ':' && chNext2 == ':') {
553 state = SCE_PL_SYMBOLTABLE;
554 i += 2;
555 ch = styler.SafeGetCharAt(i);
556 chNext = styler.SafeGetCharAt(i + 1);
557 } else if (!isascii(chNext) || isalpha(chNext) || chNext == '_'
558 || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
559 state = SCE_PL_SYMBOLTABLE;
560 i++;
561 ch = chNext;
562 chNext = chNext2;
563 } else if (chNext == '{') {
564 styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
565 } else {
566 if (chNext == '*') { // exponentiation
567 i++;
568 ch = chNext;
569 chNext = chNext2;
570 }
571 goto handleOperator;
572 }
573 } else if (ch == '/' || (ch == '<' && chNext == '<')) {
574 // Explicit backward peeking to set a consistent preferRE for
575 // any slash found, so no longer need to track preferRE state.
576 // Find first previous significant lexed element and interpret.
577 // Test for HERE doc start '<<' shares this code, helps to
578 // determine if it should be an operator.
579 bool preferRE = false;
580 bool isHereDoc = (ch == '<');
581 bool hereDocSpace = false; // these are for corner case:
582 bool hereDocScalar = false; // SCALAR [whitespace] '<<'
583 unsigned int bk = (i > 0)? i - 1: 0;
584 unsigned int bkend;
585 char bkch;
586 styler.Flush();
587 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
588 hereDocSpace = true;
589 while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
590 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
591 bk--;
592 }
593 if (bk == 0) {
594 // position 0 won't really be checked; rarely happens
595 // hard to fix due to an unsigned index i
596 preferRE = true;
597 } else {
598 int bkstyle = styler.StyleAt(bk);
599 bkch = styler.SafeGetCharAt(bk);
600 switch(bkstyle) {
601 case SCE_PL_OPERATOR:
602 preferRE = true;
603 if (bkch == ')' || bkch == ']') {
604 preferRE = false;
605 } else if (bkch == '}') {
606 // backtrack further, count balanced brace pairs
607 // if a brace pair found, see if it's a variable
608 int braceCount = 1;
609 while (--bk > 0) {
610 bkstyle = styler.StyleAt(bk);
611 if (bkstyle == SCE_PL_OPERATOR) {
612 bkch = styler.SafeGetCharAt(bk);
613 if (bkch == ';') { // early out
614 break;
615 } else if (bkch == '}') {
616 braceCount++;
617 } else if (bkch == '{') {
618 if (--braceCount == 0)
619 break;
620 }
621 }
622 }
623 if (bk == 0) {
624 // at beginning, true
625 } else if (braceCount == 0) {
626 // balanced { found, bk>0, skip more whitespace
627 if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
628 while (bk > 0) {
629 bkstyle = styler.StyleAt(--bk);
630 if (bkstyle != SCE_PL_DEFAULT)
631 break;
632 }
633 }
634 bkstyle = styler.StyleAt(bk);
635 if (bkstyle == SCE_PL_SCALAR
636 || bkstyle == SCE_PL_ARRAY
637 || bkstyle == SCE_PL_HASH
638 || bkstyle == SCE_PL_SYMBOLTABLE
639 || bkstyle == SCE_PL_OPERATOR) {
640 preferRE = false;
641 }
642 }
643 }
644 break;
645 case SCE_PL_IDENTIFIER:
646 preferRE = true;
647 if (bkch == '>') { // inputsymbol
648 preferRE = false;
649 break;
650 }
651 // backtrack to find "->" or "::" before identifier
652 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
653 bk--;
654 }
655 while (bk > 0) {
656 bkstyle = styler.StyleAt(bk);
657 if (bkstyle == SCE_PL_DEFAULT ||
658 bkstyle == SCE_PL_COMMENTLINE) {
659 } else if (bkstyle == SCE_PL_OPERATOR) {
660 bkch = styler.SafeGetCharAt(bk);
661 // test for "->" and "::"
662 if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-')
663 || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) {
664 preferRE = false;
665 break;
666 }
667 } else {
668 // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/'
669 if (!isHereDoc &&
670 (isspacechar(chNext) || isdigit(chNext)))
671 preferRE = false;
672 // HERE docs cannot have a space after the >>
673 if (isspacechar(chNext))
674 preferRE = false;
675 break;
676 }
677 bk--;
678 }
679 break;
680 case SCE_PL_SCALAR: // for $var<< case
681 hereDocScalar = true;
682 break;
683 // for HERE docs, always true for preferRE
684 case SCE_PL_WORD:
685 preferRE = true;
686 if (isHereDoc)
687 break;
688 // adopt heuristics similar to vim-style rules:
689 // keywords always forced as /PATTERN/: split, if, elsif, while
690 // everything else /PATTERN/ unless digit/space immediately after '/'
691 bkend = bk + 1;
692 while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) {
693 bk--;
694 }
695 if (isPerlKeyword(bk, bkend, reWords, styler))
696 break;
697 if (isspacechar(chNext) || isdigit(chNext))
698 preferRE = false;
699 break;
700 // other styles uses the default, preferRE=false
701 case SCE_PL_POD:
702 case SCE_PL_POD_VERB:
703 case SCE_PL_HERE_Q:
704 case SCE_PL_HERE_QQ:
705 case SCE_PL_HERE_QX:
706 preferRE = true;
707 break;
708 }
709 }
710 backflag = BACK_NONE;
711 if (isHereDoc) { // handle HERE doc
712 // if SCALAR whitespace '<<', *always* a HERE doc
713 if (preferRE || (hereDocSpace && hereDocScalar)) {
714 state = SCE_PL_HERE_DELIM;
715 HereDoc.State = 0;
716 } else { // << operator
717 i++;
718 ch = chNext;
719 chNext = chNext2;
720 goto handleOperator;
721 }
722 } else { // handle regexp
723 if (preferRE) {
724 state = SCE_PL_REGEX;
725 Quote.New(1);
726 Quote.Open(ch);
727 } else { // / operator
728 goto handleOperator;
729 }
730 }
731 } else if (ch == '<') {
732 // looks forward for matching > on same line
733 unsigned int fw = i + 1;
734 while (fw < lengthDoc) {
735 char fwch = styler.SafeGetCharAt(fw);
736 if (fwch == ' ') {
737 if (styler.SafeGetCharAt(fw-1) != '\\' ||
738 styler.SafeGetCharAt(fw-2) != '\\')
739 goto handleOperator;
740 } else if (isEOLChar(fwch) || isspacechar(fwch)) {
741 goto handleOperator;
742 } else if (fwch == '>') {
743 if ((fw - i) == 2 && // '<=>' case
744 styler.SafeGetCharAt(fw-1) == '=') {
745 goto handleOperator;
746 }
747 styler.ColourTo(fw, SCE_PL_IDENTIFIER);
748 i = fw;
749 ch = fwch;
750 chNext = styler.SafeGetCharAt(i+1);
751 }
752 fw++;
753 }
754 if (fw == lengthDoc)
755 goto handleOperator;
756 } else if (ch == '=' // POD
757 && isalpha(chNext)
758 && (isEOLChar(chPrev))) {
759 state = SCE_PL_POD;
760 backflag = BACK_NONE;
761 //sookedpos = 0;
762 //sooked[sookedpos] = '\0';
763 } else if (ch == '-' // file test operators
764 && isSingleCharOp(chNext)
765 && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
766 styler.ColourTo(i + 1, SCE_PL_WORD);
767 state = SCE_PL_DEFAULT;
768 i++;
769 ch = chNext;
770 chNext = chNext2;
771 backflag = BACK_NONE;
772 } else if (ch == '-' // bareword promotion (-FOO cases)
773 && ((isascii(chNext) && isalpha(chNext)) || chNext == '_')
774 && backflag != BACK_NONE) {
775 state = SCE_PL_IDENTIFIER;
776 backflag = BACK_NONE;
777 } else if (ch == '(' && i > 0) {
778 // backtrack to identify if we're starting a sub prototype
779 // for generality, we need to ignore whitespace/comments
780 unsigned int bk = i - 1; // i > 0 tested above
781 styler.Flush();
782 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
783 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
784 bk--;
785 }
786 if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
787 goto handleOperator;
788 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
789 bk--;
790 }
791 while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
792 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
793 bk--;
794 }
795 if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
796 || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
797 goto handleOperator;
798 state = SCE_PL_SUB_PROTOTYPE;
799 backflag = BACK_NONE;
800 backPos = i; // needed for restart
801 } else if (isPerlOperator(ch)) {
802 if (ch == '.' && chNext == '.') { // .. and ...
803 i++;
804 if (chNext2 == '.') { i++; }
805 state = SCE_PL_DEFAULT;
806 ch = styler.SafeGetCharAt(i);
807 chNext = styler.SafeGetCharAt(i + 1);
808 }
809 handleOperator:
810 styler.ColourTo(i, SCE_PL_OPERATOR);
811 backflag = BACK_OPERATOR;
812 backPos = i;
813 } else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source
814 styler.ColourTo(i, SCE_PL_DATASECTION);
815 state = SCE_PL_DATASECTION;
816 } else {
817 // keep colouring defaults to make restart easier
818 styler.ColourTo(i, SCE_PL_DEFAULT);
819 }
820 } else if (state == SCE_PL_NUMBER) {
821 if (ch == '.') {
822 if (chNext == '.') {
823 // double dot is always an operator
824 goto numAtEnd;
825 } else if (numState <= PERLNUM_FLOAT) {
826 // non-decimal number or float exponent, consume next dot
827 styler.ColourTo(i - 1, SCE_PL_NUMBER);
828 state = SCE_PL_DEFAULT;
829 goto handleOperator;
830 } else { // decimal or vectors allows dots
831 dotCount++;
832 if (numState == PERLNUM_DECIMAL) {
833 if (dotCount > 1) {
834 if (isdigit(chNext)) { // really a vector
835 numState = PERLNUM_VECTOR;
836 } else // number then dot
837 goto numAtEnd;
838 }
839 } else { // vectors
840 if (!isdigit(chNext)) // vector then dot
841 goto numAtEnd;
842 }
843 }
844 } else if (ch == '_') {
845 // permissive underscoring for number and vector literals
846 } else if (!isascii(ch) || isalnum(ch)) {
847 if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
848 if (!isascii(ch) || isalpha(ch)) {
849 if (dotCount == 0) { // change to word
850 state = SCE_PL_IDENTIFIER;
851 } else { // vector then word
852 goto numAtEnd;
853 }
854 }
855 } else if (numState == PERLNUM_DECIMAL) {
856 if (ch == 'E' || ch == 'e') { // exponent
857 numState = PERLNUM_FLOAT;
858 if (chNext == '+' || chNext == '-') {
859 i++;
860 ch = chNext;
861 chNext = chNext2;
862 }
863 } else if (!isascii(ch) || !isdigit(ch)) { // number then word
864 goto numAtEnd;
865 }
866 } else if (numState == PERLNUM_FLOAT) {
867 if (!isdigit(ch)) { // float then word
868 goto numAtEnd;
869 }
870 } else if (numState == PERLNUM_OCTAL) {
871 if (!isdigit(ch))
872 goto numAtEnd;
873 else if (ch > '7')
874 numState = PERLNUM_BAD;
875 } else if (numState == PERLNUM_BINARY) {
876 if (!isdigit(ch))
877 goto numAtEnd;
878 else if (ch > '1')
879 numState = PERLNUM_BAD;
880 } else if (numState == PERLNUM_HEX) {
881 int ch2 = toupper(ch);
882 if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
883 goto numAtEnd;
884 } else {//(numState == PERLNUM_BAD) {
885 if (!isdigit(ch))
886 goto numAtEnd;
887 }
888 } else {
889 // complete current number or vector
890 numAtEnd:
891 styler.ColourTo(i - 1, actualNumStyle(numState));
892 state = SCE_PL_DEFAULT;
893 goto restartLexer;
894 }
895 } else if (state == SCE_PL_IDENTIFIER) {
896 if (!isWordStart(chNext) && chNext != '\'') {
897 styler.ColourTo(i, SCE_PL_IDENTIFIER);
898 state = SCE_PL_DEFAULT;
899 ch = ' ';
900 }
901 } else {
902 if (state == SCE_PL_COMMENTLINE) {
903 if (isEOLChar(ch)) {
904 styler.ColourTo(i - 1, state);
905 state = SCE_PL_DEFAULT;
906 goto restartLexer;
907 } else if (isEOLChar(chNext)) {
908 styler.ColourTo(i, state);
909 state = SCE_PL_DEFAULT;
910 }
911 } else if (state == SCE_PL_HERE_DELIM) {
912 //
913 // From perldata.pod:
914 // ------------------
915 // A line-oriented form of quoting is based on the shell ``here-doc''
916 // syntax.
917 // Following a << you specify a string to terminate the quoted material,
918 // and all lines following the current line down to the terminating
919 // string are the value of the item.
920 // The terminating string may be either an identifier (a word),
921 // or some quoted text.
922 // If quoted, the type of quotes you use determines the treatment of
923 // the text, just as in regular quoting.
924 // An unquoted identifier works like double quotes.
925 // There must be no space between the << and the identifier.
926 // (If you put a space it will be treated as a null identifier,
927 // which is valid, and matches the first empty line.)
928 // (This is deprecated, -w warns of this syntax)
929 // The terminating string must appear by itself (unquoted and with no
930 // surrounding whitespace) on the terminating line.
931 //
932 // From Bash info:
933 // ---------------
934 // Specifier format is: <<[-]WORD
935 // Optional '-' is for removal of leading tabs from here-doc.
936 // Whitespace acceptable after <<[-] operator.
937 //
938 if (HereDoc.State == 0) { // '<<' encountered
939 bool gotspace = false;
940 unsigned int oldi = i;
941 if (chNext == ' ' || chNext == '\t') {
942 // skip whitespace; legal for quoted delimiters
943 gotspace = true;
944 do {
945 i++;
946 chNext = styler.SafeGetCharAt(i + 1);
947 } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
948 chNext2 = styler.SafeGetCharAt(i + 2);
949 }
950 HereDoc.State = 1;
951 HereDoc.Quote = chNext;
952 HereDoc.Quoted = false;
953 HereDoc.DelimiterLength = 0;
954 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
955 if (chNext == '\'' || chNext == '"' || chNext == '`') {
956 // a quoted here-doc delimiter
957 i++;
958 ch = chNext;
959 chNext = chNext2;
960 HereDoc.Quoted = true;
961 } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
962 || chNext == '=' || chNext == '$' || chNext == '@'
963 || ((isalpha(chNext) || chNext == '_') && gotspace)) {
964 // left shift << or <<= operator cases
965 // restore position if operator
966 i = oldi;
967 styler.ColourTo(i, SCE_PL_OPERATOR);
968 state = SCE_PL_DEFAULT;
969 backflag = BACK_OPERATOR;
970 backPos = i;
971 HereDoc.State = 0;
972 goto restartLexer;
973 } else {
974 // an unquoted here-doc delimiter, no special handling
975 // (cannot be prefixed by spaces/tabs), or
976 // symbols terminates; deprecated zero-length delimiter
977 }
978
979 } else if (HereDoc.State == 1) { // collect the delimiter
980 backflag = BACK_NONE;
981 if (HereDoc.Quoted) { // a quoted here-doc delimiter
982 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
983 styler.ColourTo(i, state);
984 state = SCE_PL_DEFAULT;
985 } else {
986 if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
987 i++;
988 ch = chNext;
989 chNext = chNext2;
990 }
991 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
992 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
993 }
994 } else { // an unquoted here-doc delimiter
995 if (isalnum(ch) || ch == '_') {
996 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
997 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
998 } else {
999 styler.ColourTo(i - 1, state);
1000 state = SCE_PL_DEFAULT;
1001 goto restartLexer;
1002 }
1003 }
1004 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
1005 styler.ColourTo(i - 1, state);
1006 state = SCE_PL_ERROR;
1007 goto restartLexer;
1008 }
1009 }
1010 } else if (HereDoc.State == 2) {
1011 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
1012 if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1013 i += HereDoc.DelimiterLength;
1014 chPrev = styler.SafeGetCharAt(i - 1);
1015 ch = styler.SafeGetCharAt(i);
1016 if (isEOLChar(ch)) {
1017 styler.ColourTo(i - 1, state);
1018 state = SCE_PL_DEFAULT;
1019 backflag = BACK_NONE;
1020 HereDoc.State = 0;
1021 goto restartLexer;
1022 }
1023 chNext = styler.SafeGetCharAt(i + 1);
1024 }
1025 } else if (state == SCE_PL_POD
1026 || state == SCE_PL_POD_VERB) {
1027 if (isEOLChar(chPrev)) {
1028 if (ch == ' ' || ch == '\t') {
1029 styler.ColourTo(i - 1, state);
1030 state = SCE_PL_POD_VERB;
1031 } else {
1032 styler.ColourTo(i - 1, state);
1033 state = SCE_PL_POD;
1034 if (ch == '=') {
1035 if (isMatch(styler, lengthDoc, i, "=cut")) {
1036 styler.ColourTo(i - 1 + 4, state);
1037 i += 4;
1038 state = SCE_PL_DEFAULT;
1039 ch = styler.SafeGetCharAt(i);
1040 //chNext = styler.SafeGetCharAt(i + 1);
1041 goto restartLexer;
1042 }
1043 }
1044 }
1045 }
1046 } else if (state == SCE_PL_SCALAR // variable names
1047 || state == SCE_PL_ARRAY
1048 || state == SCE_PL_HASH
1049 || state == SCE_PL_SYMBOLTABLE) {
1050 if (ch == ':' && chNext == ':') { // skip ::
1051 i++;
1052 ch = chNext;
1053 chNext = chNext2;
1054 }
1055 else if (isEndVar(ch)) {
1056 if (i == (styler.GetStartSegment() + 1)) {
1057 // Special variable: $(, $_ etc.
1058 styler.ColourTo(i, state);
1059 state = SCE_PL_DEFAULT;
1060 } else {
1061 styler.ColourTo(i - 1, state);
1062 state = SCE_PL_DEFAULT;
1063 goto restartLexer;
1064 }
1065 }
1066 } else if (state == SCE_PL_REGEX
1067 || state == SCE_PL_STRING_QR
1068 ) {
1069 if (!Quote.Up && !isspacechar(ch)) {
1070 Quote.Open(ch);
1071 } else if (ch == '\\' && Quote.Up != '\\') {
1072 // SG: Is it safe to skip *every* escaped char?
1073 i++;
1074 ch = chNext;
1075 chNext = styler.SafeGetCharAt(i + 1);
1076 } else {
1077 if (ch == Quote.Down /*&& chPrev != '\\'*/) {
1078 Quote.Count--;
1079 if (Quote.Count == 0) {
1080 Quote.Rep--;
1081 if (Quote.Up == Quote.Down) {
1082 Quote.Count++;
1083 }
1084 }
1085 if (!isalpha(chNext)) {
1086 if (Quote.Rep <= 0) {
1087 styler.ColourTo(i, state);
1088 state = SCE_PL_DEFAULT;
1089 ch = ' ';
1090 }
1091 }
1092 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
1093 Quote.Count++;
1094 } else if (!isascii(chNext) || !isalpha(chNext)) {
1095 if (Quote.Rep <= 0) {
1096 styler.ColourTo(i, state);
1097 state = SCE_PL_DEFAULT;
1098 ch = ' ';
1099 }
1100 }
1101 }
1102 } else if (state == SCE_PL_REGSUBST) {
1103 if (!Quote.Up && !isspacechar(ch)) {
1104 Quote.Open(ch);
1105 } else if (ch == '\\' && Quote.Up != '\\') {
1106 // SG: Is it safe to skip *every* escaped char?
1107 i++;
1108 ch = chNext;
1109 chNext = styler.SafeGetCharAt(i + 1);
1110 } else {
1111 if (Quote.Count == 0 && Quote.Rep == 1) {
1112 /* We matched something like s(...) or tr{...}
1113 * and are looking for the next matcher characters,
1114 * which could be either bracketed ({...}) or non-bracketed
1115 * (/.../).
1116 *
1117 * Number-signs are problematic. If they occur after
1118 * the close of the first part, treat them like
1119 * a Quote.Up char, even if they actually start comments.
1120 *
1121 * If we find an alnum, we end the regsubst, and punt.
1122 *
1123 * Eric Promislow ericp@activestate.com Aug 9,2000
1124 */
1125 if (isspacechar(ch)) {
1126 // Keep going
1127 }
1128 else if (!isascii(ch) || isalnum(ch)) {
1129 styler.ColourTo(i, state);
1130 state = SCE_PL_DEFAULT;
1131 ch = ' ';
1132 } else {
1133 Quote.Open(ch);
1134 }
1135 } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
1136 Quote.Count--;
1137 if (Quote.Count == 0) {
1138 Quote.Rep--;
1139 }
1140 if (!isascii(chNext) || !isalpha(chNext)) {
1141 if (Quote.Rep <= 0) {
1142 styler.ColourTo(i, state);
1143 state = SCE_PL_DEFAULT;
1144 ch = ' ';
1145 }
1146 }
1147 if (Quote.Up == Quote.Down) {
1148 Quote.Count++;
1149 }
1150 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
1151 Quote.Count++;
1152 } else if (!isascii(chNext) || !isalpha(chNext)) {
1153 if (Quote.Rep <= 0) {
1154 styler.ColourTo(i, state);
1155 state = SCE_PL_DEFAULT;
1156 ch = ' ';
1157 }
1158 }
1159 }
1160 } else if (state == SCE_PL_STRING_Q
1161 || state == SCE_PL_STRING_QQ
1162 || state == SCE_PL_STRING_QX
1163 || state == SCE_PL_STRING_QW
1164 || state == SCE_PL_STRING
1165 || state == SCE_PL_CHARACTER
1166 || state == SCE_PL_BACKTICKS
1167 ) {
1168 if (!Quote.Down && !isspacechar(ch)) {
1169 Quote.Open(ch);
1170 } else if (ch == '\\' && Quote.Up != '\\') {
1171 i++;
1172 ch = chNext;
1173 chNext = styler.SafeGetCharAt(i + 1);
1174 } else if (ch == Quote.Down) {
1175 Quote.Count--;
1176 if (Quote.Count == 0) {
1177 Quote.Rep--;
1178 if (Quote.Rep <= 0) {
1179 styler.ColourTo(i, state);
1180 state = SCE_PL_DEFAULT;
1181 ch = ' ';
1182 }
1183 if (Quote.Up == Quote.Down) {
1184 Quote.Count++;
1185 }
1186 }
1187 } else if (ch == Quote.Up) {
1188 Quote.Count++;
1189 }
1190 } else if (state == SCE_PL_SUB_PROTOTYPE) {
1191 char strch[2];
1192 strch[0] = ch;
1193 strch[1] = '\0';
1194 if (NULL != strstr("\\[$@%&*];", strch)) {
1195 // keep going
1196 } else if (ch == ')') {
1197 styler.ColourTo(i, state);
1198 state = SCE_PL_DEFAULT;
1199 } else {
1200 // abandon prototype, restart from '('
1201 i = backPos;
1202 styler.ColourTo(i, SCE_PL_OPERATOR);
1203 ch = styler.SafeGetCharAt(i);
1204 chNext = styler.SafeGetCharAt(i + 1);
1205 state = SCE_PL_DEFAULT;
1206 }
1207 } else if (state == SCE_PL_FORMAT_IDENT) {
1208 // occupies different HereDoc states to avoid clashing with HERE docs
1209 if (HereDoc.State == 0) {
1210 if ((isascii(ch) && isalpha(ch)) || ch == '_' // probable identifier
1211 || ch == '=') { // no identifier
1212 HereDoc.State = 3;
1213 HereDoc.Quoted = false; // whitespace flag
1214 } else if (ch == ' ' || ch == '\t') {
1215 styler.ColourTo(i, SCE_PL_DEFAULT);
1216 } else {
1217 state = SCE_PL_DEFAULT;
1218 HereDoc.State = 0;
1219 goto restartLexer;
1220 }
1221 }
1222 if (HereDoc.State == 3) { // with just a '=', state goes 0->3->4
1223 if (ch == '=') {
1224 styler.ColourTo(i, SCE_PL_FORMAT_IDENT);
1225 state = SCE_PL_DEFAULT;
1226 HereDoc.State = 4;
1227 } else if (ch == ' ' || ch == '\t') {
1228 HereDoc.Quoted = true;
1229 } else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) {
1230 // abandon format, restart from after 'format'
1231 i = backPos + 1;
1232 ch = styler.SafeGetCharAt(i);
1233 chNext = styler.SafeGetCharAt(i + 1);
1234 state = SCE_PL_DEFAULT;
1235 HereDoc.State = 0;
1236 }
1237 }
1238 } else if (state == SCE_PL_FORMAT) {
1239 if (isEOLChar(chPrev)) {
1240 styler.ColourTo(i - 1, state);
1241 if (ch == '.' && isEOLChar(chNext)) {
1242 styler.ColourTo(i, state);
1243 state = SCE_PL_DEFAULT;
1244 }
1245 }
1246 }
1247 }
1248 if (state == SCE_PL_ERROR) {
1249 break;
1250 }
1251 chPrev = ch;
1252 }
1253 styler.ColourTo(lengthDoc - 1, state);
1254 }
1255
1256 static bool IsCommentLine(int line, Accessor &styler) {
1257 int pos = styler.LineStart(line);
1258 int eol_pos = styler.LineStart(line + 1) - 1;
1259 for (int i = pos; i < eol_pos; i++) {
1260 char ch = styler[i];
1261 int style = styler.StyleAt(i);
1262 if (ch == '#' && style == SCE_PL_COMMENTLINE)
1263 return true;
1264 else if (ch != ' ' && ch != '\t')
1265 return false;
1266 }
1267 return false;
1268 }
1269
1270 static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1271 Accessor &styler) {
1272 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1273 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1274 // Custom folding of POD and packages
1275 bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1276 bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1277 unsigned int endPos = startPos + length;
1278 int visibleChars = 0;
1279 int lineCurrent = styler.GetLine(startPos);
1280 int levelPrev = SC_FOLDLEVELBASE;
1281 if (lineCurrent > 0)
1282 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1283 int levelCurrent = levelPrev;
1284 char chNext = styler[startPos];
1285 char chPrev = styler.SafeGetCharAt(startPos - 1);
1286 int styleNext = styler.StyleAt(startPos);
1287 // Used at end of line to determine if the line was a package definition
1288 bool isPackageLine = false;
1289 bool isPodHeading = false;
1290 for (unsigned int i = startPos; i < endPos; i++) {
1291 char ch = chNext;
1292 chNext = styler.SafeGetCharAt(i + 1);
1293 int style = styleNext;
1294 styleNext = styler.StyleAt(i + 1);
1295 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1296 bool atLineStart = isEOLChar(chPrev) || i == 0;
1297 // Comment folding
1298 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1299 {
1300 if (!IsCommentLine(lineCurrent - 1, styler)
1301 && IsCommentLine(lineCurrent + 1, styler))
1302 levelCurrent++;
1303 else if (IsCommentLine(lineCurrent - 1, styler)
1304 && !IsCommentLine(lineCurrent+1, styler))
1305 levelCurrent--;
1306 }
1307 if (style == SCE_C_OPERATOR) {
1308 if (ch == '{') {
1309 levelCurrent++;
1310 } else if (ch == '}') {
1311 levelCurrent--;
1312 }
1313 }
1314 // Custom POD folding
1315 if (foldPOD && atLineStart) {
1316 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1317 if (style == SCE_PL_POD) {
1318 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1319 levelCurrent++;
1320 else if (styler.Match(i, "=cut"))
1321 levelCurrent--;
1322 else if (styler.Match(i, "=head"))
1323 isPodHeading = true;
1324 } else if (style == SCE_PL_DATASECTION) {
1325 if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1326 levelCurrent++;
1327 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1328 levelCurrent--;
1329 else if (styler.Match(i, "=head"))
1330 isPodHeading = true;
1331 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1332 // reset needed as level test is vs. SC_FOLDLEVELBASE
1333 else if (styler.Match(i, "__END__"))
1334 levelCurrent = SC_FOLDLEVELBASE;
1335 }
1336 }
1337 // Custom package folding
1338 if (foldPackage && atLineStart) {
1339 if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1340 isPackageLine = true;
1341 }
1342 }
1343
1344 if (atEOL) {
1345 int lev = levelPrev;
1346 if (isPodHeading) {
1347 lev = levelPrev - 1;
1348 lev |= SC_FOLDLEVELHEADERFLAG;
1349 isPodHeading = false;
1350 }
1351 // Check if line was a package declaration
1352 // because packages need "special" treatment
1353 if (isPackageLine) {
1354 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1355 levelCurrent = SC_FOLDLEVELBASE + 1;
1356 isPackageLine = false;
1357 }
1358 lev |= levelCurrent << 16;
1359 if (visibleChars == 0 && foldCompact)
1360 lev |= SC_FOLDLEVELWHITEFLAG;
1361 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1362 lev |= SC_FOLDLEVELHEADERFLAG;
1363 if (lev != styler.LevelAt(lineCurrent)) {
1364 styler.SetLevel(lineCurrent, lev);
1365 }
1366 lineCurrent++;
1367 levelPrev = levelCurrent;
1368 visibleChars = 0;
1369 }
1370 if (!isspacechar(ch))
1371 visibleChars++;
1372 chPrev = ch;
1373 }
1374 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1375 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1376 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1377 }
1378
// Descriptions of the word lists accepted by the Perl lexer module.
// The array ends with a null entry.
static const char * const perlWordListDesc[] = { "Keywords", 0 };
1383
1384 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);
1385