]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexPerl.cxx
16a068778e774d06abdc879349c8e43886731e16
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexPerl.cxx
1 // Scintilla source code edit control
2 /** @file LexPerl.cxx
3 ** Lexer for subset of Perl.
4 **/
5 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> 20031020
6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
// Sub-states used by ColourisePerlDoc (held in numState) while it is inside
// a SCE_PL_NUMBER token.
#define PERLNUM_DECIMAL 1	// initial assumption for every number
#define PERLNUM_NON_DEC 2	// leading '0' followed by 'x', 'b' or a digit
#define PERLNUM_FLOAT 3	// decimal in which an 'e'/'E' exponent was seen
#define PERLNUM_VECTOR 4	// decimal with more than one dot followed by a digit
#define PERLNUM_V_VECTOR 5	// number introduced by a leading 'v'

// Size in bytes of the buffer that holds a here-doc delimiter.
#define HERE_DELIM_MAX 256
30
// True when ch is one of the two end-of-line characters (CR or LF).
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
34
// True when ch is one of the letters that may follow '-' to form a Perl
// file test operator (-r, -w, -x, -e, ...).
//
// The NUL character is rejected explicitly: searching a string for '\0'
// (or for an empty needle) always "succeeds" because it matches the
// terminator, which would wrongly classify NUL as an operator letter.
static bool isSingleCharOp(char ch) {
	if (ch == '\0')
		return false;
	return strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch) != NULL;
}
41
// True when ch is one of the punctuation characters the lexer colours as
// SCE_PL_OPERATOR when no more specific rule has claimed it.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
52
53 static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
54 char s[100];
55 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
56 s[i] = styler[start + i];
57 s[i + 1] = '\0';
58 }
59 char chAttr = SCE_PL_IDENTIFIER;
60 if (keywords.InList(s))
61 chAttr = SCE_PL_WORD;
62 styler.ColourTo(end, chAttr);
63 return chAttr;
64 }
65
// True when ch cannot be part of a variable name, i.e. it is not
// alphanumeric and is none of '#', '$', '_' or the archaic package
// separator '\''.
static inline bool isEndVar(char ch) {
	// <ctype.h> functions require a value representable as unsigned char;
	// a negative plain char (any byte >= 0x80 where char is signed) would be
	// undefined behaviour, so convert first.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
70
// True when ch is alphanumeric or '_'. The lexer uses this to decide that
// the character after q, m, s, y, tr, qq, ... continues an ordinary word
// instead of opening a quote-like construct.
static inline bool isNonQuote(char ch) {
	// Convert to unsigned char first: passing a negative plain char to a
	// <ctype.h> function is undefined behaviour.
	return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
}
74
75 static inline char actualNumStyle(int numberStyle) {
76 switch (numberStyle) {
77 case PERLNUM_VECTOR:
78 case PERLNUM_V_VECTOR:
79 return SCE_PL_STRING;
80 case PERLNUM_DECIMAL:
81 case PERLNUM_NON_DEC:
82 case PERLNUM_FLOAT:
83 default:
84 return SCE_PL_NUMBER;
85 }
86 }
87
88 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
89 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
90 return false;
91 }
92 while (*val) {
93 if (*val != styler[pos++]) {
94 return false;
95 }
96 val++;
97 }
98 return true;
99 }
100
// Return the closing delimiter that pairs with an opening bracket; any
// other character is its own closing delimiter and is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
112
113 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
114 WordList *keywordlists[], Accessor &styler) {
115
116 // Lexer for perl often has to backtrack to start of current style to determine
117 // which characters are being used as quotes, how deeply nested is the
118 // start position and what the termination string is for here documents
119
120 WordList &keywords = *keywordlists[0];
121
122 class HereDocCls {
123 public:
124 int State; // 0: '<<' encountered
125 // 1: collect the delimiter
126 // 2: here doc text (lines after the delimiter)
127 char Quote; // the char after '<<'
128 bool Quoted; // true if Quote in ('\'','"','`')
129 int DelimiterLength; // strlen(Delimiter)
130 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
131 HereDocCls() {
132 State = 0;
133 DelimiterLength = 0;
134 Delimiter = new char[HERE_DELIM_MAX];
135 Delimiter[0] = '\0';
136 }
137 ~HereDocCls() {
138 delete []Delimiter;
139 }
140 };
141 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
142
143 class QuoteCls {
144 public:
145 int Rep;
146 int Count;
147 char Up;
148 char Down;
149 QuoteCls() {
150 this->New(1);
151 }
152 void New(int r) {
153 Rep = r;
154 Count = 0;
155 Up = '\0';
156 Down = '\0';
157 }
158 void Open(char u) {
159 Count++;
160 Up = u;
161 Down = opposite(Up);
162 }
163 };
164 QuoteCls Quote;
165
166 int state = initStyle;
167 char numState = PERLNUM_DECIMAL;
168 int dotCount = 0;
169 unsigned int lengthDoc = startPos + length;
170 //int sookedpos = 0; // these have no apparent use, see POD state
171 //char sooked[100];
172 //sooked[sookedpos] = '\0';
173
174 // If in a long distance lexical state, seek to the beginning to find quote characters
175 // Perl strings can be multi-line with embedded newlines, so backtrack.
176 // Perl numbers have additional state during lexing, so backtrack too.
177 if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
178 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
179 startPos--;
180 }
181 startPos = styler.LineStart(styler.GetLine(startPos));
182 state = styler.StyleAt(startPos - 1);
183 }
184 if ( state == SCE_PL_STRING_Q
185 || state == SCE_PL_STRING_QQ
186 || state == SCE_PL_STRING_QX
187 || state == SCE_PL_STRING_QR
188 || state == SCE_PL_STRING_QW
189 || state == SCE_PL_REGEX
190 || state == SCE_PL_REGSUBST
191 || state == SCE_PL_STRING
192 || state == SCE_PL_BACKTICKS
193 || state == SCE_PL_CHARACTER
194 || state == SCE_PL_NUMBER
195 ) {
196 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
197 startPos--;
198 }
199 state = SCE_PL_DEFAULT;
200 }
201
202 styler.StartAt(startPos);
203 char chPrev = styler.SafeGetCharAt(startPos - 1);
204 if (startPos == 0)
205 chPrev = '\n';
206 char chNext = styler[startPos];
207 styler.StartSegment(startPos);
208
209 for (unsigned int i = startPos; i < lengthDoc; i++) {
210 char ch = chNext;
211 // if the current character is not consumed due to the completion of an
212 // earlier style, lexing can be restarted via a simple goto
213 restartLexer:
214 chNext = styler.SafeGetCharAt(i + 1);
215 char chNext2 = styler.SafeGetCharAt(i + 2);
216
217 if (styler.IsLeadByte(ch)) {
218 chNext = styler.SafeGetCharAt(i + 2);
219 chPrev = ' ';
220 i += 1;
221 continue;
222 }
223 if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
224 styler.ColourTo(i, state);
225 chPrev = ch;
226 continue;
227 }
228
229 if (HereDoc.State == 1 && isEOLChar(ch)) {
230 // Begin of here-doc (the line after the here-doc delimiter):
231 // Lexically, the here-doc starts from the next line after the >>, but the
232 // first line of here-doc seem to follow the style of the last EOL sequence
233 HereDoc.State = 2;
234 if (HereDoc.Quoted) {
235 if (state == SCE_PL_HERE_DELIM) {
236 // Missing quote at end of string! We are stricter than perl.
237 // Colour here-doc anyway while marking this bit as an error.
238 state = SCE_PL_ERROR;
239 }
240 styler.ColourTo(i - 1, state);
241 switch (HereDoc.Quote) {
242 case '\'':
243 state = SCE_PL_HERE_Q ;
244 break;
245 case '"':
246 state = SCE_PL_HERE_QQ;
247 break;
248 case '`':
249 state = SCE_PL_HERE_QX;
250 break;
251 }
252 } else {
253 styler.ColourTo(i - 1, state);
254 switch (HereDoc.Quote) {
255 case '\\':
256 state = SCE_PL_HERE_Q ;
257 break;
258 default :
259 state = SCE_PL_HERE_QQ;
260 }
261 }
262 }
263
264 if (state == SCE_PL_DEFAULT) {
265 if (isdigit(ch) || (isdigit(chNext) &&
266 (ch == '.' || ch == 'v'))) {
267 state = SCE_PL_NUMBER;
268 numState = PERLNUM_DECIMAL;
269 dotCount = 0;
270 if (ch == '0') { // hex,bin,octal
271 if (chNext == 'x' || chNext == 'b' || isdigit(chNext)) {
272 numState = PERLNUM_NON_DEC;
273 }
274 } else if (ch == 'v') { // vector
275 numState = PERLNUM_V_VECTOR;
276 }
277 } else if (iswordstart(ch)) {
278 if (ch == 's' && !isNonQuote(chNext)) {
279 state = SCE_PL_REGSUBST;
280 Quote.New(2);
281 } else if (ch == 'm' && !isNonQuote(chNext)) {
282 state = SCE_PL_REGEX;
283 Quote.New(1);
284 } else if (ch == 'q' && !isNonQuote(chNext)) {
285 state = SCE_PL_STRING_Q;
286 Quote.New(1);
287 } else if (ch == 'y' && !isNonQuote(chNext)) {
288 state = SCE_PL_REGSUBST;
289 Quote.New(2);
290 } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
291 state = SCE_PL_REGSUBST;
292 Quote.New(2);
293 i++;
294 chNext = chNext2;
295 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
296 if (chNext == 'q') state = SCE_PL_STRING_QQ;
297 else if (chNext == 'x') state = SCE_PL_STRING_QX;
298 else if (chNext == 'r') state = SCE_PL_STRING_QR;
299 else if (chNext == 'w') state = SCE_PL_STRING_QW;
300 i++;
301 chNext = chNext2;
302 Quote.New(1);
303 } else if (ch == 'x' && (chNext == '=' || // repetition
304 (chNext != '_' && !isalnum(chNext)) ||
305 (isdigit(chPrev) && isdigit(chNext)))) {
306 styler.ColourTo(i, SCE_PL_OPERATOR);
307 } else {
308 state = SCE_PL_WORD;
309 if ((!iswordchar(chNext) && chNext != '\'')
310 || (chNext == '.' && chNext2 == '.')) {
311 // We need that if length of word == 1!
312 // This test is copied from the SCE_PL_WORD handler.
313 classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);
314 state = SCE_PL_DEFAULT;
315 }
316 }
317 } else if (ch == '#') {
318 state = SCE_PL_COMMENTLINE;
319 } else if (ch == '\"') {
320 state = SCE_PL_STRING;
321 Quote.New(1);
322 Quote.Open(ch);
323 } else if (ch == '\'') {
324 if (chPrev == '&') {
325 // Archaic call
326 styler.ColourTo(i, state);
327 } else {
328 state = SCE_PL_CHARACTER;
329 Quote.New(1);
330 Quote.Open(ch);
331 }
332 } else if (ch == '`') {
333 state = SCE_PL_BACKTICKS;
334 Quote.New(1);
335 Quote.Open(ch);
336 } else if (ch == '$') {
337 if ((chNext == '{') || isspacechar(chNext)) {
338 styler.ColourTo(i, SCE_PL_SCALAR);
339 } else {
340 state = SCE_PL_SCALAR;
341 if (chNext == '`' && chNext2 == '`') {
342 i += 2;
343 ch = styler.SafeGetCharAt(i);
344 chNext = styler.SafeGetCharAt(i + 1);
345 } else {
346 i++;
347 ch = chNext;
348 chNext = chNext2;
349 }
350 }
351 } else if (ch == '@') {
352 if (isalpha(chNext) || chNext == '#' || chNext == '$'
353 || chNext == '_' || chNext == '+') {
354 state = SCE_PL_ARRAY;
355 } else if (chNext != '{' && chNext != '[') {
356 styler.ColourTo(i, SCE_PL_ARRAY);
357 i++;
358 ch = ' ';
359 } else {
360 styler.ColourTo(i, SCE_PL_ARRAY);
361 }
362 } else if (ch == '%') {
363 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
364 state = SCE_PL_HASH;
365 } else if (chNext == '{') {
366 styler.ColourTo(i, SCE_PL_HASH);
367 } else {
368 styler.ColourTo(i, SCE_PL_OPERATOR);
369 }
370 } else if (ch == '*') {
371 if (isalpha(chNext) || chNext == '_' || chNext == '{') {
372 state = SCE_PL_SYMBOLTABLE;
373 } else {
374 if (chNext == '*') { // exponentiation
375 i++;
376 ch = chNext;
377 chNext = chNext2;
378 }
379 styler.ColourTo(i, SCE_PL_OPERATOR);
380 }
381 } else if (ch == '/') {
382 // Explicit backward peeking to set a consistent preferRE for
383 // any slash found, so no longer need to track preferRE state.
384 // Find first previous significant lexed element and interpret.
385 bool preferRE = false;
386 unsigned int bk = (i > 0)? i - 1: 0;
387 char bkch;
388 styler.Flush();
389 while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
390 styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
391 bk--;
392 }
393 if (bk == 0) {
394 preferRE = true;
395 } else {
396 int bkstyle = styler.StyleAt(bk);
397 switch(bkstyle) {
398 case SCE_PL_OPERATOR:
399 preferRE = true;
400 bkch = styler.SafeGetCharAt(bk);
401 if (bkch == ')' || bkch == ']') {
402 preferRE = false;
403 } else if (bkch == '}') {
404 // backtrack further, count balanced brace pairs
405 // if a brace pair found, see if it's a variable
406 int braceCount = 1;
407 while (--bk > 0) {
408 bkstyle = styler.StyleAt(bk);
409 if (bkstyle == SCE_PL_OPERATOR) {
410 bkch = styler.SafeGetCharAt(bk);
411 if (bkch == '}') {
412 braceCount++;
413 } else if (bkch == '{') {
414 if (--braceCount == 0)
415 break;
416 }
417 }
418 }
419 if (bk == 0) {
420 // at beginning, true
421 } else if (braceCount == 0) {
422 // balanced { found, check for variable
423 bkstyle = styler.StyleAt(bk - 1);
424 if (bkstyle == SCE_PL_SCALAR
425 || bkstyle == SCE_PL_ARRAY
426 || bkstyle == SCE_PL_HASH
427 || bkstyle == SCE_PL_SYMBOLTABLE) {
428 preferRE = false;
429 }
430 }
431 }
432 break;
433 // other styles uses the default, preferRE=false
434 case SCE_PL_IDENTIFIER:
435 case SCE_PL_POD:
436 case SCE_PL_WORD:
437 case SCE_PL_HERE_Q:
438 case SCE_PL_HERE_QQ:
439 case SCE_PL_HERE_QX:
440 preferRE = true;
441 break;
442 }
443 }
444 if (preferRE) {
445 state = SCE_PL_REGEX;
446 Quote.New(1);
447 Quote.Open(ch);
448 } else {
449 styler.ColourTo(i, SCE_PL_OPERATOR);
450 }
451 } else if (ch == '<' && chNext == '<') {
452 state = SCE_PL_HERE_DELIM;
453 HereDoc.State = 0;
454 } else if (ch == '=' // POD
455 && isalpha(chNext)
456 && (isEOLChar(chPrev))) {
457 state = SCE_PL_POD;
458 //sookedpos = 0;
459 //sooked[sookedpos] = '\0';
460 } else if (ch == '-' // file test operators
461 && isSingleCharOp(chNext)
462 && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
463 styler.ColourTo(i + 1, SCE_PL_WORD);
464 state = SCE_PL_DEFAULT;
465 i++;
466 ch = chNext;
467 chNext = chNext2;
468 } else if (isPerlOperator(ch)) {
469 if (ch == '.' && chNext == '.') { // .. and ...
470 i++;
471 if (chNext2 == '.') { i++; }
472 state = SCE_PL_DEFAULT;
473 ch = styler.SafeGetCharAt(i);
474 chNext = styler.SafeGetCharAt(i + 1);
475 }
476 styler.ColourTo(i, SCE_PL_OPERATOR);
477 } else {
478 // keep colouring defaults to make restart easier
479 styler.ColourTo(i, SCE_PL_DEFAULT);
480 }
481 } else if (state == SCE_PL_NUMBER) {
482 if (ch == '.') {
483 if (chNext == '.') {
484 // double dot is always an operator
485 goto numAtEnd;
486 } else if (numState == PERLNUM_NON_DEC || numState == PERLNUM_FLOAT) {
487 // non-decimal number or float exponent, consume next dot
488 styler.ColourTo(i - 1, SCE_PL_NUMBER);
489 styler.ColourTo(i, SCE_PL_OPERATOR);
490 state = SCE_PL_DEFAULT;
491 } else { // decimal or vectors allows dots
492 dotCount++;
493 if (numState == PERLNUM_DECIMAL) {
494 if (dotCount > 1) {
495 if (isdigit(chNext)) { // really a vector
496 numState = PERLNUM_VECTOR;
497 } else // number then dot
498 goto numAtEnd;
499 }
500 } else { // vectors
501 if (!isdigit(chNext)) // vector then dot
502 goto numAtEnd;
503 }
504 }
505 } else if (ch == '_' && numState == PERLNUM_DECIMAL) {
506 if (!isdigit(chNext)) {
507 goto numAtEnd;
508 }
509 } else if (isalnum(ch)) {
510 if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
511 if (isalpha(ch)) {
512 if (dotCount == 0) { // change to word
513 state = SCE_PL_WORD;
514 } else { // vector then word
515 goto numAtEnd;
516 }
517 }
518 } else if (numState == PERLNUM_DECIMAL) {
519 if (ch == 'E' || ch == 'e') { // exponent
520 numState = PERLNUM_FLOAT;
521 if (chNext == '+' || chNext == '-') {
522 i++;
523 ch = chNext;
524 chNext = chNext2;
525 }
526 } else if (!isdigit(ch)) { // number then word
527 goto numAtEnd;
528 }
529 } else if (numState == PERLNUM_FLOAT) {
530 if (!isdigit(ch)) { // float then word
531 goto numAtEnd;
532 }
533 } else {// (numState == PERLNUM_NON_DEC)
534 // allow alphanum for bin,hex,oct for now
535 }
536 } else {
537 // complete current number or vector
538 numAtEnd:
539 styler.ColourTo(i - 1, actualNumStyle(numState));
540 state = SCE_PL_DEFAULT;
541 goto restartLexer;
542 }
543 } else if (state == SCE_PL_WORD) {
544 if ((!iswordchar(chNext) && chNext != '\'')
545 || (chNext == '.' && chNext2 == '.')) {
546 // ".." is always an operator if preceded by a SCE_PL_WORD.
547 // Archaic Perl has quotes inside names
548 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
549 || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
550 styler.ColourTo(i, SCE_PL_DATASECTION);
551 state = SCE_PL_DATASECTION;
552 } else {
553 classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);
554 state = SCE_PL_DEFAULT;
555 ch = ' ';
556 }
557 }
558 } else {
559 if (state == SCE_PL_COMMENTLINE) {
560 if (isEOLChar(ch)) {
561 styler.ColourTo(i - 1, state);
562 state = SCE_PL_DEFAULT;
563 goto restartLexer;
564 } else if (isEOLChar(chNext)) {
565 styler.ColourTo(i, state);
566 state = SCE_PL_DEFAULT;
567 }
568 } else if (state == SCE_PL_HERE_DELIM) {
569 //
570 // From perldata.pod:
571 // ------------------
572 // A line-oriented form of quoting is based on the shell ``here-doc''
573 // syntax.
574 // Following a << you specify a string to terminate the quoted material,
575 // and all lines following the current line down to the terminating
576 // string are the value of the item.
577 // The terminating string may be either an identifier (a word),
578 // or some quoted text.
579 // If quoted, the type of quotes you use determines the treatment of
580 // the text, just as in regular quoting.
581 // An unquoted identifier works like double quotes.
582 // There must be no space between the << and the identifier.
583 // (If you put a space it will be treated as a null identifier,
584 // which is valid, and matches the first empty line.)
585 // (This is deprecated, -w warns of this syntax)
586 // The terminating string must appear by itself (unquoted and with no
587 // surrounding whitespace) on the terminating line.
588 //
589 // From Bash info:
590 // ---------------
591 // Specifier format is: <<[-]WORD
592 // Optional '-' is for removal of leading tabs from here-doc.
593 // Whitespace acceptable after <<[-] operator.
594 //
595 if (HereDoc.State == 0) { // '<<' encountered
596 HereDoc.State = 1;
597 HereDoc.Quote = chNext;
598 HereDoc.Quoted = false;
599 HereDoc.DelimiterLength = 0;
600 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
601 if (chNext == '\'' || chNext == '"' || chNext == '`') { // a quoted here-doc delimiter
602 i++;
603 ch = chNext;
604 chNext = chNext2;
605 HereDoc.Quoted = true;
606 } else if (isalpha(chNext) || chNext == '_') {
607 // an unquoted here-doc delimiter, no special handling
608 } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
609 || chNext == '=' || chNext == '$' || chNext == '@') {
610 // left shift << or <<= operator cases
611 styler.ColourTo(i, SCE_PL_OPERATOR);
612 state = SCE_PL_DEFAULT;
613 HereDoc.State = 0;
614 } else {
615 // symbols terminates; deprecated zero-length delimiter
616 }
617
618 } else if (HereDoc.State == 1) { // collect the delimiter
619 if (HereDoc.Quoted) { // a quoted here-doc delimiter
620 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
621 styler.ColourTo(i, state);
622 state = SCE_PL_DEFAULT;
623 } else {
624 if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
625 i++;
626 ch = chNext;
627 chNext = chNext2;
628 }
629 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
630 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
631 }
632 } else { // an unquoted here-doc delimiter
633 if (isalnum(ch) || ch == '_') {
634 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
635 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
636 } else {
637 styler.ColourTo(i - 1, state);
638 state = SCE_PL_DEFAULT;
639 goto restartLexer;
640 }
641 }
642 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
643 styler.ColourTo(i - 1, state);
644 state = SCE_PL_ERROR;
645 goto restartLexer;
646 }
647 }
648 } else if (HereDoc.State == 2) {
649 // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
650 if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
651 i += HereDoc.DelimiterLength;
652 chPrev = styler.SafeGetCharAt(i - 1);
653 ch = styler.SafeGetCharAt(i);
654 if (isEOLChar(ch)) {
655 styler.ColourTo(i - 1, state);
656 state = SCE_PL_DEFAULT;
657 HereDoc.State = 0;
658 goto restartLexer;
659 }
660 chNext = styler.SafeGetCharAt(i + 1);
661 }
662 } else if (state == SCE_PL_POD) {
663 if (ch == '=' && isEOLChar(chPrev)) {
664 if (isMatch(styler, lengthDoc, i, "=cut")) {
665 styler.ColourTo(i - 1 + 4, state);
666 i += 4;
667 state = SCE_PL_DEFAULT;
668 ch = styler.SafeGetCharAt(i);
669 //chNext = styler.SafeGetCharAt(i + 1);
670 goto restartLexer;
671 }
672 }
673 } else if (state == SCE_PL_SCALAR // variable names
674 || state == SCE_PL_ARRAY
675 || state == SCE_PL_HASH
676 || state == SCE_PL_SYMBOLTABLE) {
677 if (ch == ':' && chNext == ':') { // skip ::
678 i++;
679 ch = chNext;
680 chNext = chNext2;
681 }
682 else if (isEndVar(ch)) {
683 if ((state == SCE_PL_SCALAR || state == SCE_PL_ARRAY)
684 && i == (styler.GetStartSegment() + 1)) {
685 // Special variable: $(, $_ etc.
686 styler.ColourTo(i, state);
687 state = SCE_PL_DEFAULT;
688 } else {
689 styler.ColourTo(i - 1, state);
690 state = SCE_PL_DEFAULT;
691 goto restartLexer;
692 }
693 }
694 } else if (state == SCE_PL_REGEX
695 || state == SCE_PL_STRING_QR
696 ) {
697 if (!Quote.Up && !isspacechar(ch)) {
698 Quote.Open(ch);
699 } else if (ch == '\\' && Quote.Up != '\\') {
700 // SG: Is it save to skip *every* escaped char?
701 i++;
702 ch = chNext;
703 chNext = styler.SafeGetCharAt(i + 1);
704 } else {
705 if (ch == Quote.Down /*&& chPrev != '\\'*/) {
706 Quote.Count--;
707 if (Quote.Count == 0) {
708 Quote.Rep--;
709 if (Quote.Up == Quote.Down) {
710 Quote.Count++;
711 }
712 }
713 if (!isalpha(chNext)) {
714 if (Quote.Rep <= 0) {
715 styler.ColourTo(i, state);
716 state = SCE_PL_DEFAULT;
717 ch = ' ';
718 }
719 }
720 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
721 Quote.Count++;
722 } else if (!isalpha(chNext)) {
723 if (Quote.Rep <= 0) {
724 styler.ColourTo(i, state);
725 state = SCE_PL_DEFAULT;
726 ch = ' ';
727 }
728 }
729 }
730 } else if (state == SCE_PL_REGSUBST) {
731 if (!Quote.Up && !isspacechar(ch)) {
732 Quote.Open(ch);
733 } else if (ch == '\\' && Quote.Up != '\\') {
734 // SG: Is it save to skip *every* escaped char?
735 i++;
736 ch = chNext;
737 chNext = styler.SafeGetCharAt(i + 1);
738 } else {
739 if (Quote.Count == 0 && Quote.Rep == 1) {
740 /* We matched something like s(...) or tr{...}
741 * and are looking for the next matcher characters,
742 * which could be either bracketed ({...}) or non-bracketed
743 * (/.../).
744 *
745 * Number-signs are problematic. If they occur after
746 * the close of the first part, treat them like
747 * a Quote.Up char, even if they actually start comments.
748 *
749 * If we find an alnum, we end the regsubst, and punt.
750 *
751 * Eric Promislow ericp@activestate.com Aug 9,2000
752 */
753 if (isspacechar(ch)) {
754 // Keep going
755 }
756 else if (isalnum(ch)) {
757 styler.ColourTo(i, state);
758 state = SCE_PL_DEFAULT;
759 ch = ' ';
760 } else {
761 Quote.Open(ch);
762 }
763 } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
764 Quote.Count--;
765 if (Quote.Count == 0) {
766 Quote.Rep--;
767 }
768 if (!isalpha(chNext)) {
769 if (Quote.Rep <= 0) {
770 styler.ColourTo(i, state);
771 state = SCE_PL_DEFAULT;
772 ch = ' ';
773 }
774 }
775 if (Quote.Up == Quote.Down) {
776 Quote.Count++;
777 }
778 } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
779 Quote.Count++;
780 } else if (!isalpha(chNext)) {
781 if (Quote.Rep <= 0) {
782 styler.ColourTo(i, state);
783 state = SCE_PL_DEFAULT;
784 ch = ' ';
785 }
786 }
787 }
788 } else if (state == SCE_PL_STRING_Q
789 || state == SCE_PL_STRING_QQ
790 || state == SCE_PL_STRING_QX
791 || state == SCE_PL_STRING_QW
792 || state == SCE_PL_STRING
793 || state == SCE_PL_CHARACTER
794 || state == SCE_PL_BACKTICKS
795 ) {
796 if (!Quote.Down && !isspacechar(ch)) {
797 Quote.Open(ch);
798 } else if (ch == '\\' && Quote.Up != '\\') {
799 i++;
800 ch = chNext;
801 chNext = styler.SafeGetCharAt(i + 1);
802 } else if (ch == Quote.Down) {
803 Quote.Count--;
804 if (Quote.Count == 0) {
805 Quote.Rep--;
806 if (Quote.Rep <= 0) {
807 styler.ColourTo(i, state);
808 state = SCE_PL_DEFAULT;
809 ch = ' ';
810 }
811 if (Quote.Up == Quote.Down) {
812 Quote.Count++;
813 }
814 }
815 } else if (ch == Quote.Up) {
816 Quote.Count++;
817 }
818 }
819 }
820 if (state == SCE_PL_ERROR) {
821 break;
822 }
823 chPrev = ch;
824 }
825 styler.ColourTo(lengthDoc - 1, state);
826 }
827
828 static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
829 Accessor &styler) {
830 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
831 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
832 unsigned int endPos = startPos + length;
833 int visibleChars = 0;
834 int lineCurrent = styler.GetLine(startPos);
835 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
836 int levelCurrent = levelPrev;
837 char chNext = styler[startPos];
838 int styleNext = styler.StyleAt(startPos);
839 for (unsigned int i = startPos; i < endPos; i++) {
840 char ch = chNext;
841 chNext = styler.SafeGetCharAt(i + 1);
842 int style = styleNext;
843 styleNext = styler.StyleAt(i + 1);
844 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
845 if (foldComment && (style == SCE_PL_COMMENTLINE)) {
846 if ((ch == '/') && (chNext == '/')) {
847 char chNext2 = styler.SafeGetCharAt(i + 2);
848 if (chNext2 == '{') {
849 levelCurrent++;
850 } else if (chNext2 == '}') {
851 levelCurrent--;
852 }
853 }
854 }
855 if (style == SCE_C_OPERATOR) {
856 if (ch == '{') {
857 levelCurrent++;
858 } else if (ch == '}') {
859 levelCurrent--;
860 }
861 }
862 if (atEOL) {
863 int lev = levelPrev;
864 if (visibleChars == 0 && foldCompact)
865 lev |= SC_FOLDLEVELWHITEFLAG;
866 if ((levelCurrent > levelPrev) && (visibleChars > 0))
867 lev |= SC_FOLDLEVELHEADERFLAG;
868 if (lev != styler.LevelAt(lineCurrent)) {
869 styler.SetLevel(lineCurrent, lev);
870 }
871 lineCurrent++;
872 levelPrev = levelCurrent;
873 visibleChars = 0;
874 }
875 if (!isspacechar(ch))
876 visibleChars++;
877 }
878 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
879 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
880 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
881 }
882
// Descriptions of the keyword lists this lexer accepts, terminated by 0.
// The single entry corresponds to keywordlists[0] read by ColourisePerlDoc.
static const char * const perlWordListDesc[] = {
	"Keywords",
	0
};

// Lexer module object tying the SCLEX_PERL id and the name "perl" to the
// colouriser and folder functions defined in this file.
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);