]>
Commit | Line | Data |
---|---|---|
65ec6247 RD |
1 | // Scintilla source code edit control |
2 | /** @file LexPerl.cxx | |
3 | ** Lexer for subset of Perl. | |
4 | **/ | |
1e9bafca RD |
5 | // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org> |
6 | // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> | |
f6bcfd97 BP |
7 | // The License.txt file describes the conditions under which this software may be distributed. |
8 | ||
65ec6247 RD |
9 | #include <stdlib.h> |
10 | #include <string.h> | |
11 | #include <ctype.h> | |
12 | #include <stdio.h> | |
13 | #include <stdarg.h> | |
f6bcfd97 BP |
14 | |
15 | #include "Platform.h" | |
16 | ||
17 | #include "PropSet.h" | |
18 | #include "Accessor.h" | |
19 | #include "KeyWords.h" | |
20 | #include "Scintilla.h" | |
21 | #include "SciLexer.h" | |
22 | ||
1e9bafca RD |
// Sub-states of a numeric literal, held in numState while a SCE_PL_NUMBER token is lexed.
// The numeric order is relied upon: the number handler compares numState <= PERLNUM_FLOAT
// to decide that a '.' ends the token, and actualNumStyle() maps 6-7 to the string style
// and 8 to the error style.
23 | #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot |
24 | #define PERLNUM_HEX 2 | |
25 | #define PERLNUM_OCTAL 3 | |
26 | #define PERLNUM_FLOAT 4 // actually exponent part | |
27 | #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings | |
28 | #define PERLNUM_VECTOR 6 | |
29 | #define PERLNUM_V_VECTOR 7 | |
30 | #define PERLNUM_BAD 8 | |
31 | ||
// Values stored in backflag: the kind of the last significant element lexed, used when
// deciding whether a word is a keyword/quote-like operator or a plain bareword.
32 | #define BACK_NONE 0 // lookback state for bareword disambiguation: | |
33 | #define BACK_OPERATOR 1 // whitespace/comments are insignificant | |
34 | #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation | |
8e54aaed RD |
35 | |
// Size, in chars, of the buffer allocated for the here-doc delimiter (HereDocCls::Delimiter).
36 | #define HERE_DELIM_MAX 256 | |
37 | ||
65ec6247 RD |
/**
 * Report whether @a ch is a line-terminator character.
 * @param ch character to classify
 * @return true for carriage return or line feed, false for anything else
 */
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
41 | ||
/**
 * Report whether @a ch is one of the letters accepted after '-' as a
 * single-character file test operator (for example the 'e' of "-e").
 *
 * The previous implementation built a one-character needle and searched for
 * it with strstr(); for ch == '\0' the needle was the empty string, which
 * strstr() always finds, so a NUL character was wrongly accepted.
 *
 * @param ch candidate operator letter
 * @return true when @a ch is in the accepted letter set, false otherwise
 *         (including for '\0')
 */
static bool isSingleCharOp(char ch) {
	// strchr() also matches the terminating NUL of the set, so reject it first
	if (ch == '\0') {
		return false;
	}
	return strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch) != NULL;
}
48 | ||
/**
 * Report whether @a ch is one of the punctuation characters this lexer
 * colours as an operator.
 * @param ch character to classify
 * @return true when @a ch belongs to the operator set below
 */
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		// '%', '*', '<' and '/' are intentionally not listed:
		// these chars are already tested before this call
		return false;
	}
}
61 | ||
1e9bafca | 62 | static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { |
f6bcfd97 | 63 | char s[100]; |
1e9bafca RD |
64 | unsigned int i, len = end - start; |
65 | if (len > 30) { len = 30; } | |
66 | for (i = 0; i < len; i++, start++) s[i] = styler[start]; | |
67 | s[i] = '\0'; | |
68 | return keywords.InList(s); | |
f6bcfd97 BP |
69 | } |
70 | ||
/**
 * Report whether @a ch is none of: an alphanumeric character, '#', '$',
 * '_' or a single quote.
 * (Presumably this marks the end of a variable name; the callers are outside
 * the visible part of the file - confirm before relying on that.)
 *
 * @param ch character to classify
 * @return true when @a ch is outside the set described above
 */
static inline bool isEndVar(char ch) {
	// <ctype.h> classifiers are only defined for unsigned char values (or EOF);
	// a plain char holding a high-bit byte may be negative, so convert first.
	const unsigned char uch = ch;
	return !isalnum(uch) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
75 | ||
591d01be | 76 | |
8e54aaed RD |
/**
 * Report whether @a ch is an alphanumeric character or an underscore.
 * The word handler uses the negation of this test on the character that
 * follows s, m, q, y, tr, qq, qr, qw and qx to decide whether that character
 * can start a quote-like construct.
 *
 * @param ch character to classify
 * @return true for alphanumerics and '_', false otherwise
 */
static inline bool isNonQuote(char ch) {
	// <ctype.h> classifiers are only defined for unsigned char values (or EOF);
	// a plain char holding a high-bit byte may be negative, so convert first.
	const unsigned char uch = ch;
	return isalnum(uch) || ch == '_';
}
80 | ||
81 | static inline char actualNumStyle(int numberStyle) { | |
1e9bafca RD |
82 | if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { |
83 | return SCE_PL_STRING; | |
84 | } else if (numberStyle == PERLNUM_BAD) { | |
85 | return SCE_PL_ERROR; | |
86 | } | |
87 | return SCE_PL_NUMBER; | |
8e54aaed RD |
88 | } |
89 | ||
f6bcfd97 BP |
90 | static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { |
91 | if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { | |
92 | return false; | |
93 | } | |
94 | while (*val) { | |
95 | if (*val != styler[pos++]) { | |
96 | return false; | |
97 | } | |
98 | val++; | |
99 | } | |
100 | return true; | |
101 | } | |
102 | ||
/**
 * Return the closing delimiter that pairs with an opening bracket.
 * @param ch opening delimiter
 * @return ')', ']', '}' or '>' for '(', '[', '{' or '<' respectively;
 *         any other character is returned unchanged
 */
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
114 | ||
115 | static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, | |
116 | WordList *keywordlists[], Accessor &styler) { | |
117 | ||
118 | // Lexer for perl often has to backtrack to start of current style to determine | |
119 | // which characters are being used as quotes, how deeply nested is the | |
120 | // start position and what the termination string is for here documents | |
65ec6247 | 121 | |
f6bcfd97 | 122 | WordList &keywords = *keywordlists[0]; |
65ec6247 RD |
123 | |
124 | class HereDocCls { | |
125 | public: | |
126 | int State; // 0: '<<' encountered | |
127 | // 1: collect the delimiter | |
128 | // 2: here doc text (lines after the delimiter) | |
129 | char Quote; // the char after '<<' | |
130 | bool Quoted; // true if Quote in ('\'','"','`') | |
131 | int DelimiterLength; // strlen(Delimiter) | |
8e54aaed | 132 | char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf |
65ec6247 RD |
133 | HereDocCls() { |
134 | State = 0; | |
1e9bafca RD |
135 | Quote = 0; |
136 | Quoted = false; | |
65ec6247 | 137 | DelimiterLength = 0; |
8e54aaed | 138 | Delimiter = new char[HERE_DELIM_MAX]; |
65ec6247 RD |
139 | Delimiter[0] = '\0'; |
140 | } | |
8e54aaed RD |
141 | ~HereDocCls() { |
142 | delete []Delimiter; | |
143 | } | |
65ec6247 RD |
144 | }; |
145 | HereDocCls HereDoc; // TODO: FIFO for stacked here-docs | |
146 | ||
147 | class QuoteCls { | |
148 | public: | |
149 | int Rep; | |
150 | int Count; | |
151 | char Up; | |
152 | char Down; | |
153 | QuoteCls() { | |
154 | this->New(1); | |
155 | } | |
156 | void New(int r) { | |
157 | Rep = r; | |
158 | Count = 0; | |
159 | Up = '\0'; | |
160 | Down = '\0'; | |
161 | } | |
162 | void Open(char u) { | |
163 | Count++; | |
164 | Up = u; | |
165 | Down = opposite(Up); | |
166 | } | |
167 | }; | |
168 | QuoteCls Quote; | |
169 | ||
f6bcfd97 | 170 | int state = initStyle; |
8e54aaed RD |
171 | char numState = PERLNUM_DECIMAL; |
172 | int dotCount = 0; | |
65ec6247 | 173 | unsigned int lengthDoc = startPos + length; |
8e54aaed RD |
174 | //int sookedpos = 0; // these have no apparent use, see POD state |
175 | //char sooked[100]; | |
176 | //sooked[sookedpos] = '\0'; | |
65ec6247 | 177 | |
8e54aaed RD |
178 | // If in a long distance lexical state, seek to the beginning to find quote characters |
179 | // Perl strings can be multi-line with embedded newlines, so backtrack. | |
180 | // Perl numbers have additional state during lexing, so backtrack too. | |
65ec6247 RD |
181 | if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) { |
182 | while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) { | |
183 | startPos--; | |
184 | } | |
185 | startPos = styler.LineStart(styler.GetLine(startPos)); | |
186 | state = styler.StyleAt(startPos - 1); | |
187 | } | |
188 | if ( state == SCE_PL_STRING_Q | |
189 | || state == SCE_PL_STRING_QQ | |
190 | || state == SCE_PL_STRING_QX | |
191 | || state == SCE_PL_STRING_QR | |
192 | || state == SCE_PL_STRING_QW | |
193 | || state == SCE_PL_REGEX | |
194 | || state == SCE_PL_REGSUBST | |
8e54aaed RD |
195 | || state == SCE_PL_STRING |
196 | || state == SCE_PL_BACKTICKS | |
197 | || state == SCE_PL_CHARACTER | |
198 | || state == SCE_PL_NUMBER | |
591d01be | 199 | || state == SCE_PL_IDENTIFIER |
1e9bafca | 200 | || state == SCE_PL_ERROR |
65ec6247 | 201 | ) { |
f6bcfd97 BP |
202 | while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { |
203 | startPos--; | |
204 | } | |
205 | state = SCE_PL_DEFAULT; | |
206 | } | |
65ec6247 | 207 | |
1e9bafca RD |
208 | // lookback at start of lexing to set proper state for backflag |
209 | // after this, they are updated when elements are lexed | |
210 | int backflag = BACK_NONE; | |
211 | unsigned int backPos = startPos; | |
212 | if (backPos > 0) { | |
213 | backPos--; | |
214 | int sty = SCE_PL_DEFAULT; | |
215 | while ((backPos > 0) && (sty = styler.StyleAt(backPos), | |
216 | sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) | |
217 | backPos--; | |
218 | if (sty == SCE_PL_OPERATOR) | |
219 | backflag = BACK_OPERATOR; | |
220 | else if (sty == SCE_PL_WORD) | |
221 | backflag = BACK_KEYWORD; | |
222 | } | |
223 | ||
f6bcfd97 | 224 | styler.StartAt(startPos); |
d134f170 | 225 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
65ec6247 RD |
226 | if (startPos == 0) |
227 | chPrev = '\n'; | |
f6bcfd97 BP |
228 | char chNext = styler[startPos]; |
229 | styler.StartSegment(startPos); | |
65ec6247 RD |
230 | |
231 | for (unsigned int i = startPos; i < lengthDoc; i++) { | |
f6bcfd97 | 232 | char ch = chNext; |
8e54aaed RD |
233 | // if the current character is not consumed due to the completion of an |
234 | // earlier style, lexing can be restarted via a simple goto | |
235 | restartLexer: | |
f6bcfd97 BP |
236 | chNext = styler.SafeGetCharAt(i + 1); |
237 | char chNext2 = styler.SafeGetCharAt(i + 2); | |
238 | ||
239 | if (styler.IsLeadByte(ch)) { | |
240 | chNext = styler.SafeGetCharAt(i + 2); | |
241 | chPrev = ' '; | |
242 | i += 1; | |
243 | continue; | |
244 | } | |
65ec6247 | 245 | if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows |
8e54aaed | 246 | styler.ColourTo(i, state); |
65ec6247 RD |
247 | chPrev = ch; |
248 | continue; | |
249 | } | |
250 | ||
251 | if (HereDoc.State == 1 && isEOLChar(ch)) { | |
252 | // Begin of here-doc (the line after the here-doc delimiter): | |
8e54aaed RD |
253 | // Lexically, the here-doc starts from the next line after the >>, but the |
254 | // first line of here-doc seem to follow the style of the last EOL sequence | |
65ec6247 | 255 | HereDoc.State = 2; |
65ec6247 RD |
256 | if (HereDoc.Quoted) { |
257 | if (state == SCE_PL_HERE_DELIM) { | |
258 | // Missing quote at end of string! We are stricter than perl. | |
8e54aaed | 259 | // Colour here-doc anyway while marking this bit as an error. |
65ec6247 | 260 | state = SCE_PL_ERROR; |
8e54aaed RD |
261 | } |
262 | styler.ColourTo(i - 1, state); | |
263 | switch (HereDoc.Quote) { | |
264 | case '\'': | |
265 | state = SCE_PL_HERE_Q ; | |
266 | break; | |
267 | case '"': | |
268 | state = SCE_PL_HERE_QQ; | |
269 | break; | |
270 | case '`': | |
271 | state = SCE_PL_HERE_QX; | |
272 | break; | |
65ec6247 RD |
273 | } |
274 | } else { | |
8e54aaed | 275 | styler.ColourTo(i - 1, state); |
65ec6247 RD |
276 | switch (HereDoc.Quote) { |
277 | case '\\': | |
278 | state = SCE_PL_HERE_Q ; | |
279 | break; | |
280 | default : | |
281 | state = SCE_PL_HERE_QQ; | |
282 | } | |
283 | } | |
284 | } | |
f6bcfd97 BP |
285 | |
286 | if (state == SCE_PL_DEFAULT) { | |
8e54aaed RD |
287 | if (isdigit(ch) || (isdigit(chNext) && |
288 | (ch == '.' || ch == 'v'))) { | |
289 | state = SCE_PL_NUMBER; | |
1e9bafca | 290 | backflag = BACK_NONE; |
8e54aaed RD |
291 | numState = PERLNUM_DECIMAL; |
292 | dotCount = 0; | |
293 | if (ch == '0') { // hex,bin,octal | |
1e9bafca RD |
294 | if (chNext == 'x') { |
295 | numState = PERLNUM_HEX; | |
296 | } else if (chNext == 'b') { | |
297 | numState = PERLNUM_BINARY; | |
298 | } else if (isdigit(chNext)) { | |
299 | numState = PERLNUM_OCTAL; | |
300 | } | |
301 | if (numState != PERLNUM_DECIMAL) { | |
302 | i++; | |
303 | ch = chNext; | |
304 | chNext = chNext2; | |
305 | } | |
8e54aaed RD |
306 | } else if (ch == 'v') { // vector |
307 | numState = PERLNUM_V_VECTOR; | |
308 | } | |
309 | } else if (iswordstart(ch)) { | |
1e9bafca RD |
310 | // if immediately prefixed by '::', always a bareword |
311 | state = SCE_PL_WORD; | |
312 | if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { | |
313 | state = SCE_PL_IDENTIFIER; | |
314 | } | |
315 | unsigned int kw = i + 1; | |
316 | // first check for possible quote-like delimiter | |
317 | if (ch == 's' && !isNonQuote(chNext)) { | |
f6bcfd97 | 318 | state = SCE_PL_REGSUBST; |
65ec6247 | 319 | Quote.New(2); |
8e54aaed | 320 | } else if (ch == 'm' && !isNonQuote(chNext)) { |
f6bcfd97 | 321 | state = SCE_PL_REGEX; |
65ec6247 | 322 | Quote.New(1); |
8e54aaed | 323 | } else if (ch == 'q' && !isNonQuote(chNext)) { |
65ec6247 RD |
324 | state = SCE_PL_STRING_Q; |
325 | Quote.New(1); | |
8e54aaed | 326 | } else if (ch == 'y' && !isNonQuote(chNext)) { |
65ec6247 RD |
327 | state = SCE_PL_REGSUBST; |
328 | Quote.New(2); | |
8e54aaed | 329 | } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) { |
f6bcfd97 | 330 | state = SCE_PL_REGSUBST; |
65ec6247 | 331 | Quote.New(2); |
1e9bafca | 332 | kw++; |
8e54aaed | 333 | } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) { |
65ec6247 RD |
334 | if (chNext == 'q') state = SCE_PL_STRING_QQ; |
335 | else if (chNext == 'x') state = SCE_PL_STRING_QX; | |
336 | else if (chNext == 'r') state = SCE_PL_STRING_QR; | |
337 | else if (chNext == 'w') state = SCE_PL_STRING_QW; | |
65ec6247 | 338 | Quote.New(1); |
1e9bafca | 339 | kw++; |
8e54aaed | 340 | } else if (ch == 'x' && (chNext == '=' || // repetition |
1e9bafca RD |
341 | (chNext != '_' && !isalnum(chNext)) || |
342 | (isdigit(chPrev) && isdigit(chNext)))) { | |
343 | state = SCE_PL_OPERATOR; | |
344 | } | |
345 | // if potentially a keyword, scan forward and grab word, then check | |
346 | // if it's really one; if yes, disambiguation test is performed | |
347 | // otherwise it is always a bareword and we skip a lot of scanning | |
348 | // note: keywords assumed to be limited to [_a-zA-Z] only | |
349 | if (state == SCE_PL_WORD) { | |
350 | while (iswordstart(styler.SafeGetCharAt(kw))) kw++; | |
351 | if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { | |
352 | state = SCE_PL_IDENTIFIER; | |
353 | } | |
354 | } | |
355 | // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this | |
356 | // for quote-like delimiters/keywords, attempt to disambiguate | |
357 | // to select for bareword, change state -> SCE_PL_IDENTIFIER | |
358 | if (state != SCE_PL_IDENTIFIER && i > 0) { | |
359 | unsigned int j = i; | |
360 | bool moreback = false; // true if passed newline/comments | |
361 | bool brace = false; // true if opening brace found | |
362 | char ch2; | |
363 | // first look backwards past whitespace/comments for EOLs | |
364 | // if BACK_NONE, neither operator nor keyword, so skip test | |
365 | if (backflag != BACK_NONE) { | |
366 | while (--j > backPos) { | |
367 | if (isEOLChar(styler.SafeGetCharAt(j))) | |
368 | moreback = true; | |
369 | } | |
370 | ch2 = styler.SafeGetCharAt(j); | |
371 | if (ch2 == '{' && !moreback) { | |
372 | // {bareword: possible variable spec | |
373 | brace = true; | |
374 | } else if ((ch2 == '&') | |
375 | // &bareword: subroutine call | |
376 | || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-') | |
377 | // ->bareword: part of variable spec | |
378 | || (ch2 == 'b' && styler.Match(j - 2, "su"))) { | |
379 | // sub bareword: subroutine declaration | |
380 | // (implied BACK_KEYWORD, no keywords end in 'sub'!) | |
381 | state = SCE_PL_IDENTIFIER; | |
382 | } | |
383 | // if status still ambiguous, look forward after word past | |
384 | // tabs/spaces only; if ch2 isn't one of '[{(,' it can never | |
385 | // match anything, so skip the whole thing | |
386 | j = kw; | |
387 | if (state != SCE_PL_IDENTIFIER | |
388 | && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',') | |
389 | && kw < lengthDoc) { | |
390 | while (ch2 = styler.SafeGetCharAt(j), | |
391 | (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) { | |
392 | j++; | |
393 | } | |
394 | if ((ch2 == '}' && brace) | |
395 | // {bareword}: variable spec | |
396 | || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) { | |
397 | // [{(, bareword=>: hash literal | |
398 | state = SCE_PL_IDENTIFIER; | |
399 | } | |
400 | } | |
401 | } | |
402 | } | |
403 | backflag = BACK_NONE; | |
404 | // an identifier or bareword | |
405 | if (state == SCE_PL_IDENTIFIER) { | |
406 | if ((!iswordchar(chNext) && chNext != '\'') | |
407 | || (chNext == '.' && chNext2 == '.')) { | |
408 | // We need that if length of word == 1! | |
409 | // This test is copied from the SCE_PL_WORD handler. | |
410 | styler.ColourTo(i, SCE_PL_IDENTIFIER); | |
411 | state = SCE_PL_DEFAULT; | |
412 | } | |
413 | // a keyword | |
414 | } else if (state == SCE_PL_WORD) { | |
415 | i = kw - 1; | |
416 | if (ch == '_' && chNext == '_' && | |
417 | (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") | |
418 | || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) { | |
419 | styler.ColourTo(i, SCE_PL_DATASECTION); | |
420 | state = SCE_PL_DATASECTION; | |
421 | } else { | |
422 | styler.ColourTo(i, SCE_PL_WORD); | |
423 | state = SCE_PL_DEFAULT; | |
424 | backflag = BACK_KEYWORD; | |
425 | backPos = i; | |
426 | } | |
427 | ch = styler.SafeGetCharAt(i); | |
428 | chNext = styler.SafeGetCharAt(i + 1); | |
429 | // a repetition operator 'x' | |
430 | } else if (state == SCE_PL_OPERATOR) { | |
431 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
432 | state = SCE_PL_DEFAULT; | |
433 | // quote-like delimiter, skip one char if double-char delimiter | |
434 | } else { | |
435 | i = kw - 1; | |
436 | chNext = styler.SafeGetCharAt(i + 1); | |
437 | } | |
f6bcfd97 | 438 | } else if (ch == '#') { |
f6bcfd97 BP |
439 | state = SCE_PL_COMMENTLINE; |
440 | } else if (ch == '\"') { | |
f6bcfd97 | 441 | state = SCE_PL_STRING; |
65ec6247 RD |
442 | Quote.New(1); |
443 | Quote.Open(ch); | |
1e9bafca | 444 | backflag = BACK_NONE; |
f6bcfd97 BP |
445 | } else if (ch == '\'') { |
446 | if (chPrev == '&') { | |
447 | // Archaic call | |
448 | styler.ColourTo(i, state); | |
449 | } else { | |
f6bcfd97 | 450 | state = SCE_PL_CHARACTER; |
65ec6247 RD |
451 | Quote.New(1); |
452 | Quote.Open(ch); | |
f6bcfd97 | 453 | } |
1e9bafca | 454 | backflag = BACK_NONE; |
f6bcfd97 | 455 | } else if (ch == '`') { |
f6bcfd97 | 456 | state = SCE_PL_BACKTICKS; |
65ec6247 RD |
457 | Quote.New(1); |
458 | Quote.Open(ch); | |
1e9bafca | 459 | backflag = BACK_NONE; |
f6bcfd97 | 460 | } else if (ch == '$') { |
65ec6247 | 461 | if ((chNext == '{') || isspacechar(chNext)) { |
f6bcfd97 | 462 | styler.ColourTo(i, SCE_PL_SCALAR); |
f6bcfd97 | 463 | } else { |
65ec6247 | 464 | state = SCE_PL_SCALAR; |
8e54aaed RD |
465 | if (chNext == '`' && chNext2 == '`') { |
466 | i += 2; | |
467 | ch = styler.SafeGetCharAt(i); | |
468 | chNext = styler.SafeGetCharAt(i + 1); | |
469 | } else { | |
470 | i++; | |
471 | ch = chNext; | |
472 | chNext = chNext2; | |
473 | } | |
f6bcfd97 | 474 | } |
1e9bafca | 475 | backflag = BACK_NONE; |
f6bcfd97 | 476 | } else if (ch == '@') { |
8e54aaed | 477 | if (isalpha(chNext) || chNext == '#' || chNext == '$' |
1e9bafca | 478 | || chNext == '_' || chNext == '+' || chNext == '-') { |
f6bcfd97 BP |
479 | state = SCE_PL_ARRAY; |
480 | } else if (chNext != '{' && chNext != '[') { | |
481 | styler.ColourTo(i, SCE_PL_ARRAY); | |
f6bcfd97 BP |
482 | } else { |
483 | styler.ColourTo(i, SCE_PL_ARRAY); | |
484 | } | |
1e9bafca | 485 | backflag = BACK_NONE; |
f6bcfd97 | 486 | } else if (ch == '%') { |
1e9bafca RD |
487 | if (isalpha(chNext) || chNext == '#' || chNext == '$' |
488 | || chNext == '_' || chNext == '!' || chNext == '^') { | |
f6bcfd97 | 489 | state = SCE_PL_HASH; |
1e9bafca RD |
490 | i++; |
491 | ch = chNext; | |
492 | chNext = chNext2; | |
65ec6247 | 493 | } else if (chNext == '{') { |
f6bcfd97 | 494 | styler.ColourTo(i, SCE_PL_HASH); |
f6bcfd97 | 495 | } else { |
65ec6247 | 496 | styler.ColourTo(i, SCE_PL_OPERATOR); |
f6bcfd97 | 497 | } |
1e9bafca | 498 | backflag = BACK_NONE; |
f6bcfd97 | 499 | } else if (ch == '*') { |
1e9bafca RD |
500 | char strch[2]; |
501 | strch[0] = chNext; | |
502 | strch[1] = '\0'; | |
503 | if (isalpha(chNext) || chNext == '_' || | |
504 | NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { | |
8e54aaed | 505 | state = SCE_PL_SYMBOLTABLE; |
1e9bafca RD |
506 | i++; |
507 | ch = chNext; | |
508 | chNext = chNext2; | |
509 | } else if (chNext == '{') { | |
510 | styler.ColourTo(i, SCE_PL_SYMBOLTABLE); | |
8e54aaed RD |
511 | } else { |
512 | if (chNext == '*') { // exponentiation | |
513 | i++; | |
514 | ch = chNext; | |
515 | chNext = chNext2; | |
516 | } | |
517 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
518 | } | |
1e9bafca RD |
519 | backflag = BACK_NONE; |
520 | } else if (ch == '/' || (ch == '<' && chNext == '<')) { | |
8e54aaed RD |
521 | // Explicit backward peeking to set a consistent preferRE for |
522 | // any slash found, so no longer need to track preferRE state. | |
523 | // Find first previous significant lexed element and interpret. | |
1e9bafca RD |
524 | // Test for HERE doc start '<<' shares this code, helps to |
525 | // determine if it should be an operator. | |
8e54aaed | 526 | bool preferRE = false; |
1e9bafca RD |
527 | bool isHereDoc = (ch == '<'); |
528 | bool hereDocSpace = false; // these are for corner case: | |
529 | bool hereDocScalar = false; // SCALAR [whitespace] '<<' | |
8e54aaed RD |
530 | unsigned int bk = (i > 0)? i - 1: 0; |
531 | char bkch; | |
532 | styler.Flush(); | |
1e9bafca RD |
533 | if (styler.StyleAt(bk) == SCE_PL_DEFAULT) |
534 | hereDocSpace = true; | |
8e54aaed RD |
535 | while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT || |
536 | styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { | |
537 | bk--; | |
538 | } | |
539 | if (bk == 0) { | |
591d01be RD |
540 | // position 0 won't really be checked; rarely happens |
541 | // hard to fix due to an unsigned index i | |
8e54aaed RD |
542 | preferRE = true; |
543 | } else { | |
544 | int bkstyle = styler.StyleAt(bk); | |
591d01be | 545 | bkch = styler.SafeGetCharAt(bk); |
8e54aaed RD |
546 | switch(bkstyle) { |
547 | case SCE_PL_OPERATOR: | |
548 | preferRE = true; | |
8e54aaed RD |
549 | if (bkch == ')' || bkch == ']') { |
550 | preferRE = false; | |
551 | } else if (bkch == '}') { | |
552 | // backtrack further, count balanced brace pairs | |
553 | // if a brace pair found, see if it's a variable | |
554 | int braceCount = 1; | |
555 | while (--bk > 0) { | |
556 | bkstyle = styler.StyleAt(bk); | |
557 | if (bkstyle == SCE_PL_OPERATOR) { | |
558 | bkch = styler.SafeGetCharAt(bk); | |
591d01be RD |
559 | if (bkch == ';') { // early out |
560 | break; | |
561 | } else if (bkch == '}') { | |
8e54aaed RD |
562 | braceCount++; |
563 | } else if (bkch == '{') { | |
564 | if (--braceCount == 0) | |
565 | break; | |
566 | } | |
567 | } | |
568 | } | |
569 | if (bk == 0) { | |
570 | // at beginning, true | |
571 | } else if (braceCount == 0) { | |
591d01be RD |
572 | // balanced { found, bk>0, skip more whitespace |
573 | if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) { | |
574 | while (bk > 0) { | |
575 | bkstyle = styler.StyleAt(--bk); | |
576 | if (bkstyle != SCE_PL_DEFAULT) | |
577 | break; | |
578 | } | |
579 | } | |
580 | bkstyle = styler.StyleAt(bk); | |
8e54aaed RD |
581 | if (bkstyle == SCE_PL_SCALAR |
582 | || bkstyle == SCE_PL_ARRAY | |
583 | || bkstyle == SCE_PL_HASH | |
591d01be RD |
584 | || bkstyle == SCE_PL_SYMBOLTABLE |
585 | || bkstyle == SCE_PL_OPERATOR) { | |
8e54aaed RD |
586 | preferRE = false; |
587 | } | |
588 | } | |
589 | } | |
590 | break; | |
8e54aaed | 591 | case SCE_PL_IDENTIFIER: |
591d01be RD |
592 | preferRE = true; |
593 | if (bkch == '>') { // inputsymbol | |
594 | preferRE = false; | |
595 | break; | |
596 | } | |
597 | // backtrack to find "->" or "::" before identifier | |
598 | while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { | |
599 | bk--; | |
600 | } | |
601 | while (bk > 0) { | |
602 | bkstyle = styler.StyleAt(bk); | |
603 | if (bkstyle == SCE_PL_DEFAULT || | |
604 | bkstyle == SCE_PL_COMMENTLINE) { | |
605 | } else if (bkstyle == SCE_PL_OPERATOR) { | |
606 | // gcc 3.2.3 bloats if more compact form used | |
607 | bkch = styler.SafeGetCharAt(bk); | |
608 | if (bkch == '>') { // "->" | |
609 | if (styler.SafeGetCharAt(bk - 1) == '-') { | |
610 | preferRE = false; | |
611 | break; | |
612 | } | |
613 | } else if (bkch == ':') { // "::" | |
614 | if (styler.SafeGetCharAt(bk - 1) == ':') { | |
615 | preferRE = false; | |
616 | break; | |
617 | } | |
618 | } | |
619 | } else {// bare identifier, usually a function call but Perl | |
620 | // optimizes them as pseudo-constants, then the next | |
621 | // '/' will be a divide; favour divide over regex | |
622 | // if there is a whitespace after the '/' | |
623 | if (isspacechar(chNext)) { | |
624 | preferRE = false; | |
625 | } | |
626 | break; | |
627 | } | |
628 | bk--; | |
629 | } | |
630 | break; | |
1e9bafca RD |
631 | case SCE_PL_SCALAR: // for $var<< case |
632 | hereDocScalar = true; | |
633 | break; | |
591d01be | 634 | // other styles uses the default, preferRE=false |
8e54aaed | 635 | case SCE_PL_WORD: |
591d01be | 636 | case SCE_PL_POD: |
1e9bafca | 637 | case SCE_PL_POD_VERB: |
8e54aaed RD |
638 | case SCE_PL_HERE_Q: |
639 | case SCE_PL_HERE_QQ: | |
640 | case SCE_PL_HERE_QX: | |
641 | preferRE = true; | |
642 | break; | |
643 | } | |
644 | } | |
1e9bafca RD |
645 | if (isHereDoc) { // handle HERE doc |
646 | // if SCALAR whitespace '<<', *always* a HERE doc | |
647 | if (preferRE || (hereDocSpace && hereDocScalar)) { | |
648 | state = SCE_PL_HERE_DELIM; | |
649 | HereDoc.State = 0; | |
650 | } else { // << operator | |
651 | i++; | |
652 | ch = chNext; | |
653 | chNext = chNext2; | |
654 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
655 | } | |
656 | } else { // handle regexp | |
657 | if (preferRE) { | |
658 | state = SCE_PL_REGEX; | |
659 | Quote.New(1); | |
660 | Quote.Open(ch); | |
661 | } else { // / operator | |
662 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
663 | } | |
664 | } | |
665 | backflag = BACK_NONE; | |
591d01be RD |
666 | } else if (ch == '<') { |
667 | // looks forward for matching > on same line | |
668 | unsigned int fw = i + 1; | |
669 | while (fw < lengthDoc) { | |
670 | char fwch = styler.SafeGetCharAt(fw); | |
1e9bafca RD |
671 | if (fwch == ' ') { |
672 | if (styler.SafeGetCharAt(fw-1) != '\\' || | |
673 | styler.SafeGetCharAt(fw-2) != '\\') | |
674 | break; | |
675 | } else if (isEOLChar(fwch) || isspacechar(fwch)) { | |
591d01be | 676 | break; |
1e9bafca | 677 | } else if (fwch == '>') { |
591d01be RD |
678 | if ((fw - i) == 2 && // '<=>' case |
679 | styler.SafeGetCharAt(fw-1) == '=') { | |
680 | styler.ColourTo(fw, SCE_PL_OPERATOR); | |
681 | } else { | |
682 | styler.ColourTo(fw, SCE_PL_IDENTIFIER); | |
683 | } | |
684 | i = fw; | |
685 | ch = fwch; | |
686 | chNext = styler.SafeGetCharAt(i+1); | |
687 | } | |
688 | fw++; | |
689 | } | |
690 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
1e9bafca | 691 | backflag = BACK_NONE; |
8e54aaed | 692 | } else if (ch == '=' // POD |
65ec6247 RD |
693 | && isalpha(chNext) |
694 | && (isEOLChar(chPrev))) { | |
f6bcfd97 | 695 | state = SCE_PL_POD; |
1e9bafca | 696 | backflag = BACK_NONE; |
8e54aaed RD |
697 | //sookedpos = 0; |
698 | //sooked[sookedpos] = '\0'; | |
699 | } else if (ch == '-' // file test operators | |
65ec6247 RD |
700 | && isSingleCharOp(chNext) |
701 | && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) { | |
65ec6247 RD |
702 | styler.ColourTo(i + 1, SCE_PL_WORD); |
703 | state = SCE_PL_DEFAULT; | |
8e54aaed RD |
704 | i++; |
705 | ch = chNext; | |
706 | chNext = chNext2; | |
1e9bafca | 707 | backflag = BACK_NONE; |
f6bcfd97 | 708 | } else if (isPerlOperator(ch)) { |
8e54aaed RD |
709 | if (ch == '.' && chNext == '.') { // .. and ... |
710 | i++; | |
711 | if (chNext2 == '.') { i++; } | |
712 | state = SCE_PL_DEFAULT; | |
713 | ch = styler.SafeGetCharAt(i); | |
714 | chNext = styler.SafeGetCharAt(i + 1); | |
715 | } | |
f6bcfd97 | 716 | styler.ColourTo(i, SCE_PL_OPERATOR); |
1e9bafca RD |
717 | backflag = BACK_OPERATOR; |
718 | backPos = i; | |
8e54aaed RD |
719 | } else { |
720 | // keep colouring defaults to make restart easier | |
721 | styler.ColourTo(i, SCE_PL_DEFAULT); | |
722 | } | |
723 | } else if (state == SCE_PL_NUMBER) { | |
724 | if (ch == '.') { | |
725 | if (chNext == '.') { | |
726 | // double dot is always an operator | |
727 | goto numAtEnd; | |
1e9bafca | 728 | } else if (numState <= PERLNUM_FLOAT) { |
8e54aaed RD |
729 | // non-decimal number or float exponent, consume next dot |
730 | styler.ColourTo(i - 1, SCE_PL_NUMBER); | |
731 | styler.ColourTo(i, SCE_PL_OPERATOR); | |
732 | state = SCE_PL_DEFAULT; | |
733 | } else { // decimal or vectors allows dots | |
734 | dotCount++; | |
735 | if (numState == PERLNUM_DECIMAL) { | |
736 | if (dotCount > 1) { | |
737 | if (isdigit(chNext)) { // really a vector | |
738 | numState = PERLNUM_VECTOR; | |
739 | } else // number then dot | |
740 | goto numAtEnd; | |
741 | } | |
742 | } else { // vectors | |
743 | if (!isdigit(chNext)) // vector then dot | |
744 | goto numAtEnd; | |
745 | } | |
746 | } | |
747 | } else if (ch == '_' && numState == PERLNUM_DECIMAL) { | |
748 | if (!isdigit(chNext)) { | |
749 | goto numAtEnd; | |
750 | } | |
751 | } else if (isalnum(ch)) { | |
752 | if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { | |
753 | if (isalpha(ch)) { | |
754 | if (dotCount == 0) { // change to word | |
1e9bafca | 755 | state = SCE_PL_IDENTIFIER; |
8e54aaed RD |
756 | } else { // vector then word |
757 | goto numAtEnd; | |
758 | } | |
759 | } | |
760 | } else if (numState == PERLNUM_DECIMAL) { | |
761 | if (ch == 'E' || ch == 'e') { // exponent | |
762 | numState = PERLNUM_FLOAT; | |
763 | if (chNext == '+' || chNext == '-') { | |
764 | i++; | |
765 | ch = chNext; | |
766 | chNext = chNext2; | |
767 | } | |
768 | } else if (!isdigit(ch)) { // number then word | |
769 | goto numAtEnd; | |
770 | } | |
771 | } else if (numState == PERLNUM_FLOAT) { | |
772 | if (!isdigit(ch)) { // float then word | |
773 | goto numAtEnd; | |
774 | } | |
1e9bafca RD |
775 | } else if (numState == PERLNUM_OCTAL) { |
776 | if (!isdigit(ch)) | |
777 | goto numAtEnd; | |
778 | else if (ch > '7') | |
779 | numState = PERLNUM_BAD; | |
780 | } else if (numState == PERLNUM_BINARY) { | |
781 | if (!isdigit(ch)) | |
782 | goto numAtEnd; | |
783 | else if (ch > '1') | |
784 | numState = PERLNUM_BAD; | |
785 | } else if (numState == PERLNUM_HEX) { | |
786 | int ch2 = toupper(ch); | |
787 | if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F')) | |
788 | goto numAtEnd; | |
789 | } else {//(numState == PERLNUM_BAD) { | |
790 | if (!isdigit(ch)) | |
791 | goto numAtEnd; | |
792 | } | |
8e54aaed RD |
793 | } else { |
794 | // complete current number or vector | |
795 | numAtEnd: | |
796 | styler.ColourTo(i - 1, actualNumStyle(numState)); | |
797 | state = SCE_PL_DEFAULT; | |
798 | goto restartLexer; | |
f6bcfd97 | 799 | } |
591d01be | 800 | } else if (state == SCE_PL_IDENTIFIER) { |
1e9bafca | 801 | if (!iswordstart(chNext) && chNext != '\'') { |
591d01be RD |
802 | styler.ColourTo(i, SCE_PL_IDENTIFIER); |
803 | state = SCE_PL_DEFAULT; | |
804 | ch = ' '; | |
805 | } | |
f6bcfd97 BP |
806 | } else { |
807 | if (state == SCE_PL_COMMENTLINE) { | |
65ec6247 | 808 | if (isEOLChar(ch)) { |
f6bcfd97 BP |
809 | styler.ColourTo(i - 1, state); |
810 | state = SCE_PL_DEFAULT; | |
8e54aaed RD |
811 | goto restartLexer; |
812 | } else if (isEOLChar(chNext)) { | |
813 | styler.ColourTo(i, state); | |
814 | state = SCE_PL_DEFAULT; | |
f6bcfd97 | 815 | } |
65ec6247 RD |
816 | } else if (state == SCE_PL_HERE_DELIM) { |
817 | // | |
818 | // From perldata.pod: | |
819 | // ------------------ | |
820 | // A line-oriented form of quoting is based on the shell ``here-doc'' | |
821 | // syntax. | |
822 | // Following a << you specify a string to terminate the quoted material, | |
823 | // and all lines following the current line down to the terminating | |
824 | // string are the value of the item. | |
825 | // The terminating string may be either an identifier (a word), | |
826 | // or some quoted text. | |
827 | // If quoted, the type of quotes you use determines the treatment of | |
828 | // the text, just as in regular quoting. | |
829 | // An unquoted identifier works like double quotes. | |
830 | // There must be no space between the << and the identifier. | |
831 | // (If you put a space it will be treated as a null identifier, | |
832 | // which is valid, and matches the first empty line.) | |
8e54aaed | 833 | // (This is deprecated, -w warns of this syntax) |
65ec6247 RD |
834 | // The terminating string must appear by itself (unquoted and with no |
835 | // surrounding whitespace) on the terminating line. | |
836 | // | |
8e54aaed RD |
837 | // From Bash info: |
838 | // --------------- | |
839 | // Specifier format is: <<[-]WORD | |
840 | // Optional '-' is for removal of leading tabs from here-doc. | |
841 | // Whitespace acceptable after <<[-] operator. | |
842 | // | |
65ec6247 | 843 | if (HereDoc.State == 0) { // '<<' encountered |
1e9bafca RD |
844 | bool gotspace = false; |
845 | unsigned int oldi = i; | |
846 | if (chNext == ' ' || chNext == '\t') { | |
847 | // skip whitespace; legal for quoted delimiters | |
848 | gotspace = true; | |
849 | do { | |
850 | i++; | |
851 | chNext = styler.SafeGetCharAt(i + 1); | |
852 | } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t')); | |
853 | chNext2 = styler.SafeGetCharAt(i + 2); | |
854 | } | |
65ec6247 RD |
855 | HereDoc.State = 1; |
856 | HereDoc.Quote = chNext; | |
857 | HereDoc.Quoted = false; | |
858 | HereDoc.DelimiterLength = 0; | |
859 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
1e9bafca RD |
860 | if (chNext == '\'' || chNext == '"' || chNext == '`') { |
861 | // a quoted here-doc delimiter | |
f6bcfd97 BP |
862 | i++; |
863 | ch = chNext; | |
65ec6247 RD |
864 | chNext = chNext2; |
865 | HereDoc.Quoted = true; | |
8e54aaed | 866 | } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\' |
1e9bafca RD |
867 | || chNext == '=' || chNext == '$' || chNext == '@' |
868 | || ((isalpha(chNext) || chNext == '_') && gotspace)) { | |
8e54aaed | 869 | // left shift << or <<= operator cases |
1e9bafca RD |
870 | // restore position if operator |
871 | i = oldi; | |
8e54aaed RD |
872 | styler.ColourTo(i, SCE_PL_OPERATOR); |
873 | state = SCE_PL_DEFAULT; | |
874 | HereDoc.State = 0; | |
1e9bafca | 875 | goto restartLexer; |
8e54aaed | 876 | } else { |
1e9bafca RD |
877 | // an unquoted here-doc delimiter, no special handling |
878 | // (cannot be prefixed by spaces/tabs), or | |
8e54aaed | 879 | // symbols terminates; deprecated zero-length delimiter |
65ec6247 RD |
880 | } |
881 | ||
882 | } else if (HereDoc.State == 1) { // collect the delimiter | |
1e9bafca | 883 | backflag = BACK_NONE; |
65ec6247 RD |
884 | if (HereDoc.Quoted) { // a quoted here-doc delimiter |
885 | if (ch == HereDoc.Quote) { // closing quote => end of delimiter | |
886 | styler.ColourTo(i, state); | |
887 | state = SCE_PL_DEFAULT; | |
65ec6247 RD |
888 | } else { |
889 | if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote | |
890 | i++; | |
891 | ch = chNext; | |
892 | chNext = chNext2; | |
893 | } | |
894 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
895 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
896 | } | |
897 | } else { // an unquoted here-doc delimiter | |
898 | if (isalnum(ch) || ch == '_') { | |
899 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; | |
900 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
901 | } else { | |
902 | styler.ColourTo(i - 1, state); | |
903 | state = SCE_PL_DEFAULT; | |
8e54aaed | 904 | goto restartLexer; |
65ec6247 RD |
905 | } |
906 | } | |
8e54aaed | 907 | if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { |
65ec6247 RD |
908 | styler.ColourTo(i - 1, state); |
909 | state = SCE_PL_ERROR; | |
8e54aaed | 910 | goto restartLexer; |
f6bcfd97 | 911 | } |
f6bcfd97 | 912 | } |
65ec6247 RD |
913 | } else if (HereDoc.State == 2) { |
914 | // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX | |
915 | if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { | |
916 | i += HereDoc.DelimiterLength; | |
8e54aaed RD |
917 | chPrev = styler.SafeGetCharAt(i - 1); |
918 | ch = styler.SafeGetCharAt(i); | |
919 | if (isEOLChar(ch)) { | |
65ec6247 RD |
920 | styler.ColourTo(i - 1, state); |
921 | state = SCE_PL_DEFAULT; | |
1e9bafca | 922 | backflag = BACK_NONE; |
65ec6247 | 923 | HereDoc.State = 0; |
8e54aaed | 924 | goto restartLexer; |
65ec6247 | 925 | } |
f6bcfd97 BP |
926 | chNext = styler.SafeGetCharAt(i + 1); |
927 | } | |
1e9bafca RD |
928 | } else if (state == SCE_PL_POD |
929 | || state == SCE_PL_POD_VERB) { | |
930 | if (isEOLChar(chPrev)) { | |
931 | if (ch == ' ' || ch == '\t') { | |
932 | styler.ColourTo(i - 1, state); | |
933 | state = SCE_PL_POD_VERB; | |
934 | } else { | |
935 | styler.ColourTo(i - 1, state); | |
936 | state = SCE_PL_POD; | |
937 | if (ch == '=') { | |
938 | if (isMatch(styler, lengthDoc, i, "=cut")) { | |
939 | styler.ColourTo(i - 1 + 4, state); | |
940 | i += 4; | |
941 | state = SCE_PL_DEFAULT; | |
942 | ch = styler.SafeGetCharAt(i); | |
943 | //chNext = styler.SafeGetCharAt(i + 1); | |
944 | goto restartLexer; | |
945 | } | |
946 | } | |
f6bcfd97 BP |
947 | } |
948 | } | |
8e54aaed RD |
949 | } else if (state == SCE_PL_SCALAR // variable names |
950 | || state == SCE_PL_ARRAY | |
951 | || state == SCE_PL_HASH | |
952 | || state == SCE_PL_SYMBOLTABLE) { | |
953 | if (ch == ':' && chNext == ':') { // skip :: | |
954 | i++; | |
955 | ch = chNext; | |
956 | chNext = chNext2; | |
957 | } | |
958 | else if (isEndVar(ch)) { | |
1e9bafca | 959 | if (i == (styler.GetStartSegment() + 1)) { |
65ec6247 RD |
960 | // Special variable: $(, $_ etc. |
961 | styler.ColourTo(i, state); | |
8e54aaed | 962 | state = SCE_PL_DEFAULT; |
65ec6247 RD |
963 | } else { |
964 | styler.ColourTo(i - 1, state); | |
8e54aaed RD |
965 | state = SCE_PL_DEFAULT; |
966 | goto restartLexer; | |
65ec6247 | 967 | } |
f6bcfd97 | 968 | } |
65ec6247 RD |
969 | } else if (state == SCE_PL_REGEX |
970 | || state == SCE_PL_STRING_QR | |
971 | ) { | |
972 | if (!Quote.Up && !isspacechar(ch)) { | |
973 | Quote.Open(ch); | |
974 | } else if (ch == '\\' && Quote.Up != '\\') { | |
975 | // SG: Is it safe to skip *every* escaped char? | |
976 | i++; | |
977 | ch = chNext; | |
978 | chNext = styler.SafeGetCharAt(i + 1); | |
f6bcfd97 | 979 | } else { |
65ec6247 RD |
980 | if (ch == Quote.Down /*&& chPrev != '\\'*/) { |
981 | Quote.Count--; | |
982 | if (Quote.Count == 0) { | |
983 | Quote.Rep--; | |
984 | if (Quote.Up == Quote.Down) { | |
985 | Quote.Count++; | |
f6bcfd97 BP |
986 | } |
987 | } | |
988 | if (!isalpha(chNext)) { | |
65ec6247 | 989 | if (Quote.Rep <= 0) { |
f6bcfd97 BP |
990 | styler.ColourTo(i, state); |
991 | state = SCE_PL_DEFAULT; | |
992 | ch = ' '; | |
993 | } | |
994 | } | |
65ec6247 RD |
995 | } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { |
996 | Quote.Count++; | |
f6bcfd97 | 997 | } else if (!isalpha(chNext)) { |
65ec6247 | 998 | if (Quote.Rep <= 0) { |
f6bcfd97 BP |
999 | styler.ColourTo(i, state); |
1000 | state = SCE_PL_DEFAULT; | |
1001 | ch = ' '; | |
1002 | } | |
1003 | } | |
1004 | } | |
1005 | } else if (state == SCE_PL_REGSUBST) { | |
65ec6247 RD |
1006 | if (!Quote.Up && !isspacechar(ch)) { |
1007 | Quote.Open(ch); | |
1008 | } else if (ch == '\\' && Quote.Up != '\\') { | |
1009 | // SG: Is it safe to skip *every* escaped char? | |
1010 | i++; | |
1011 | ch = chNext; | |
1012 | chNext = styler.SafeGetCharAt(i + 1); | |
f6bcfd97 | 1013 | } else { |
65ec6247 | 1014 | if (Quote.Count == 0 && Quote.Rep == 1) { |
d134f170 RD |
1015 | /* We matched something like s(...) or tr{...} |
1016 | * and are looking for the next matcher characters, | |
1017 | * which could be either bracketed ({...}) or non-bracketed | |
1018 | * (/.../). | |
1019 | * | |
1020 | * Number-signs are problematic. If they occur after | |
1021 | * the close of the first part, treat them like | |
65ec6247 | 1022 | * a Quote.Up char, even if they actually start comments. |
d134f170 RD |
1023 | * |
1024 | * If we find an alnum, we end the regsubst, and punt. | |
1025 | * | |
1026 | * Eric Promislow ericp@activestate.com Aug 9,2000 | |
1027 | */ | |
65ec6247 | 1028 | if (isspacechar(ch)) { |
d134f170 | 1029 | // Keep going |
65ec6247 RD |
1030 | } |
1031 | else if (isalnum(ch)) { | |
d134f170 RD |
1032 | styler.ColourTo(i, state); |
1033 | state = SCE_PL_DEFAULT; | |
1034 | ch = ' '; | |
1035 | } else { | |
65ec6247 | 1036 | Quote.Open(ch); |
d134f170 | 1037 | } |
65ec6247 RD |
1038 | } else if (ch == Quote.Down /*&& chPrev != '\\'*/) { |
1039 | Quote.Count--; | |
1040 | if (Quote.Count == 0) { | |
1041 | Quote.Rep--; | |
f6bcfd97 BP |
1042 | } |
1043 | if (!isalpha(chNext)) { | |
65ec6247 | 1044 | if (Quote.Rep <= 0) { |
f6bcfd97 BP |
1045 | styler.ColourTo(i, state); |
1046 | state = SCE_PL_DEFAULT; | |
1047 | ch = ' '; | |
1048 | } | |
1049 | } | |
65ec6247 RD |
1050 | if (Quote.Up == Quote.Down) { |
1051 | Quote.Count++; | |
f6bcfd97 | 1052 | } |
65ec6247 RD |
1053 | } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { |
1054 | Quote.Count++; | |
f6bcfd97 | 1055 | } else if (!isalpha(chNext)) { |
65ec6247 | 1056 | if (Quote.Rep <= 0) { |
f6bcfd97 BP |
1057 | styler.ColourTo(i, state); |
1058 | state = SCE_PL_DEFAULT; | |
1059 | ch = ' '; | |
1060 | } | |
1061 | } | |
1062 | } | |
65ec6247 RD |
1063 | } else if (state == SCE_PL_STRING_Q |
1064 | || state == SCE_PL_STRING_QQ | |
1065 | || state == SCE_PL_STRING_QX | |
1066 | || state == SCE_PL_STRING_QW | |
1067 | || state == SCE_PL_STRING | |
1068 | || state == SCE_PL_CHARACTER | |
1069 | || state == SCE_PL_BACKTICKS | |
1070 | ) { | |
1071 | if (!Quote.Down && !isspacechar(ch)) { | |
1072 | Quote.Open(ch); | |
1073 | } else if (ch == '\\' && Quote.Up != '\\') { | |
1074 | i++; | |
1075 | ch = chNext; | |
1076 | chNext = styler.SafeGetCharAt(i + 1); | |
1077 | } else if (ch == Quote.Down) { | |
1078 | Quote.Count--; | |
1079 | if (Quote.Count == 0) { | |
1080 | Quote.Rep--; | |
1081 | if (Quote.Rep <= 0) { | |
f6bcfd97 BP |
1082 | styler.ColourTo(i, state); |
1083 | state = SCE_PL_DEFAULT; | |
1084 | ch = ' '; | |
1085 | } | |
65ec6247 RD |
1086 | if (Quote.Up == Quote.Down) { |
1087 | Quote.Count++; | |
f6bcfd97 BP |
1088 | } |
1089 | } | |
65ec6247 RD |
1090 | } else if (ch == Quote.Up) { |
1091 | Quote.Count++; | |
f6bcfd97 BP |
1092 | } |
1093 | } | |
f6bcfd97 | 1094 | } |
65ec6247 RD |
1095 | if (state == SCE_PL_ERROR) { |
1096 | break; | |
1097 | } | |
f6bcfd97 BP |
1098 | chPrev = ch; |
1099 | } | |
1a2fb4cd | 1100 | styler.ColourTo(lengthDoc - 1, state); |
f6bcfd97 BP |
1101 | } |
1102 | ||
1e9bafca RD |
1103 | static bool IsCommentLine(int line, Accessor &styler) { |
1104 | int pos = styler.LineStart(line); | |
1105 | int eol_pos = styler.LineStart(line + 1) - 1; | |
1106 | for (int i = pos; i < eol_pos; i++) { | |
1107 | char ch = styler[i]; | |
1108 | int style = styler.StyleAt(i); | |
1109 | if (ch == '#' && style == SCE_PL_COMMENTLINE) | |
1110 | return true; | |
1111 | else if (ch != ' ' && ch != '\t') | |
1112 | return false; | |
1113 | } | |
1114 | return false; | |
1115 | } | |
1116 | ||
9e730a78 RD |
1117 | static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], |
1118 | Accessor &styler) { | |
1119 | bool foldComment = styler.GetPropertyInt("fold.comment") != 0; | |
1120 | bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; | |
1e9bafca RD |
1121 | // Custom folding of POD and packages |
1122 | bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0; | |
1123 | bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0; | |
9e730a78 RD |
1124 | unsigned int endPos = startPos + length; |
1125 | int visibleChars = 0; | |
1126 | int lineCurrent = styler.GetLine(startPos); | |
1e9bafca RD |
1127 | int levelPrev = SC_FOLDLEVELBASE; |
1128 | if (lineCurrent > 0) | |
1129 | levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; | |
9e730a78 RD |
1130 | int levelCurrent = levelPrev; |
1131 | char chNext = styler[startPos]; | |
1e9bafca | 1132 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
9e730a78 | 1133 | int styleNext = styler.StyleAt(startPos); |
1e9bafca RD |
1134 | // Used at end of line to determine if the line was a package definition |
1135 | bool isPackageLine = false; | |
1136 | bool isPodHeading = false; | |
9e730a78 RD |
1137 | for (unsigned int i = startPos; i < endPos; i++) { |
1138 | char ch = chNext; | |
1139 | chNext = styler.SafeGetCharAt(i + 1); | |
1140 | int style = styleNext; | |
1141 | styleNext = styler.StyleAt(i + 1); | |
1142 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); | |
1e9bafca RD |
1143 | bool atLineStart = isEOLChar(chPrev) || i == 0; |
1144 | // Comment folding | |
1145 | if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) | |
1146 | { | |
1147 | if (!IsCommentLine(lineCurrent - 1, styler) | |
1148 | && IsCommentLine(lineCurrent + 1, styler)) | |
1149 | levelCurrent++; | |
1150 | else if (IsCommentLine(lineCurrent - 1, styler) | |
1151 | && !IsCommentLine(lineCurrent+1, styler)) | |
1152 | levelCurrent--; | |
1153 | } | |
9e730a78 RD |
1154 | if (style == SCE_C_OPERATOR) { |
1155 | if (ch == '{') { | |
1156 | levelCurrent++; | |
1157 | } else if (ch == '}') { | |
1158 | levelCurrent--; | |
1159 | } | |
1160 | } | |
1e9bafca RD |
1161 | // Custom POD folding |
1162 | if (foldPOD && atLineStart) { | |
1163 | int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT; | |
1164 | if (style == SCE_PL_POD) { | |
1165 | if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB) | |
1166 | levelCurrent++; | |
1167 | else if (styler.Match(i, "=cut")) | |
1168 | levelCurrent--; | |
1169 | else if (styler.Match(i, "=head")) | |
1170 | isPodHeading = true; | |
1171 | } else if (style == SCE_PL_DATASECTION) { | |
1172 | if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) | |
1173 | levelCurrent++; | |
1174 | else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) | |
1175 | levelCurrent--; | |
1176 | else if (styler.Match(i, "=head")) | |
1177 | isPodHeading = true; | |
1178 | // if package used or unclosed brace, level > SC_FOLDLEVELBASE! | |
1179 | // reset needed as level test is vs. SC_FOLDLEVELBASE | |
1180 | else if (styler.Match(i, "__END__")) | |
1181 | levelCurrent = SC_FOLDLEVELBASE; | |
1182 | } | |
1183 | } | |
1184 | // Custom package folding | |
1185 | if (foldPackage && atLineStart) { | |
1186 | if (style == SCE_PL_WORD && styler.Match(i, "package")) { | |
1187 | isPackageLine = true; | |
1188 | } | |
1189 | } | |
1190 | ||
9e730a78 RD |
1191 | if (atEOL) { |
1192 | int lev = levelPrev; | |
1e9bafca RD |
1193 | if (isPodHeading) { |
1194 | lev = levelPrev - 1; | |
1195 | lev |= SC_FOLDLEVELHEADERFLAG; | |
1196 | isPodHeading = false; | |
1197 | } | |
1198 | // Check if line was a package declaration | |
1199 | // because packages need "special" treatment | |
1200 | if (isPackageLine) { | |
1201 | lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; | |
1202 | levelCurrent = SC_FOLDLEVELBASE + 1; | |
1203 | isPackageLine = false; | |
1204 | } | |
1205 | lev |= levelCurrent << 16; | |
9e730a78 RD |
1206 | if (visibleChars == 0 && foldCompact) |
1207 | lev |= SC_FOLDLEVELWHITEFLAG; | |
1208 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1209 | lev |= SC_FOLDLEVELHEADERFLAG; | |
1210 | if (lev != styler.LevelAt(lineCurrent)) { | |
1211 | styler.SetLevel(lineCurrent, lev); | |
1212 | } | |
1213 | lineCurrent++; | |
1214 | levelPrev = levelCurrent; | |
1215 | visibleChars = 0; | |
1216 | } | |
1217 | if (!isspacechar(ch)) | |
1218 | visibleChars++; | |
1e9bafca | 1219 | chPrev = ch; |
9e730a78 RD |
1220 | } |
1221 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
1222 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; | |
1223 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); | |
1224 | } | |
1225 | ||
// Descriptions of the keyword lists used by the Perl lexer.
// There is a single list, "Keywords"; a null pointer terminates the array.
static const char * const perlWordListDesc[] = { "Keywords", 0 };
1230 | ||
9e730a78 | 1231 | LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc); |
591d01be | 1232 |