]>
Commit | Line | Data |
---|---|---|
65ec6247 RD |
1 | // Scintilla source code edit control |
2 | /** @file LexPerl.cxx | |
9e96e16f | 3 | ** Lexer for Perl. |
1dcf666d | 4 | ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net> |
65ec6247 | 5 | **/ |
9e96e16f | 6 | // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org> |
1e9bafca | 7 | // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> |
f6bcfd97 BP |
8 | // The License.txt file describes the conditions under which this software may be distributed. |
9 | ||
65ec6247 RD |
10 | #include <stdlib.h> |
11 | #include <string.h> | |
65ec6247 RD |
12 | #include <stdio.h> |
13 | #include <stdarg.h> | |
1dcf666d RD |
14 | #include <assert.h> |
15 | #include <ctype.h> | |
f6bcfd97 | 16 | |
1dcf666d RD |
17 | #include <string> |
18 | #include <map> | |
f6bcfd97 | 19 | |
1dcf666d | 20 | #include "ILexer.h" |
f6bcfd97 BP |
21 | #include "Scintilla.h" |
22 | #include "SciLexer.h" | |
1dcf666d RD |
23 | |
24 | #include "WordList.h" | |
25 | #include "LexAccessor.h" | |
26 | #include "StyleContext.h" | |
9e96e16f | 27 | #include "CharacterSet.h" |
1dcf666d RD |
28 | #include "LexerModule.h" |
29 | #include "OptionSet.h" | |
f6bcfd97 | 30 | |
7e0c58e9 RD |
31 | #ifdef SCI_NAMESPACE |
32 | using namespace Scintilla; | |
33 | #endif | |
34 | ||
9e96e16f RD |
35 | // Info for HERE document handling from perldata.pod (reformatted): |
36 | // ---------------------------------------------------------------- | |
37 | // A line-oriented form of quoting is based on the shell ``here-doc'' syntax. | |
38 | // Following a << you specify a string to terminate the quoted material, and | |
39 | // all lines following the current line down to the terminating string are | |
40 | // the value of the item. | |
41 | // * The terminating string may be either an identifier (a word), or some | |
42 | // quoted text. | |
43 | // * If quoted, the type of quotes you use determines the treatment of the | |
44 | // text, just as in regular quoting. | |
45 | // * An unquoted identifier works like double quotes. | |
46 | // * There must be no space between the << and the identifier. | |
47 | // (If you put a space it will be treated as a null identifier, | |
48 | // which is valid, and matches the first empty line.) | |
49 | // (This is deprecated, -w warns of this syntax) | |
50 | // * The terminating string must appear by itself (unquoted and | |
51 | // with no surrounding whitespace) on the terminating line. | |
1e9bafca | 52 | |
9e96e16f | 53 | #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter |
8e54aaed | 54 | |
9e96e16f RD |
55 | #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot |
56 | #define PERLNUM_HEX 2 | |
57 | #define PERLNUM_OCTAL 3 | |
58 | #define PERLNUM_FLOAT_EXP 4 // exponent part only | |
59 | #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings | |
60 | #define PERLNUM_VECTOR 6 | |
61 | #define PERLNUM_V_VECTOR 7 | |
62 | #define PERLNUM_BAD 8 | |
8e54aaed | 63 | |
9e96e16f RD |
64 | #define BACK_NONE 0 // lookback state for bareword disambiguation: |
65 | #define BACK_OPERATOR 1 // whitespace/comments are insignificant | |
66 | #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation | |
f6bcfd97 | 67 | |
1dcf666d RD |
68 | // all interpolated styles are different from their parent styles by a constant difference |
69 | // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value | |
70 | #define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING) | |
71 | ||
72 | static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) { | |
9e96e16f RD |
73 | // old-style keyword matcher; needed because GetCurrent() needs |
74 | // current segment to be committed, but we may abandon early... | |
f6bcfd97 | 75 | char s[100]; |
9e96e16f RD |
76 | unsigned int i, len = end - start; |
77 | if (len > 30) { len = 30; } | |
1e9bafca | 78 | for (i = 0; i < len; i++, start++) s[i] = styler[start]; |
9e96e16f | 79 | s[i] = '\0'; |
1e9bafca | 80 | return keywords.InList(s); |
f6bcfd97 BP |
81 | } |
82 | ||
1dcf666d RD |
83 | static int disambiguateBareword(LexAccessor &styler, unsigned int bk, unsigned int fw, |
84 | int backFlag, unsigned int backPos, unsigned int endPos) { | |
9e96e16f RD |
85 | // identifiers are recognized by Perl as barewords under some |
86 | // conditions, the following attempts to do the disambiguation | |
87 | // by looking backward and forward; result in 2 LSB | |
88 | int result = 0; | |
89 | bool moreback = false; // true if passed newline/comments | |
90 | bool brace = false; // true if opening brace found | |
91 | // if BACK_NONE, neither operator nor keyword, so skip test | |
92 | if (backFlag == BACK_NONE) | |
93 | return result; | |
94 | // first look backwards past whitespace/comments to set EOL flag | |
95 | // (some disambiguation patterns must be on a single line) | |
96 | if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk)))) | |
97 | moreback = true; | |
98 | // look backwards at last significant lexed item for disambiguation | |
99 | bk = backPos - 1; | |
100 | int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); | |
101 | if (ch == '{' && !moreback) { | |
102 | // {bareword: possible variable spec | |
103 | brace = true; | |
104 | } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&') | |
1dcf666d RD |
105 | // &bareword: subroutine call |
106 | || styler.Match(bk - 1, "->") | |
107 | // ->bareword: part of variable spec | |
108 | || styler.Match(bk - 2, "sub")) { | |
109 | // sub bareword: subroutine declaration | |
110 | // (implied BACK_KEYWORD, no keywords end in 'sub'!) | |
9e96e16f RD |
111 | result |= 1; |
112 | } | |
113 | // next, scan forward after word past tab/spaces only; | |
114 | // if ch isn't one of '[{(,' we can skip the test | |
115 | if ((ch == '{' || ch == '(' || ch == '['|| ch == ',') | |
1dcf666d | 116 | && fw < endPos) { |
9e96e16f | 117 | while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)), |
1dcf666d | 118 | IsASpaceOrTab(ch) && fw < endPos) { |
9e96e16f RD |
119 | fw++; |
120 | } | |
121 | if ((ch == '}' && brace) | |
1dcf666d RD |
122 | // {bareword}: variable spec |
123 | || styler.Match(fw, "=>")) { | |
9e96e16f RD |
124 | // [{(, bareword=>: hash literal |
125 | result |= 2; | |
126 | } | |
127 | } | |
128 | return result; | |
7e0c58e9 RD |
129 | } |
130 | ||
1dcf666d | 131 | static void skipWhitespaceComment(LexAccessor &styler, unsigned int &p) { |
9e96e16f RD |
132 | // when backtracking, we need to skip whitespace and comments |
133 | int style; | |
134 | while ((p > 0) && (style = styler.StyleAt(p), | |
1dcf666d | 135 | style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE)) |
9e96e16f | 136 | p--; |
f6bcfd97 BP |
137 | } |
138 | ||
1dcf666d | 139 | static int styleBeforeBracePair(LexAccessor &styler, unsigned int bk) { |
9e96e16f RD |
140 | // backtrack to find open '{' corresponding to a '}', balanced |
141 | // return significant style to be tested for '/' disambiguation | |
142 | int braceCount = 1; | |
143 | if (bk == 0) | |
144 | return SCE_PL_DEFAULT; | |
145 | while (--bk > 0) { | |
146 | if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { | |
147 | int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); | |
148 | if (bkch == ';') { // early out | |
149 | break; | |
150 | } else if (bkch == '}') { | |
151 | braceCount++; | |
152 | } else if (bkch == '{') { | |
153 | if (--braceCount == 0) break; | |
154 | } | |
155 | } | |
156 | } | |
157 | if (bk > 0 && braceCount == 0) { | |
158 | // balanced { found, bk > 0, skip more whitespace/comments | |
159 | bk--; | |
160 | skipWhitespaceComment(styler, bk); | |
161 | return styler.StyleAt(bk); | |
162 | } | |
163 | return SCE_PL_DEFAULT; | |
8e54aaed RD |
164 | } |
165 | ||
1dcf666d | 166 | static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) { |
9e96e16f RD |
167 | // backtrack to classify sub-styles of identifier under test |
168 | // return sub-style to be tested for '/' disambiguation | |
169 | if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo> | |
170 | return 1; | |
171 | // backtrack to check for possible "->" or "::" before identifier | |
172 | while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { | |
173 | bk--; | |
174 | } | |
175 | while (bk > 0) { | |
176 | int bkstyle = styler.StyleAt(bk); | |
177 | if (bkstyle == SCE_PL_DEFAULT | |
1dcf666d | 178 | || bkstyle == SCE_PL_COMMENTLINE) { |
9e96e16f RD |
179 | // skip whitespace, comments |
180 | } else if (bkstyle == SCE_PL_OPERATOR) { | |
181 | // test for "->" and "::" | |
182 | if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::")) | |
183 | return 2; | |
184 | } else | |
185 | return 3; // bare identifier | |
186 | bk--; | |
187 | } | |
188 | return 0; | |
8e54aaed RD |
189 | } |
190 | ||
1dcf666d | 191 | static int inputsymbolScan(LexAccessor &styler, unsigned int pos, unsigned int endPos) { |
9e96e16f RD |
192 | // looks forward for matching > on same line; a bit ugly |
193 | unsigned int fw = pos; | |
194 | while (++fw < endPos) { | |
195 | int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)); | |
196 | if (fwch == '\r' || fwch == '\n') { | |
197 | return 0; | |
198 | } else if (fwch == '>') { | |
199 | if (styler.Match(fw - 2, "<=>")) // '<=>' case | |
200 | return 0; | |
201 | return fw - pos; | |
202 | } | |
f6bcfd97 | 203 | } |
9e96e16f RD |
204 | return 0; |
205 | } | |
206 | ||
1dcf666d | 207 | static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) { |
9e96e16f RD |
208 | // forward scan the current line to classify line for POD style |
209 | int state = -1; | |
210 | while (pos <= endPos) { | |
211 | int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); | |
212 | if (ch == '\n' || ch == '\r' || pos >= endPos) { | |
213 | if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; | |
214 | break; | |
f6bcfd97 | 215 | } |
9e96e16f RD |
216 | if (IsASpaceOrTab(ch)) { // whitespace, take note |
217 | if (state == -1) | |
218 | state = SCE_PL_DEFAULT; | |
219 | } else if (state == SCE_PL_DEFAULT) { // verbatim POD line | |
220 | state = SCE_PL_POD_VERB; | |
221 | } else if (state != SCE_PL_POD_VERB) { // regular POD line | |
222 | state = SCE_PL_POD; | |
223 | } | |
224 | pos++; | |
225 | } | |
226 | if (state == -1) | |
227 | state = SCE_PL_DEFAULT; | |
228 | return state; | |
229 | } | |
230 | ||
1dcf666d | 231 | static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) { |
9e96e16f RD |
232 | // backtrack to identify if we're starting a subroutine prototype |
233 | // we also need to ignore whitespace/comments: | |
234 | // 'sub' [whitespace|comment] <identifier> [whitespace|comment] | |
235 | styler.Flush(); | |
236 | skipWhitespaceComment(styler, bk); | |
237 | if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier | |
238 | return false; | |
239 | while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { | |
240 | bk--; | |
f6bcfd97 | 241 | } |
9e96e16f RD |
242 | skipWhitespaceComment(styler, bk); |
243 | if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword | |
1dcf666d | 244 | || !styler.Match(bk - 2, "sub")) // assume suffix is unique! |
9e96e16f | 245 | return false; |
f6bcfd97 BP |
246 | return true; |
247 | } | |
248 | ||
9e96e16f RD |
249 | static int actualNumStyle(int numberStyle) { |
250 | if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { | |
251 | return SCE_PL_STRING; | |
252 | } else if (numberStyle == PERLNUM_BAD) { | |
253 | return SCE_PL_ERROR; | |
254 | } | |
255 | return SCE_PL_NUMBER; | |
256 | } | |
257 | ||
// Returns the closing delimiter paired with an opening bracket character
// ( [ { <, or the character itself for any other delimiter.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
265 | ||
1dcf666d RD |
266 | static bool IsCommentLine(int line, LexAccessor &styler) { |
267 | int pos = styler.LineStart(line); | |
268 | int eol_pos = styler.LineStart(line + 1) - 1; | |
269 | for (int i = pos; i < eol_pos; i++) { | |
270 | char ch = styler[i]; | |
271 | int style = styler.StyleAt(i); | |
272 | if (ch == '#' && style == SCE_PL_COMMENTLINE) | |
273 | return true; | |
274 | else if (!IsASpaceOrTab(ch)) | |
275 | return false; | |
276 | } | |
277 | return false; | |
278 | } | |
279 | ||
280 | static bool IsPackageLine(int line, LexAccessor &styler) { | |
281 | int pos = styler.LineStart(line); | |
282 | int style = styler.StyleAt(pos); | |
283 | if (style == SCE_PL_WORD && styler.Match(pos, "package")) { | |
284 | return true; | |
285 | } | |
286 | return false; | |
287 | } | |
288 | ||
289 | static int PodHeadingLevel(int pos, LexAccessor &styler) { | |
290 | int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5)); | |
291 | if (lvl >= '1' && lvl <= '4') { | |
292 | return lvl - '0'; | |
293 | } | |
294 | return 0; | |
295 | } | |
296 | ||
297 | // An individual named option for use in an OptionSet | |
298 | ||
// Options used for LexerPerl.
// Each flag is bound to a property name in OptionSetPerl; the constructor
// supplies the defaults.
struct OptionsPerl {
	bool fold;                 // fold
	bool foldComment;          // fold.comment
	bool foldCompact;          // fold.compact
	// Custom folding of POD and packages
	bool foldPOD;              // fold.perl.pod: fold Pod blocks
	bool foldPackage;          // fold.perl.package: fold packages
	bool foldCommentExplicit;  // fold.perl.comment.explicit
	bool foldAtElse;           // fold.perl.at.else

	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
324 | ||
// Descriptions of the word lists accepted by the lexer, terminated by a
// null pointer. Only one list (index 0) is defined.
static const char *const perlWordListDesc[] = { "Keywords", 0 };
329 | ||
330 | struct OptionSetPerl : public OptionSet<OptionsPerl> { | |
331 | OptionSetPerl() { | |
332 | DefineProperty("fold", &OptionsPerl::fold); | |
333 | ||
334 | DefineProperty("fold.comment", &OptionsPerl::foldComment); | |
335 | ||
336 | DefineProperty("fold.compact", &OptionsPerl::foldCompact); | |
337 | ||
338 | DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD, | |
339 | "Set to 0 to disable folding Pod blocks when using the Perl lexer."); | |
340 | ||
341 | DefineProperty("fold.perl.package", &OptionsPerl::foldPackage, | |
342 | "Set to 0 to disable folding packages when using the Perl lexer."); | |
343 | ||
344 | DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit, | |
345 | "Set to 0 to disable explicit folding."); | |
346 | ||
347 | DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse, | |
348 | "This option enables Perl folding on a \"} else {\" line of an if statement."); | |
349 | ||
350 | DefineWordListSets(perlWordListDesc); | |
351 | } | |
352 | }; | |
353 | ||
354 | class LexerPerl : public ILexer { | |
355 | CharacterSet setWordStart; | |
356 | CharacterSet setWord; | |
357 | CharacterSet setSpecialVar; | |
358 | CharacterSet setControlVar; | |
359 | WordList keywords; | |
360 | OptionsPerl options; | |
361 | OptionSetPerl osPerl; | |
362 | public: | |
363 | LexerPerl() : | |
364 | setWordStart(CharacterSet::setAlpha, "_", 0x80, true), | |
365 | setWord(CharacterSet::setAlphaNum, "_", 0x80, true), | |
366 | setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"), | |
367 | setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") { | |
368 | } | |
369 | virtual ~LexerPerl() { | |
370 | } | |
371 | void SCI_METHOD Release() { | |
372 | delete this; | |
373 | } | |
374 | int SCI_METHOD Version() const { | |
375 | return lvOriginal; | |
376 | } | |
377 | const char *SCI_METHOD PropertyNames() { | |
378 | return osPerl.PropertyNames(); | |
379 | } | |
380 | int SCI_METHOD PropertyType(const char *name) { | |
381 | return osPerl.PropertyType(name); | |
382 | } | |
383 | const char *SCI_METHOD DescribeProperty(const char *name) { | |
384 | return osPerl.DescribeProperty(name); | |
385 | } | |
386 | int SCI_METHOD PropertySet(const char *key, const char *val); | |
387 | const char *SCI_METHOD DescribeWordListSets() { | |
388 | return osPerl.DescribeWordListSets(); | |
389 | } | |
390 | int SCI_METHOD WordListSet(int n, const char *wl); | |
391 | void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess); | |
392 | void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess); | |
393 | ||
394 | void *SCI_METHOD PrivateCall(int, void *) { | |
395 | return 0; | |
396 | } | |
397 | ||
398 | static ILexer *LexerFactoryPerl() { | |
399 | return new LexerPerl(); | |
400 | } | |
401 | void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false); | |
402 | }; | |
403 | ||
404 | int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) { | |
405 | if (osPerl.PropertySet(&options, key, val)) { | |
406 | return 0; | |
407 | } | |
408 | return -1; | |
409 | } | |
410 | ||
411 | int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) { | |
412 | WordList *wordListN = 0; | |
413 | switch (n) { | |
414 | case 0: | |
415 | wordListN = &keywords; | |
416 | break; | |
417 | } | |
418 | int firstModification = -1; | |
419 | if (wordListN) { | |
420 | WordList wlNew; | |
421 | wlNew.Set(wl); | |
422 | if (*wordListN != wlNew) { | |
423 | wordListN->Set(wl); | |
424 | firstModification = 0; | |
425 | } | |
426 | } | |
427 | return firstModification; | |
428 | } | |
429 | ||
430 | void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) { | |
431 | // interpolate a segment (with no active backslashes or delimiters within) | |
432 | // switch in or out of an interpolation style or continue current style | |
433 | // commit variable patterns if found, trim segment, repeat until done | |
434 | while (maxSeg > 0) { | |
435 | bool isVar = false; | |
436 | int sLen = 0; | |
437 | if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) { | |
438 | // $#[$]*word [$@][$]*word (where word or {word} is always present) | |
439 | bool braces = false; | |
440 | sLen = 1; | |
441 | if (sc.ch == '$' && sc.chNext == '#') { // starts with $# | |
442 | sLen++; | |
443 | } | |
444 | while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$')) // >0 $ dereference within | |
445 | sLen++; | |
446 | if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) { // { start for {word} | |
447 | sLen++; | |
448 | braces = true; | |
449 | } | |
450 | if (maxSeg > sLen) { | |
451 | int c = sc.GetRelative(sLen); | |
452 | if (setWordStart.Contains(c)) { // word (various) | |
453 | sLen++; | |
454 | isVar = true; | |
455 | while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen))) | |
456 | sLen++; | |
457 | } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit} | |
458 | sLen++; | |
459 | isVar = true; | |
460 | } | |
461 | } | |
462 | if (braces) { | |
463 | if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) { // } end for {word} | |
464 | sLen++; | |
465 | } else | |
466 | isVar = false; | |
467 | } | |
468 | } | |
469 | if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns | |
470 | sLen = 1; | |
471 | int c = sc.chNext; | |
472 | if (sc.ch == '$') { | |
473 | if (IsADigit(c)) { // $[0-9] and slurp trailing digits | |
474 | sLen++; | |
475 | isVar = true; | |
476 | while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen))) | |
477 | sLen++; | |
478 | } else if (setSpecialVar.Contains(c)) { // $ special variables | |
479 | sLen++; | |
480 | isVar = true; | |
481 | } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional | |
482 | sLen++; | |
483 | isVar = true; | |
484 | } else if (c == '^') { // $^A control-char style | |
485 | sLen++; | |
486 | if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) { | |
487 | sLen++; | |
488 | isVar = true; | |
489 | } | |
490 | } | |
491 | } else if (sc.ch == '@') { | |
492 | if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern | |
493 | sLen++; | |
494 | isVar = true; | |
495 | } | |
496 | } | |
497 | } | |
498 | if (isVar) { // commit as interpolated variable or normal character | |
499 | if (sc.state < SCE_PL_STRING_VAR) | |
500 | sc.SetState(sc.state + INTERPOLATE_SHIFT); | |
501 | sc.Forward(sLen); | |
502 | maxSeg -= sLen; | |
503 | } else { | |
504 | if (sc.state >= SCE_PL_STRING_VAR) | |
505 | sc.SetState(sc.state - INTERPOLATE_SHIFT); | |
506 | sc.Forward(); | |
507 | maxSeg--; | |
508 | } | |
509 | } | |
510 | if (sc.state >= SCE_PL_STRING_VAR) | |
511 | sc.SetState(sc.state - INTERPOLATE_SHIFT); | |
512 | } | |
f6bcfd97 | 513 | |
1dcf666d RD |
514 | void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) { |
515 | LexAccessor styler(pAccess); | |
65ec6247 | 516 | |
9e96e16f RD |
517 | // keywords that force /PATTERN/ at all times; should track vim's behaviour |
518 | WordList reWords; | |
519 | reWords.Set("elsif if split while"); | |
520 | ||
521 | // charset classes | |
9e96e16f RD |
522 | CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC"); |
523 | // lexing of "%*</" operators is non-trivial; these are missing in the set below | |
524 | CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); | |
525 | CharacterSet setQDelim(CharacterSet::setNone, "qrwx"); | |
526 | CharacterSet setModifiers(CharacterSet::setAlpha); | |
527 | CharacterSet setPreferRE(CharacterSet::setNone, "*/<%"); | |
528 | // setArray and setHash also accept chars for special vars like $_, |
529 | // which are then truncated when the next char does not match setVar | |
530 | CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true); | |
531 | CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true); | |
532 | CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true); | |
533 | CharacterSet &setPOD = setModifiers; | |
534 | CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@"); | |
535 | CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); | |
1dcf666d | 536 | CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];"); |
9e96e16f RD |
537 | // for format identifiers |
538 | CharacterSet setFormatStart(CharacterSet::setAlpha, "_="); | |
539 | CharacterSet &setFormat = setHereDocDelim; | |
7e0c58e9 | 540 | |
9e96e16f RD |
541 | // Lexer for perl often has to backtrack to start of current style to determine |
542 | // which characters are being used as quotes, how deeply nested is the | |
543 | // start position and what the termination string is for HERE documents. | |
544 | ||
545 | class HereDocCls { // Class to manage HERE doc sequence | |
65ec6247 | 546 | public: |
1dcf666d RD |
547 | int State; |
548 | // 0: '<<' encountered | |
549 | // 1: collect the delimiter | |
550 | // 2: here doc text (lines after the delimiter) | |
9e96e16f | 551 | int Quote; // the char after '<<' |
65ec6247 RD |
552 | bool Quoted; // true if Quote in ('\'','"','`') |
553 | int DelimiterLength; // strlen(Delimiter) | |
8e54aaed | 554 | char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf |
65ec6247 RD |
555 | HereDocCls() { |
556 | State = 0; | |
9e96e16f RD |
557 | Quote = 0; |
558 | Quoted = false; | |
65ec6247 | 559 | DelimiterLength = 0; |
8e54aaed | 560 | Delimiter = new char[HERE_DELIM_MAX]; |
65ec6247 RD |
561 | Delimiter[0] = '\0'; |
562 | } | |
9e96e16f RD |
563 | void Append(int ch) { |
564 | Delimiter[DelimiterLength++] = static_cast<char>(ch); | |
565 | Delimiter[DelimiterLength] = '\0'; | |
566 | } | |
8e54aaed RD |
567 | ~HereDocCls() { |
568 | delete []Delimiter; | |
569 | } | |
65ec6247 | 570 | }; |
9e96e16f | 571 | HereDocCls HereDoc; // TODO: FIFO for stacked here-docs |
65ec6247 | 572 | |
9e96e16f | 573 | class QuoteCls { // Class to manage quote pairs |
1dcf666d | 574 | public: |
9e96e16f RD |
575 | int Rep; |
576 | int Count; | |
577 | int Up, Down; | |
65ec6247 RD |
578 | QuoteCls() { |
579 | this->New(1); | |
580 | } | |
9e96e16f | 581 | void New(int r = 1) { |
65ec6247 RD |
582 | Rep = r; |
583 | Count = 0; | |
584 | Up = '\0'; | |
585 | Down = '\0'; | |
586 | } | |
9e96e16f | 587 | void Open(int u) { |
65ec6247 RD |
588 | Count++; |
589 | Up = u; | |
590 | Down = opposite(Up); | |
591 | } | |
592 | }; | |
593 | QuoteCls Quote; | |
594 | ||
9e96e16f RD |
595 | // additional state for number lexing |
596 | int numState = PERLNUM_DECIMAL; | |
8e54aaed | 597 | int dotCount = 0; |
65ec6247 | 598 | |
9e96e16f RD |
599 | unsigned int endPos = startPos + length; |
600 | ||
601 | // Backtrack to beginning of style if required... | |
602 | // If in a long distance lexical state, backtrack to find quote characters. | |
603 | // Includes strings (may be multi-line), numbers (additional state), format | |
604 | // bodies, as well as POD sections. | |
605 | if (initStyle == SCE_PL_HERE_Q | |
1dcf666d RD |
606 | || initStyle == SCE_PL_HERE_QQ |
607 | || initStyle == SCE_PL_HERE_QX | |
608 | || initStyle == SCE_PL_FORMAT | |
609 | || initStyle == SCE_PL_HERE_QQ_VAR | |
610 | || initStyle == SCE_PL_HERE_QX_VAR | |
611 | ) { | |
612 | // backtrack through multiple styles to reach the delimiter start | |
9e96e16f RD |
613 | int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; |
614 | while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) { | |
65ec6247 RD |
615 | startPos--; |
616 | } | |
617 | startPos = styler.LineStart(styler.GetLine(startPos)); | |
9e96e16f | 618 | initStyle = styler.StyleAt(startPos - 1); |
65ec6247 | 619 | } |
1dcf666d RD |
620 | if (initStyle == SCE_PL_STRING |
621 | || initStyle == SCE_PL_STRING_QQ | |
622 | || initStyle == SCE_PL_BACKTICKS | |
623 | || initStyle == SCE_PL_STRING_QX | |
624 | || initStyle == SCE_PL_REGEX | |
625 | || initStyle == SCE_PL_STRING_QR | |
626 | || initStyle == SCE_PL_REGSUBST | |
627 | || initStyle == SCE_PL_STRING_VAR | |
628 | || initStyle == SCE_PL_STRING_QQ_VAR | |
629 | || initStyle == SCE_PL_BACKTICKS_VAR | |
630 | || initStyle == SCE_PL_STRING_QX_VAR | |
631 | || initStyle == SCE_PL_REGEX_VAR | |
632 | || initStyle == SCE_PL_STRING_QR_VAR | |
633 | || initStyle == SCE_PL_REGSUBST_VAR | |
634 | ) { | |
635 | // for interpolation, must backtrack through a mix of two different styles | |
636 | int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ? | |
637 | initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT; | |
638 | while (startPos > 1) { | |
639 | int st = styler.StyleAt(startPos - 1); | |
640 | if ((st != initStyle) && (st != otherStyle)) | |
641 | break; | |
642 | startPos--; | |
643 | } | |
644 | initStyle = SCE_PL_DEFAULT; | |
645 | } else if (initStyle == SCE_PL_STRING_Q | |
646 | || initStyle == SCE_PL_STRING_QW | |
647 | || initStyle == SCE_PL_XLAT | |
648 | || initStyle == SCE_PL_CHARACTER | |
649 | || initStyle == SCE_PL_NUMBER | |
650 | || initStyle == SCE_PL_IDENTIFIER | |
651 | || initStyle == SCE_PL_ERROR | |
652 | || initStyle == SCE_PL_SUB_PROTOTYPE | |
653 | ) { | |
9e96e16f | 654 | while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { |
7e0c58e9 RD |
655 | startPos--; |
656 | } | |
9e96e16f RD |
657 | initStyle = SCE_PL_DEFAULT; |
658 | } else if (initStyle == SCE_PL_POD | |
1dcf666d RD |
659 | || initStyle == SCE_PL_POD_VERB |
660 | ) { | |
9e96e16f RD |
661 | // POD backtracking finds preceeding blank lines and goes back past them |
662 | int ln = styler.GetLine(startPos); | |
663 | if (ln > 0) { | |
664 | initStyle = styler.StyleAt(styler.LineStart(--ln)); | |
665 | if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { | |
666 | while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) | |
667 | ln--; | |
668 | } | |
669 | startPos = styler.LineStart(++ln); | |
670 | initStyle = styler.StyleAt(startPos - 1); | |
671 | } else { | |
672 | startPos = 0; | |
673 | initStyle = SCE_PL_DEFAULT; | |
f6bcfd97 | 674 | } |
f6bcfd97 | 675 | } |
65ec6247 | 676 | |
9e96e16f RD |
677 | // backFlag, backPos are additional state to aid identifier corner cases. |
678 | // Look backwards past whitespace and comments in order to detect either | |
679 | // operator or keyword. Later updated as we go along. | |
680 | int backFlag = BACK_NONE; | |
681 | unsigned int backPos = startPos; | |
682 | if (backPos > 0) { | |
683 | backPos--; | |
684 | skipWhitespaceComment(styler, backPos); | |
685 | if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) | |
686 | backFlag = BACK_OPERATOR; | |
687 | else if (styler.StyleAt(backPos) == SCE_PL_WORD) | |
688 | backFlag = BACK_KEYWORD; | |
689 | backPos++; | |
690 | } | |
1e9bafca | 691 | |
9e96e16f | 692 | StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX)); |
65ec6247 | 693 | |
9e96e16f | 694 | for (; sc.More(); sc.Forward()) { |
f6bcfd97 | 695 | |
9e96e16f RD |
696 | // Determine if the current state should terminate. |
697 | switch (sc.state) { | |
1dcf666d RD |
698 | case SCE_PL_OPERATOR: |
699 | sc.SetState(SCE_PL_DEFAULT); | |
700 | backFlag = BACK_OPERATOR; | |
701 | backPos = sc.currentPos; | |
702 | break; | |
703 | case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol | |
704 | if ((!setWord.Contains(sc.ch) && sc.ch != '\'') | |
705 | || sc.Match('.', '.') | |
706 | || sc.chPrev == '>') { // end of inputsymbol | |
9e96e16f | 707 | sc.SetState(SCE_PL_DEFAULT); |
1dcf666d RD |
708 | } |
709 | break; | |
710 | case SCE_PL_WORD: // keyword, plus special cases | |
711 | if (!setWord.Contains(sc.ch)) { | |
712 | char s[100]; | |
713 | sc.GetCurrent(s, sizeof(s)); | |
714 | if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) { | |
715 | sc.ChangeState(SCE_PL_DATASECTION); | |
716 | } else { | |
717 | if ((strcmp(s, "format") == 0)) { | |
718 | sc.SetState(SCE_PL_FORMAT_IDENT); | |
719 | HereDoc.State = 0; | |
9e96e16f | 720 | } else { |
1dcf666d | 721 | sc.SetState(SCE_PL_DEFAULT); |
9e96e16f | 722 | } |
1dcf666d RD |
723 | backFlag = BACK_KEYWORD; |
724 | backPos = sc.currentPos; | |
9e96e16f | 725 | } |
1dcf666d RD |
726 | } |
727 | break; | |
728 | case SCE_PL_SCALAR: | |
729 | case SCE_PL_ARRAY: | |
730 | case SCE_PL_HASH: | |
731 | case SCE_PL_SYMBOLTABLE: | |
732 | if (sc.Match(':', ':')) { // skip :: | |
733 | sc.Forward(); | |
734 | } else if (!setVar.Contains(sc.ch)) { | |
735 | if (sc.LengthCurrent() == 1) { | |
736 | // Special variable: $(, $_ etc. | |
9e96e16f | 737 | sc.Forward(); |
9e96e16f | 738 | } |
1dcf666d RD |
739 | sc.SetState(SCE_PL_DEFAULT); |
740 | } | |
741 | break; | |
742 | case SCE_PL_NUMBER: | |
743 | // if no early break, number style is terminated at "(go through)" | |
744 | if (sc.ch == '.') { | |
745 | if (sc.chNext == '.') { | |
746 | // double dot is always an operator (go through) | |
747 | } else if (numState <= PERLNUM_FLOAT_EXP) { | |
748 | // non-decimal number or float exponent, consume next dot | |
749 | sc.SetState(SCE_PL_OPERATOR); | |
9e96e16f | 750 | break; |
1dcf666d RD |
751 | } else { // decimal or vectors allows dots |
752 | dotCount++; | |
753 | if (numState == PERLNUM_DECIMAL) { | |
754 | if (dotCount <= 1) // number with one dot in it | |
755 | break; | |
756 | if (IsADigit(sc.chNext)) { // really a vector | |
757 | numState = PERLNUM_VECTOR; | |
758 | break; | |
9e96e16f | 759 | } |
1dcf666d RD |
760 | // number then dot (go through) |
761 | } else if (IsADigit(sc.chNext)) // vectors | |
9e96e16f | 762 | break; |
1dcf666d RD |
763 | // vector then dot (go through) |
764 | } | |
765 | } else if (sc.ch == '_') { | |
766 | // permissive underscoring for number and vector literals | |
767 | break; | |
768 | } else if (numState == PERLNUM_DECIMAL) { | |
769 | if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign | |
770 | numState = PERLNUM_FLOAT_EXP; | |
771 | if (sc.chNext == '+' || sc.chNext == '-') { | |
772 | sc.Forward(); | |
9e96e16f | 773 | } |
1dcf666d RD |
774 | break; |
775 | } else if (IsADigit(sc.ch)) | |
776 | break; | |
777 | // number then word (go through) | |
778 | } else if (numState == PERLNUM_HEX) { | |
779 | if (IsADigit(sc.ch, 16)) | |
780 | break; | |
781 | } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { | |
782 | if (IsADigit(sc.ch)) // vector | |
783 | break; | |
784 | if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word | |
785 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
9e96e16f RD |
786 | break; |
787 | } | |
1dcf666d RD |
788 | // vector then word (go through) |
789 | } else if (IsADigit(sc.ch)) { | |
790 | if (numState == PERLNUM_FLOAT_EXP) { | |
791 | break; | |
792 | } else if (numState == PERLNUM_OCTAL) { | |
793 | if (sc.ch <= '7') break; | |
794 | } else if (numState == PERLNUM_BINARY) { | |
795 | if (sc.ch <= '1') break; | |
9e96e16f | 796 | } |
1dcf666d RD |
797 | // mark invalid octal, binary numbers (go through) |
798 | numState = PERLNUM_BAD; | |
9e96e16f | 799 | break; |
1dcf666d RD |
800 | } |
801 | // complete current number or vector | |
802 | sc.ChangeState(actualNumStyle(numState)); | |
803 | sc.SetState(SCE_PL_DEFAULT); | |
804 | break; | |
805 | case SCE_PL_COMMENTLINE: | |
806 | if (sc.atLineEnd) { | |
807 | sc.SetState(SCE_PL_DEFAULT); | |
808 | } | |
809 | break; | |
810 | case SCE_PL_HERE_DELIM: | |
811 | if (HereDoc.State == 0) { // '<<' encountered | |
812 | int delim_ch = sc.chNext; | |
813 | int ws_skip = 0; | |
814 | HereDoc.State = 1; // pre-init HERE doc class | |
815 | HereDoc.Quote = sc.chNext; | |
816 | HereDoc.Quoted = false; | |
817 | HereDoc.DelimiterLength = 0; | |
818 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; | |
819 | if (IsASpaceOrTab(delim_ch)) { | |
820 | // skip whitespace; legal only for quoted delimiters | |
821 | unsigned int i = sc.currentPos + 1; | |
822 | while ((i < endPos) && IsASpaceOrTab(delim_ch)) { | |
823 | i++; | |
824 | delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i)); | |
9e96e16f | 825 | } |
1dcf666d RD |
826 | ws_skip = i - sc.currentPos - 1; |
827 | } | |
828 | if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { | |
829 | // a quoted here-doc delimiter; skip any whitespace | |
830 | sc.Forward(ws_skip + 1); | |
831 | HereDoc.Quote = delim_ch; | |
832 | HereDoc.Quoted = true; | |
833 | } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext)) | |
834 | || ws_skip > 0) { | |
835 | // left shift << or <<= operator cases | |
836 | // restore position if operator | |
837 | sc.ChangeState(SCE_PL_OPERATOR); | |
838 | sc.ForwardSetState(SCE_PL_DEFAULT); | |
839 | backFlag = BACK_OPERATOR; | |
840 | backPos = sc.currentPos; | |
841 | HereDoc.State = 0; | |
842 | } else { | |
843 | // specially handle initial '\' for identifier | |
844 | if (ws_skip == 0 && HereDoc.Quote == '\\') | |
845 | sc.Forward(); | |
846 | // an unquoted here-doc delimiter, no special handling | |
847 | // (cannot be prefixed by spaces/tabs), or | |
848 | // symbols terminates; deprecated zero-length delimiter | |
849 | } | |
850 | } else if (HereDoc.State == 1) { // collect the delimiter | |
851 | backFlag = BACK_NONE; | |
852 | if (HereDoc.Quoted) { // a quoted here-doc delimiter | |
853 | if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter | |
9e96e16f | 854 | sc.ForwardSetState(SCE_PL_DEFAULT); |
1dcf666d RD |
855 | } else if (!sc.atLineEnd) { |
856 | if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote | |
9e96e16f | 857 | sc.Forward(); |
9e96e16f | 858 | } |
1dcf666d | 859 | if (sc.ch != '\r') { // skip CR if CRLF |
9e96e16f | 860 | HereDoc.Append(sc.ch); |
9e96e16f RD |
861 | } |
862 | } | |
1dcf666d RD |
863 | } else { // an unquoted here-doc delimiter |
864 | if (setHereDocDelim.Contains(sc.ch)) { | |
865 | HereDoc.Append(sc.ch); | |
866 | } else { | |
867 | sc.SetState(SCE_PL_DEFAULT); | |
9e96e16f RD |
868 | } |
869 | } | |
1dcf666d RD |
870 | if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { |
871 | sc.SetState(SCE_PL_ERROR); | |
872 | HereDoc.State = 0; | |
873 | } | |
874 | } | |
875 | break; | |
876 | case SCE_PL_HERE_Q: | |
877 | case SCE_PL_HERE_QQ: | |
878 | case SCE_PL_HERE_QX: | |
879 | // also implies HereDoc.State == 2 | |
880 | sc.Complete(); | |
881 | if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { | |
882 | int c = sc.GetRelative(HereDoc.DelimiterLength); | |
883 | if (c == '\r' || c == '\n') { // peek first, do not consume match | |
884 | sc.Forward(HereDoc.DelimiterLength); | |
9e96e16f RD |
885 | sc.SetState(SCE_PL_DEFAULT); |
886 | backFlag = BACK_NONE; | |
887 | HereDoc.State = 0; | |
1dcf666d RD |
888 | if (!sc.atLineEnd) |
889 | sc.Forward(); | |
890 | break; | |
891 | } | |
892 | } | |
893 | if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated | |
894 | while (!sc.atLineEnd) | |
895 | sc.Forward(); | |
896 | break; | |
897 | } | |
898 | while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated | |
899 | int s = 0, endType = 0; | |
900 | int maxSeg = endPos - sc.currentPos; | |
901 | while (s < maxSeg) { // scan to break string into segments | |
902 | int c = sc.GetRelative(s); | |
903 | if (c == '\\') { | |
904 | endType = 1; break; | |
905 | } else if (c == '\r' || c == '\n') { | |
906 | endType = 2; break; | |
907 | } | |
908 | s++; | |
9e96e16f | 909 | } |
1dcf666d RD |
910 | if (s > 0) // process non-empty segments |
911 | InterpolateSegment(sc, s); | |
912 | if (endType == 1) { | |
913 | sc.Forward(); | |
914 | // \ at end-of-line does not appear to have any effect, skip | |
915 | if (sc.ch != '\r' && sc.ch != '\n') | |
916 | sc.Forward(); | |
917 | } else if (endType == 2) { | |
918 | if (!sc.atLineEnd) | |
919 | sc.Forward(); | |
920 | } | |
921 | } | |
922 | break; | |
923 | case SCE_PL_POD: | |
924 | case SCE_PL_POD_VERB: { | |
9e96e16f RD |
925 | unsigned int fw = sc.currentPos; |
926 | int ln = styler.GetLine(fw); | |
927 | if (sc.atLineStart && sc.Match("=cut")) { // end of POD | |
928 | sc.SetState(SCE_PL_POD); | |
929 | sc.Forward(4); | |
930 | sc.SetState(SCE_PL_DEFAULT); | |
931 | styler.SetLineState(ln, SCE_PL_POD); | |
932 | break; | |
933 | } | |
934 | int pod = podLineScan(styler, fw, endPos); // classify POD line | |
935 | styler.SetLineState(ln, pod); | |
936 | if (pod == SCE_PL_DEFAULT) { | |
937 | if (sc.state == SCE_PL_POD_VERB) { | |
938 | unsigned int fw2 = fw; | |
939 | while (fw2 <= endPos && pod == SCE_PL_DEFAULT) { | |
940 | fw = fw2++; // penultimate line (last blank line) | |
941 | pod = podLineScan(styler, fw2, endPos); | |
942 | styler.SetLineState(styler.GetLine(fw2), pod); | |
943 | } | |
944 | if (pod == SCE_PL_POD) { // truncate verbatim POD early | |
945 | sc.SetState(SCE_PL_POD); | |
946 | } else | |
947 | fw = fw2; | |
1dcf666d | 948 | } |
9e96e16f RD |
949 | } else { |
950 | if (pod == SCE_PL_POD_VERB // still part of current paragraph | |
1dcf666d | 951 | && (styler.GetLineState(ln - 1) == SCE_PL_POD)) { |
9e96e16f RD |
952 | pod = SCE_PL_POD; |
953 | styler.SetLineState(ln, pod); | |
954 | } else if (pod == SCE_PL_POD | |
1dcf666d | 955 | && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { |
9e96e16f RD |
956 | pod = SCE_PL_POD_VERB; |
957 | styler.SetLineState(ln, pod); | |
958 | } | |
959 | sc.SetState(pod); | |
960 | } | |
961 | sc.Forward(fw - sc.currentPos); // commit style | |
1dcf666d RD |
962 | } |
963 | break; | |
964 | case SCE_PL_REGEX: | |
965 | case SCE_PL_STRING_QR: | |
966 | if (Quote.Rep <= 0) { | |
967 | if (!setModifiers.Contains(sc.ch)) | |
968 | sc.SetState(SCE_PL_DEFAULT); | |
969 | } else if (!Quote.Up && !IsASpace(sc.ch)) { | |
970 | Quote.Open(sc.ch); | |
971 | } else { | |
972 | int s = 0, endType = 0; | |
973 | int maxSeg = endPos - sc.currentPos; | |
974 | while (s < maxSeg) { // scan to break string into segments | |
975 | int c = sc.GetRelative(s); | |
976 | if (IsASpace(c)) { | |
977 | break; | |
978 | } else if (c == '\\' && Quote.Up != '\\') { | |
979 | endType = 1; break; | |
980 | } else if (c == Quote.Down) { | |
981 | Quote.Count--; | |
982 | if (Quote.Count == 0) { | |
983 | Quote.Rep--; | |
984 | break; | |
985 | } | |
986 | } else if (c == Quote.Up) | |
987 | Quote.Count++; | |
988 | s++; | |
9e96e16f | 989 | } |
1dcf666d RD |
990 | if (s > 0) { // process non-empty segments |
991 | if (Quote.Up != '\'') { | |
992 | InterpolateSegment(sc, s, true); | |
993 | } else // non-interpolated path | |
994 | sc.Forward(s); | |
995 | } | |
996 | if (endType == 1) | |
9e96e16f | 997 | sc.Forward(); |
1dcf666d RD |
998 | } |
999 | break; | |
1000 | case SCE_PL_REGSUBST: | |
1001 | case SCE_PL_XLAT: | |
1002 | if (Quote.Rep <= 0) { | |
1003 | if (!setModifiers.Contains(sc.ch)) | |
1004 | sc.SetState(SCE_PL_DEFAULT); | |
1005 | } else if (!Quote.Up && !IsASpace(sc.ch)) { | |
1006 | Quote.Open(sc.ch); | |
1007 | } else { | |
1008 | int s = 0, endType = 0; | |
1009 | int maxSeg = endPos - sc.currentPos; | |
1010 | bool isPattern = (Quote.Rep == 2); | |
1011 | while (s < maxSeg) { // scan to break string into segments | |
1012 | int c = sc.GetRelative(s); | |
1013 | if (c == '\\' && Quote.Up != '\\') { | |
1014 | endType = 2; break; | |
1015 | } else if (Quote.Count == 0 && Quote.Rep == 1) { | |
1016 | // We matched something like s(...) or tr{...}, Perl 5.10 | |
1017 | // appears to allow almost any character for use as the | |
1018 | // next delimiters. Whitespace and comments are accepted in | |
1019 | // between, but we'll limit to whitespace here. | |
1020 | // For '#', if no whitespace in between, it's a delimiter. | |
1021 | if (IsASpace(c)) { | |
1022 | // Keep going | |
1023 | } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) { | |
1024 | endType = 3; | |
1025 | } else | |
1026 | Quote.Open(c); | |
1027 | break; | |
1028 | } else if (c == Quote.Down) { | |
1029 | Quote.Count--; | |
1030 | if (Quote.Count == 0) { | |
1031 | Quote.Rep--; | |
1032 | endType = 1; | |
1033 | } | |
1034 | if (Quote.Up == Quote.Down) | |
1035 | Quote.Count++; | |
1036 | if (endType == 1) | |
1037 | break; | |
1038 | } else if (c == Quote.Up) { | |
9e96e16f | 1039 | Quote.Count++; |
1dcf666d RD |
1040 | } else if (IsASpace(c)) |
1041 | break; | |
1042 | s++; | |
9e96e16f | 1043 | } |
1dcf666d RD |
1044 | if (s > 0) { // process non-empty segments |
1045 | if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { | |
1046 | InterpolateSegment(sc, s, isPattern); | |
1047 | } else // non-interpolated path | |
1048 | sc.Forward(s); | |
1049 | } | |
1050 | if (endType == 2) { | |
9e96e16f | 1051 | sc.Forward(); |
1dcf666d RD |
1052 | } else if (endType == 3) |
1053 | sc.SetState(SCE_PL_DEFAULT); | |
1054 | } | |
1055 | break; | |
1056 | case SCE_PL_STRING_Q: | |
1057 | case SCE_PL_STRING_QQ: | |
1058 | case SCE_PL_STRING_QX: | |
1059 | case SCE_PL_STRING_QW: | |
1060 | case SCE_PL_STRING: | |
1061 | case SCE_PL_CHARACTER: | |
1062 | case SCE_PL_BACKTICKS: | |
1063 | if (!Quote.Down && !IsASpace(sc.ch)) { | |
1064 | Quote.Open(sc.ch); | |
1065 | } else { | |
1066 | int s = 0, endType = 0; | |
1067 | int maxSeg = endPos - sc.currentPos; | |
1068 | while (s < maxSeg) { // scan to break string into segments | |
1069 | int c = sc.GetRelative(s); | |
1070 | if (IsASpace(c)) { | |
1071 | break; | |
1072 | } else if (c == '\\' && Quote.Up != '\\') { | |
1073 | endType = 2; break; | |
1074 | } else if (c == Quote.Down) { | |
1075 | Quote.Count--; | |
1076 | if (Quote.Count == 0) { | |
1077 | endType = 3; break; | |
1078 | } | |
1079 | } else if (c == Quote.Up) | |
1080 | Quote.Count++; | |
1081 | s++; | |
9e96e16f | 1082 | } |
1dcf666d RD |
1083 | if (s > 0) { // process non-empty segments |
1084 | switch (sc.state) { | |
1085 | case SCE_PL_STRING: | |
1086 | case SCE_PL_STRING_QQ: | |
1087 | case SCE_PL_BACKTICKS: | |
1088 | InterpolateSegment(sc, s); | |
1089 | break; | |
1090 | case SCE_PL_STRING_QX: | |
1091 | if (Quote.Up != '\'') { | |
1092 | InterpolateSegment(sc, s); | |
1093 | break; | |
1094 | } | |
1095 | // (continued for ' delim) | |
1096 | default: // non-interpolated path | |
1097 | sc.Forward(s); | |
1098 | } | |
1099 | } | |
1100 | if (endType == 2) { | |
1101 | sc.Forward(); | |
1102 | } else if (endType == 3) | |
1103 | sc.ForwardSetState(SCE_PL_DEFAULT); | |
1104 | } | |
1105 | break; | |
1106 | case SCE_PL_SUB_PROTOTYPE: { | |
9e96e16f RD |
1107 | int i = 0; |
1108 | // forward scan; must all be valid proto characters | |
1109 | while (setSubPrototype.Contains(sc.GetRelative(i))) | |
1110 | i++; | |
1111 | if (sc.GetRelative(i) == ')') { // valid sub prototype | |
1112 | sc.Forward(i); | |
1113 | sc.ForwardSetState(SCE_PL_DEFAULT); | |
1114 | } else { | |
1115 | // abandon prototype, restart from '(' | |
1116 | sc.ChangeState(SCE_PL_OPERATOR); | |
1117 | sc.SetState(SCE_PL_DEFAULT); | |
1118 | } | |
1dcf666d RD |
1119 | } |
1120 | break; | |
1121 | case SCE_PL_FORMAT: { | |
9e96e16f | 1122 | sc.Complete(); |
1dcf666d RD |
1123 | if (sc.Match('.')) { |
1124 | sc.Forward(); | |
1125 | if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) | |
1126 | sc.SetState(SCE_PL_DEFAULT); | |
1127 | } | |
9e96e16f RD |
1128 | while (!sc.atLineEnd) |
1129 | sc.Forward(); | |
1dcf666d RD |
1130 | } |
1131 | break; | |
1132 | case SCE_PL_ERROR: | |
1133 | break; | |
f6bcfd97 | 1134 | } |
9e96e16f RD |
1135 | // Needed for specific continuation styles (one follows the other) |
1136 | switch (sc.state) { | |
1137 | // continued from SCE_PL_WORD | |
1dcf666d RD |
1138 | case SCE_PL_FORMAT_IDENT: |
1139 | // occupies HereDoc state 3 to avoid clashing with HERE docs | |
1140 | if (IsASpaceOrTab(sc.ch)) { // skip whitespace | |
1141 | sc.ChangeState(SCE_PL_DEFAULT); | |
1142 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) | |
1143 | sc.Forward(); | |
1144 | sc.SetState(SCE_PL_FORMAT_IDENT); | |
1145 | } | |
1146 | if (setFormatStart.Contains(sc.ch)) { // identifier or '=' | |
1147 | if (sc.ch != '=') { | |
1148 | do { | |
9e96e16f | 1149 | sc.Forward(); |
1dcf666d | 1150 | } while (setFormat.Contains(sc.ch)); |
9e96e16f | 1151 | } |
1dcf666d RD |
1152 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
1153 | sc.Forward(); | |
1154 | if (sc.ch == '=') { | |
1155 | sc.ForwardSetState(SCE_PL_DEFAULT); | |
1156 | HereDoc.State = 3; | |
9e96e16f | 1157 | } else { |
1dcf666d RD |
1158 | // invalid identifier; inexact fallback, but hey |
1159 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
1160 | sc.SetState(SCE_PL_DEFAULT); | |
9e96e16f | 1161 | } |
1dcf666d RD |
1162 | } else { |
1163 | sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier | |
1164 | } | |
1165 | backFlag = BACK_NONE; | |
1166 | break; | |
65ec6247 RD |
1167 | } |
1168 | ||
9e96e16f RD |
1169 | // Must check end of HereDoc states here before default state is handled |
1170 | if (HereDoc.State == 1 && sc.atLineEnd) { | |
65ec6247 | 1171 | // Begin of here-doc (the line after the here-doc delimiter): |
8e54aaed RD |
1172 | // Lexically, the here-doc starts from the next line after the <<, but the |
1173 | // first line of here-doc seems to follow the style of the last EOL sequence | |
9e96e16f | 1174 | int st_new = SCE_PL_HERE_QQ; |
65ec6247 | 1175 | HereDoc.State = 2; |
65ec6247 | 1176 | if (HereDoc.Quoted) { |
9e96e16f | 1177 | if (sc.state == SCE_PL_HERE_DELIM) { |
65ec6247 | 1178 | // Missing quote at end of string! We are stricter than perl. |
8e54aaed | 1179 | // Colour here-doc anyway while marking this bit as an error. |
9e96e16f | 1180 | sc.ChangeState(SCE_PL_ERROR); |
8e54aaed | 1181 | } |
8e54aaed | 1182 | switch (HereDoc.Quote) { |
1dcf666d RD |
1183 | case '\'': |
1184 | st_new = SCE_PL_HERE_Q ; | |
1185 | break; | |
1186 | case '"' : | |
1187 | st_new = SCE_PL_HERE_QQ; | |
1188 | break; | |
1189 | case '`' : | |
1190 | st_new = SCE_PL_HERE_QX; | |
1191 | break; | |
65ec6247 RD |
1192 | } |
1193 | } else { | |
9e96e16f RD |
1194 | if (HereDoc.Quote == '\\') |
1195 | st_new = SCE_PL_HERE_Q; | |
65ec6247 | 1196 | } |
9e96e16f RD |
1197 | sc.SetState(st_new); |
1198 | } | |
1199 | if (HereDoc.State == 3 && sc.atLineEnd) { | |
1200 | // Start of format body. | |
1201 | HereDoc.State = 0; | |
1202 | sc.SetState(SCE_PL_FORMAT); | |
65ec6247 | 1203 | } |
f6bcfd97 | 1204 | |
9e96e16f RD |
1205 | // Determine if a new state should be entered. |
1206 | if (sc.state == SCE_PL_DEFAULT) { | |
1207 | if (IsADigit(sc.ch) || | |
1dcf666d | 1208 | (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) { |
9e96e16f RD |
1209 | sc.SetState(SCE_PL_NUMBER); |
1210 | backFlag = BACK_NONE; | |
8e54aaed RD |
1211 | numState = PERLNUM_DECIMAL; |
1212 | dotCount = 0; | |
9e96e16f | 1213 | if (sc.ch == '0') { // hex,bin,octal |
1dcf666d | 1214 | if (sc.chNext == 'x' || sc.chNext == 'X') { |
1e9bafca | 1215 | numState = PERLNUM_HEX; |
1dcf666d | 1216 | } else if (sc.chNext == 'b' || sc.chNext == 'B') { |
9e96e16f RD |
1217 | numState = PERLNUM_BINARY; |
1218 | } else if (IsADigit(sc.chNext)) { | |
1219 | numState = PERLNUM_OCTAL; | |
1220 | } | |
1221 | if (numState != PERLNUM_DECIMAL) { | |
1222 | sc.Forward(); | |
1223 | } | |
1224 | } else if (sc.ch == 'v') { // vector | |
8e54aaed RD |
1225 | numState = PERLNUM_V_VECTOR; |
1226 | } | |
9e96e16f RD |
1227 | } else if (setWord.Contains(sc.ch)) { |
1228 | // if immediately prefixed by '::', always a bareword | |
1229 | sc.SetState(SCE_PL_WORD); | |
1230 | if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { | |
1231 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
1232 | } | |
1233 | unsigned int bk = sc.currentPos; | |
1234 | unsigned int fw = sc.currentPos + 1; | |
1235 | // first check for possible quote-like delimiter | |
1236 | if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { | |
1237 | sc.ChangeState(SCE_PL_REGSUBST); | |
65ec6247 | 1238 | Quote.New(2); |
9e96e16f RD |
1239 | } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { |
1240 | sc.ChangeState(SCE_PL_REGEX); | |
1241 | Quote.New(); | |
1242 | } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) { | |
1243 | sc.ChangeState(SCE_PL_STRING_Q); | |
1244 | Quote.New(); | |
1245 | } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { | |
1dcf666d | 1246 | sc.ChangeState(SCE_PL_XLAT); |
65ec6247 | 1247 | Quote.New(2); |
9e96e16f | 1248 | } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { |
1dcf666d | 1249 | sc.ChangeState(SCE_PL_XLAT); |
65ec6247 | 1250 | Quote.New(2); |
9e96e16f RD |
1251 | sc.Forward(); |
1252 | fw++; | |
1253 | } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) | |
1dcf666d RD |
1254 | && !setWord.Contains(sc.GetRelative(2))) { |
1255 | if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); | |
9e96e16f RD |
1256 | else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); |
1257 | else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); | |
1258 | else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w' | |
1259 | Quote.New(); | |
1260 | sc.Forward(); | |
1261 | fw++; | |
1262 | } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition | |
1dcf666d RD |
1263 | !setWord.Contains(sc.chNext) || |
1264 | (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { | |
9e96e16f | 1265 | sc.ChangeState(SCE_PL_OPERATOR); |
f6bcfd97 | 1266 | } |
9e96e16f RD |
1267 | // if potentially a keyword, scan forward and grab word, then check |
1268 | // if it's really one; if yes, disambiguation test is performed | |
1269 | // otherwise it is always a bareword and we skip a lot of scanning | |
1270 | if (sc.state == SCE_PL_WORD) { | |
1271 | while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw)))) | |
1272 | fw++; | |
1273 | if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { | |
1274 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
8e54aaed | 1275 | } |
f6bcfd97 | 1276 | } |
9e96e16f RD |
1277 | // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this |
1278 | // for quote-like delimiters/keywords, attempt to disambiguate | |
1279 | // to select for bareword, change state -> SCE_PL_IDENTIFIER | |
1280 | if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { | |
1281 | if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) | |
1282 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
f6bcfd97 | 1283 | } |
9e96e16f RD |
1284 | backFlag = BACK_NONE; |
1285 | } else if (sc.ch == '#') { | |
1286 | sc.SetState(SCE_PL_COMMENTLINE); | |
1287 | } else if (sc.ch == '\"') { | |
1288 | sc.SetState(SCE_PL_STRING); | |
1289 | Quote.New(); | |
1290 | Quote.Open(sc.ch); | |
1291 | backFlag = BACK_NONE; | |
1292 | } else if (sc.ch == '\'') { | |
1293 | if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { | |
1294 | // Archaic call | |
1295 | sc.SetState(SCE_PL_IDENTIFIER); | |
f6bcfd97 | 1296 | } else { |
9e96e16f RD |
1297 | sc.SetState(SCE_PL_CHARACTER); |
1298 | Quote.New(); | |
1299 | Quote.Open(sc.ch); | |
f6bcfd97 | 1300 | } |
9e96e16f RD |
1301 | backFlag = BACK_NONE; |
1302 | } else if (sc.ch == '`') { | |
1303 | sc.SetState(SCE_PL_BACKTICKS); | |
1304 | Quote.New(); | |
1305 | Quote.Open(sc.ch); | |
1306 | backFlag = BACK_NONE; | |
1307 | } else if (sc.ch == '$') { | |
1308 | sc.SetState(SCE_PL_SCALAR); | |
1309 | if (sc.chNext == '{') { | |
1310 | sc.ForwardSetState(SCE_PL_OPERATOR); | |
1311 | } else if (IsASpace(sc.chNext)) { | |
1312 | sc.ForwardSetState(SCE_PL_DEFAULT); | |
8e54aaed | 1313 | } else { |
9e96e16f RD |
1314 | sc.Forward(); |
1315 | if (sc.Match('`', '`') || sc.Match(':', ':')) { | |
1316 | sc.Forward(); | |
8e54aaed | 1317 | } |
8e54aaed | 1318 | } |
9e96e16f RD |
1319 | backFlag = BACK_NONE; |
1320 | } else if (sc.ch == '@') { | |
1321 | sc.SetState(SCE_PL_ARRAY); | |
1322 | if (setArray.Contains(sc.chNext)) { | |
1323 | // no special treatment | |
1324 | } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { | |
1325 | sc.Forward(2); | |
1326 | } else if (sc.chNext == '{' || sc.chNext == '[') { | |
1327 | sc.ForwardSetState(SCE_PL_OPERATOR); | |
1328 | } else { | |
1329 | sc.ChangeState(SCE_PL_OPERATOR); | |
1330 | } | |
1331 | backFlag = BACK_NONE; | |
1332 | } else if (setPreferRE.Contains(sc.ch)) { | |
8e54aaed RD |
1333 | // Explicit backward peeking to set a consistent preferRE for |
1334 | // any slash found, so no longer need to track preferRE state. | |
1335 | // Find first previous significant lexed element and interpret. | |
9e96e16f | 1336 | // A few symbols share this code for disambiguation. |
8e54aaed | 1337 | bool preferRE = false; |
9e96e16f RD |
1338 | bool isHereDoc = sc.Match('<', '<'); |
1339 | bool hereDocSpace = false; // for: SCALAR [whitespace] '<<' | |
1340 | unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; | |
9e96e16f | 1341 | sc.Complete(); |
8e54aaed | 1342 | styler.Flush(); |
9e96e16f RD |
1343 | if (styler.StyleAt(bk) == SCE_PL_DEFAULT) |
1344 | hereDocSpace = true; | |
1345 | skipWhitespaceComment(styler, bk); | |
8e54aaed | 1346 | if (bk == 0) { |
9e96e16f | 1347 | // avoid backward scanning breakage |
8e54aaed RD |
1348 | preferRE = true; |
1349 | } else { | |
1350 | int bkstyle = styler.StyleAt(bk); | |
9e96e16f | 1351 | int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); |
1dcf666d | 1352 | switch (bkstyle) { |
8e54aaed RD |
1353 | case SCE_PL_OPERATOR: |
1354 | preferRE = true; | |
8e54aaed RD |
1355 | if (bkch == ')' || bkch == ']') { |
1356 | preferRE = false; | |
1357 | } else if (bkch == '}') { | |
9e96e16f RD |
1358 | // backtrack by counting balanced brace pairs |
1359 | // needed to test for variables like ${}, @{} etc. | |
1360 | bkstyle = styleBeforeBracePair(styler, bk); | |
1361 | if (bkstyle == SCE_PL_SCALAR | |
1dcf666d RD |
1362 | || bkstyle == SCE_PL_ARRAY |
1363 | || bkstyle == SCE_PL_HASH | |
1364 | || bkstyle == SCE_PL_SYMBOLTABLE | |
1365 | || bkstyle == SCE_PL_OPERATOR) { | |
9e96e16f | 1366 | preferRE = false; |
8e54aaed | 1367 | } |
9e96e16f RD |
1368 | } else if (bkch == '+' || bkch == '-') { |
1369 | if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1)) | |
1dcf666d RD |
1370 | && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2))) |
1371 | // exceptions for operators: unary suffixes ++, -- | |
1372 | preferRE = false; | |
8e54aaed RD |
1373 | } |
1374 | break; | |
8e54aaed | 1375 | case SCE_PL_IDENTIFIER: |
591d01be | 1376 | preferRE = true; |
9e96e16f RD |
1377 | bkstyle = styleCheckIdentifier(styler, bk); |
1378 | if ((bkstyle == 1) || (bkstyle == 2)) { | |
1379 | // inputsymbol or var with "->" or "::" before identifier | |
591d01be | 1380 | preferRE = false; |
9e96e16f RD |
1381 | } else if (bkstyle == 3) { |
1382 | // bare identifier, test cases follows: | |
1383 | if (sc.ch == '/') { | |
1384 | // if '/', /PATTERN/ unless digit/space immediately after '/' | |
1385 | // if '//', always expect defined-or operator to follow identifier | |
1386 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') | |
1387 | preferRE = false; | |
1388 | } else if (sc.ch == '*' || sc.ch == '%') { | |
1389 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) | |
1390 | preferRE = false; | |
1391 | } else if (sc.ch == '<') { | |
1392 | if (IsASpace(sc.chNext) || sc.chNext == '=') | |
1393 | preferRE = false; | |
591d01be | 1394 | } |
591d01be RD |
1395 | } |
1396 | break; | |
9e96e16f RD |
1397 | case SCE_PL_SCALAR: // for $var<< case: |
1398 | if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc | |
1399 | preferRE = true; | |
1400 | break; | |
8e54aaed | 1401 | case SCE_PL_WORD: |
9e96e16f RD |
1402 | preferRE = true; |
1403 | // for HERE docs, always true | |
1404 | if (sc.ch == '/') { | |
1405 | // adopt heuristics similar to vim-style rules: | |
1406 | // keywords always forced as /PATTERN/: split, if, elsif, while | |
1407 | // everything else /PATTERN/ unless digit/space immediately after '/' | |
1408 | // for '//', defined-or favoured unless special keywords | |
1dcf666d | 1409 | unsigned int bkend = bk + 1; |
9e96e16f RD |
1410 | while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { |
1411 | bk--; | |
1412 | } | |
1413 | if (isPerlKeyword(bk, bkend, reWords, styler)) | |
1414 | break; | |
1415 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') | |
1416 | preferRE = false; | |
1417 | } else if (sc.ch == '*' || sc.ch == '%') { | |
1418 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) | |
1419 | preferRE = false; | |
1420 | } else if (sc.ch == '<') { | |
1421 | if (IsASpace(sc.chNext) || sc.chNext == '=') | |
1422 | preferRE = false; | |
7e0c58e9 | 1423 | } |
9e96e16f | 1424 | break; |
1dcf666d RD |
1425 | |
1426 | // other styles use the default, preferRE=false |
591d01be | 1427 | case SCE_PL_POD: |
8e54aaed RD |
1428 | case SCE_PL_HERE_Q: |
1429 | case SCE_PL_HERE_QQ: | |
1430 | case SCE_PL_HERE_QX: | |
1431 | preferRE = true; | |
1432 | break; | |
1433 | } | |
1434 | } | |
9e96e16f RD |
1435 | backFlag = BACK_NONE; |
1436 | if (isHereDoc) { // handle '<<', HERE doc | |
1437 | if (preferRE) { | |
1438 | sc.SetState(SCE_PL_HERE_DELIM); | |
1439 | HereDoc.State = 0; | |
1440 | } else { // << operator | |
1441 | sc.SetState(SCE_PL_OPERATOR); | |
1442 | sc.Forward(); | |
8e54aaed | 1443 | } |
9e96e16f RD |
1444 | } else if (sc.ch == '*') { // handle '*', typeglob |
1445 | if (preferRE) { | |
1446 | sc.SetState(SCE_PL_SYMBOLTABLE); | |
1447 | if (sc.chNext == ':' && sc.GetRelative(2) == ':') { | |
1448 | sc.Forward(2); | |
1449 | } else if (sc.chNext == '{') { | |
1450 | sc.ForwardSetState(SCE_PL_OPERATOR); | |
1451 | } else { | |
1452 | sc.Forward(); | |
8e54aaed | 1453 | } |
8e54aaed | 1454 | } else { |
9e96e16f RD |
1455 | sc.SetState(SCE_PL_OPERATOR); |
1456 | if (sc.chNext == '*') // exponentiation | |
1457 | sc.Forward(); | |
65ec6247 | 1458 | } |
9e96e16f RD |
1459 | } else if (sc.ch == '%') { // handle '%', hash |
1460 | if (preferRE) { | |
1461 | sc.SetState(SCE_PL_HASH); | |
1462 | if (setHash.Contains(sc.chNext)) { | |
1463 | sc.Forward(); | |
1464 | } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { | |
1465 | sc.Forward(2); | |
1466 | } else if (sc.chNext == '{') { | |
1467 | sc.ForwardSetState(SCE_PL_OPERATOR); | |
65ec6247 | 1468 | } else { |
9e96e16f | 1469 | sc.ChangeState(SCE_PL_OPERATOR); |
65ec6247 | 1470 | } |
9e96e16f RD |
1471 | } else { |
1472 | sc.SetState(SCE_PL_OPERATOR); | |
1473 | } | |
1474 | } else if (sc.ch == '<') { // handle '<', inputsymbol | |
1475 | if (preferRE) { | |
1476 | // forward scan | |
1477 | int i = inputsymbolScan(styler, sc.currentPos, endPos); | |
1478 | if (i > 0) { | |
1479 | sc.SetState(SCE_PL_IDENTIFIER); | |
1480 | sc.Forward(i); | |
65ec6247 | 1481 | } else { |
9e96e16f | 1482 | sc.SetState(SCE_PL_OPERATOR); |
65ec6247 | 1483 | } |
1e9bafca | 1484 | } else { |
9e96e16f | 1485 | sc.SetState(SCE_PL_OPERATOR); |
f6bcfd97 | 1486 | } |
9e96e16f RD |
1487 | } else { // handle '/', regexp |
1488 | if (preferRE) { | |
1489 | sc.SetState(SCE_PL_REGEX); | |
1490 | Quote.New(); | |
1491 | Quote.Open(sc.ch); | |
1492 | } else { // / and // operators | |
1493 | sc.SetState(SCE_PL_OPERATOR); | |
1494 | if (sc.chNext == '/') { | |
1495 | sc.Forward(); | |
1496 | } | |
65ec6247 | 1497 | } |
f6bcfd97 | 1498 | } |
9e96e16f | 1499 | } else if (sc.ch == '=' // POD |
1dcf666d RD |
1500 | && setPOD.Contains(sc.chNext) |
1501 | && sc.atLineStart) { | |
9e96e16f RD |
1502 | sc.SetState(SCE_PL_POD); |
1503 | backFlag = BACK_NONE; | |
1504 | } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases | |
1505 | unsigned int bk = sc.currentPos; | |
1506 | unsigned int fw = 2; | |
1507 | if (setSingleCharOp.Contains(sc.chNext) && // file test operators | |
1dcf666d | 1508 | !setWord.Contains(sc.GetRelative(2))) { |
9e96e16f | 1509 | sc.SetState(SCE_PL_WORD); |
f6bcfd97 | 1510 | } else { |
9e96e16f RD |
1511 | // nominally a minus and bareword; find extent of bareword |
1512 | while (setWord.Contains(sc.GetRelative(fw))) | |
1513 | fw++; | |
1514 | sc.SetState(SCE_PL_OPERATOR); | |
f6bcfd97 | 1515 | } |
9e96e16f RD |
1516 | // force to bareword for hash key => or {variable literal} cases |
1517 | if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { | |
1518 | sc.ChangeState(SCE_PL_IDENTIFIER); | |
1519 | } | |
1520 | backFlag = BACK_NONE; | |
1521 | } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype | |
1522 | sc.Complete(); | |
1523 | if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { | |
1524 | sc.SetState(SCE_PL_SUB_PROTOTYPE); | |
1525 | backFlag = BACK_NONE; | |
f6bcfd97 | 1526 | } else { |
9e96e16f | 1527 | sc.SetState(SCE_PL_OPERATOR); |
f6bcfd97 | 1528 | } |
9e96e16f RD |
1529 | } else if (setPerlOperator.Contains(sc.ch)) { // operators |
1530 | sc.SetState(SCE_PL_OPERATOR); | |
1531 | if (sc.Match('.', '.')) { // .. and ... | |
1532 | sc.Forward(); | |
1533 | if (sc.chNext == '.') sc.Forward(); | |
f6bcfd97 | 1534 | } |
9e96e16f RD |
1535 | } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source |
1536 | sc.SetState(SCE_PL_DATASECTION); | |
1537 | } else { | |
1538 | // keep colouring defaults | |
1539 | sc.Complete(); | |
1540 | } | |
65ec6247 | 1541 | } |
f6bcfd97 | 1542 | } |
9e96e16f | 1543 | sc.Complete(); |
1dcf666d RD |
1544 | if (sc.state == SCE_PL_HERE_Q |
1545 | || sc.state == SCE_PL_HERE_QQ | |
1546 | || sc.state == SCE_PL_HERE_QX | |
1547 | || sc.state == SCE_PL_FORMAT) { | |
1548 | styler.ChangeLexerState(sc.currentPos, styler.Length()); | |
1e9bafca | 1549 | } |
1dcf666d | 1550 | sc.Complete(); |
1e9bafca RD |
1551 | } |
1552 | ||
1dcf666d RD |
1553 | #define PERL_HEADFOLD_SHIFT 4 |
1554 | #define PERL_HEADFOLD_MASK 0xF0 | |
1555 | ||
1556 | void SCI_METHOD LexerPerl::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) { | |
9e96e16f | 1557 | |
1dcf666d RD |
1558 | if (!options.fold) |
1559 | return; | |
9e96e16f | 1560 | |
1dcf666d | 1561 | LexAccessor styler(pAccess); |
9e96e16f | 1562 | |
9e730a78 RD |
1563 | unsigned int endPos = startPos + length; |
1564 | int visibleChars = 0; | |
1565 | int lineCurrent = styler.GetLine(startPos); | |
1dcf666d RD |
1566 | |
1567 | // Backtrack to previous line in case need to fix its fold status | |
1568 | if (startPos > 0) { | |
1569 | if (lineCurrent > 0) { | |
1570 | lineCurrent--; | |
1571 | startPos = styler.LineStart(lineCurrent); | |
1572 | } | |
1573 | } | |
1574 | ||
1e9bafca RD |
1575 | int levelPrev = SC_FOLDLEVELBASE; |
1576 | if (lineCurrent > 0) | |
1577 | levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; | |
9e730a78 RD |
1578 | int levelCurrent = levelPrev; |
1579 | char chNext = styler[startPos]; | |
1e9bafca | 1580 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
9e730a78 | 1581 | int styleNext = styler.StyleAt(startPos); |
1e9bafca RD |
1582 | // Used at end of line to determine if the line was a package definition |
1583 | bool isPackageLine = false; | |
1dcf666d | 1584 | int podHeading = 0; |
9e730a78 RD |
1585 | for (unsigned int i = startPos; i < endPos; i++) { |
1586 | char ch = chNext; | |
1587 | chNext = styler.SafeGetCharAt(i + 1); | |
1588 | int style = styleNext; | |
1589 | styleNext = styler.StyleAt(i + 1); | |
1dcf666d | 1590 | int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT; |
9e730a78 | 1591 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
9e96e16f RD |
1592 | bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; |
1593 | // Comment folding | |
1dcf666d | 1594 | if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) { |
9e96e16f | 1595 | if (!IsCommentLine(lineCurrent - 1, styler) |
1dcf666d | 1596 | && IsCommentLine(lineCurrent + 1, styler)) |
9e96e16f RD |
1597 | levelCurrent++; |
1598 | else if (IsCommentLine(lineCurrent - 1, styler) | |
1dcf666d | 1599 | && !IsCommentLine(lineCurrent + 1, styler)) |
9e96e16f RD |
1600 | levelCurrent--; |
1601 | } | |
1dcf666d | 1602 | // {} [] block folding |
9e96e16f | 1603 | if (style == SCE_PL_OPERATOR) { |
9e730a78 | 1604 | if (ch == '{') { |
1dcf666d RD |
1605 | if (options.foldAtElse && levelCurrent < levelPrev) |
1606 | --levelPrev; | |
9e730a78 RD |
1607 | levelCurrent++; |
1608 | } else if (ch == '}') { | |
1609 | levelCurrent--; | |
1610 | } | |
1dcf666d RD |
1611 | if (ch == '[') { |
1612 | if (options.foldAtElse && levelCurrent < levelPrev) | |
1613 | --levelPrev; | |
1614 | levelCurrent++; | |
1615 | } else if (ch == ']') { | |
1616 | levelCurrent--; | |
1617 | } | |
9e730a78 | 1618 | } |
1dcf666d RD |
1619 | // POD folding |
1620 | if (options.foldPOD && atLineStart) { | |
1e9bafca RD |
1621 | if (style == SCE_PL_POD) { |
1622 | if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB) | |
1623 | levelCurrent++; | |
1624 | else if (styler.Match(i, "=cut")) | |
1dcf666d | 1625 | levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1; |
1e9bafca | 1626 | else if (styler.Match(i, "=head")) |
1dcf666d | 1627 | podHeading = PodHeadingLevel(i, styler); |
1e9bafca | 1628 | } else if (style == SCE_PL_DATASECTION) { |
1dcf666d | 1629 | if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) |
9e96e16f RD |
1630 | levelCurrent++; |
1631 | else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) | |
1dcf666d | 1632 | levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1; |
9e96e16f | 1633 | else if (styler.Match(i, "=head")) |
1dcf666d | 1634 | podHeading = PodHeadingLevel(i, styler); |
9e96e16f RD |
1635 | // if package used or unclosed brace, level > SC_FOLDLEVELBASE! |
1636 | // reset needed as level test is vs. SC_FOLDLEVELBASE | |
1dcf666d | 1637 | else if (stylePrevCh != SCE_PL_DATASECTION) |
9e96e16f RD |
1638 | levelCurrent = SC_FOLDLEVELBASE; |
1639 | } | |
1e9bafca | 1640 | } |
1dcf666d RD |
1641 | // package folding |
1642 | if (options.foldPackage && atLineStart) { | |
1643 | if (IsPackageLine(lineCurrent, styler) | |
1644 | && !IsPackageLine(lineCurrent + 1, styler)) | |
1e9bafca | 1645 | isPackageLine = true; |
1dcf666d RD |
1646 | } |
1647 | ||
1648 | //heredoc folding | |
1649 | switch (style) { | |
1650 | case SCE_PL_HERE_QQ : | |
1651 | case SCE_PL_HERE_Q : | |
1652 | case SCE_PL_HERE_QX : | |
1653 | switch (stylePrevCh) { | |
1654 | case SCE_PL_HERE_QQ : | |
1655 | case SCE_PL_HERE_Q : | |
1656 | case SCE_PL_HERE_QX : | |
1657 | //do nothing; | |
1658 | break; | |
1659 | default : | |
1660 | levelCurrent++; | |
1661 | break; | |
1662 | } | |
1663 | break; | |
1664 | default: | |
1665 | switch (stylePrevCh) { | |
1666 | case SCE_PL_HERE_QQ : | |
1667 | case SCE_PL_HERE_Q : | |
1668 | case SCE_PL_HERE_QX : | |
1669 | levelCurrent--; | |
1670 | break; | |
1671 | default : | |
1672 | //do nothing; | |
1673 | break; | |
1674 | } | |
1675 | break; | |
1676 | } | |
1677 | ||
1678 | //explicit folding | |
1679 | if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') { | |
1680 | if (chNext == '{') { | |
1681 | levelCurrent++; | |
1682 | } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') { | |
1683 | levelCurrent--; | |
1e9bafca RD |
1684 | } |
1685 | } | |
1686 | ||
9e730a78 RD |
1687 | if (atEOL) { |
1688 | int lev = levelPrev; | |
1dcf666d RD |
1689 | // POD headings occupy bits 7-4, leaving some breathing room for |
1690 | // non-standard practice -- POD sections stuck in blocks, etc. | |
1691 | if (podHeading > 0) { | |
1692 | levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT); | |
1693 | lev = levelCurrent - 1; | |
9e96e16f | 1694 | lev |= SC_FOLDLEVELHEADERFLAG; |
1dcf666d | 1695 | podHeading = 0; |
1e9bafca RD |
1696 | } |
1697 | // Check if line was a package declaration | |
1698 | // because packages need "special" treatment | |
1699 | if (isPackageLine) { | |
1700 | lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; | |
1701 | levelCurrent = SC_FOLDLEVELBASE + 1; | |
1702 | isPackageLine = false; | |
1703 | } | |
9e96e16f | 1704 | lev |= levelCurrent << 16; |
1dcf666d | 1705 | if (visibleChars == 0 && options.foldCompact) |
9e730a78 RD |
1706 | lev |= SC_FOLDLEVELWHITEFLAG; |
1707 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
1708 | lev |= SC_FOLDLEVELHEADERFLAG; | |
1709 | if (lev != styler.LevelAt(lineCurrent)) { | |
1710 | styler.SetLevel(lineCurrent, lev); | |
1711 | } | |
1712 | lineCurrent++; | |
1713 | levelPrev = levelCurrent; | |
1714 | visibleChars = 0; | |
1715 | } | |
1716 | if (!isspacechar(ch)) | |
1717 | visibleChars++; | |
1e9bafca | 1718 | chPrev = ch; |
9e730a78 RD |
1719 | } |
1720 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
1721 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; | |
1722 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); | |
1723 | } | |
1724 | ||
1dcf666d | 1725 | LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc, 8); |