]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexPerl.cxx
a7186fe5525641672f16b746da4cbdc68830aec1
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexPerl.cxx
1 // SciTE - Scintilla based Text Editor
2 // LexPerl.cxx - lexer for subset of Perl
3 // Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
5
6 #include <stdlib.h>
7 #include <string.h>
8 #include <ctype.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11
12 #include "Platform.h"
13
14 #include "PropSet.h"
15 #include "Accessor.h"
16 #include "KeyWords.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19
// Returns true when ch is one of the punctuation characters this lexer
// colours as a Perl operator.
//
// Letters and digits are never operators; they simply do not appear in the
// set below, so no <ctype.h> classification is needed (calling isalnum() on a
// plain char is undefined for negative values, i.e. bytes >= 0x80 where char
// is signed).
//
// Note that '.' IS part of the operator set, even though it can also appear
// inside numbers.
inline bool isPerlOperator(char ch) {
    switch (ch) {
    case '%': case '^': case '&': case '*': case '\\':
    case '(': case ')': case '-': case '+':
    case '=': case '|': case '{': case '}':
    case '[': case ']': case ':': case ';':
    case '<': case '>': case ',': case '/':
    case '?': case '!': case '.': case '~':
        return true;
    default:
        return false;
    }
}
33
34 static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
35 char s[100];
36 bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
37 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
38 s[i] = styler[start + i];
39 s[i + 1] = '\0';
40 }
41 char chAttr = SCE_PL_IDENTIFIER;
42 if (wordIsNumber)
43 chAttr = SCE_PL_NUMBER;
44 else {
45 if (keywords.InList(s))
46 chAttr = SCE_PL_WORD;
47 }
48 styler.ColourTo(end, chAttr);
49 return chAttr;
50 }
51
// Returns true when ch cannot be part of a variable name as recognised by
// this lexer, i.e. it is not alphanumeric and is none of '#', '$', '_', '\''.
static bool isEndVar(char ch) {
    // Convert to unsigned char first: isalnum() is undefined for negative
    // arguments, which is what a byte >= 0x80 is where char is signed.
    return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
           ch != '_' && ch != '\'';
}
56
57 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
58 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
59 return false;
60 }
61 while (*val) {
62 if (*val != styler[pos++]) {
63 return false;
64 }
65 val++;
66 }
67 return true;
68 }
69
// Maps an opening bracket character to its closing partner:
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
// Any other character is returned unchanged, so it acts as its own closer.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
81
82 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
83 WordList *keywordlists[], Accessor &styler) {
84
85 // Lexer for perl often has to backtrack to start of current style to determine
86 // which characters are being used as quotes, how deeply nested is the
87 // start position and what the termination string is for here documents
88
89 WordList &keywords = *keywordlists[0];
90
91 char sooked[100];
92 int quotes = 0;
93 char quoteDown = 'd';
94 char quoteUp = 'd';
95 int quoteRep = 1;
96 int sookedpos = 0;
97 bool preferRE = true;
98 sooked[sookedpos] = '\0';
99 int state = initStyle;
100 int lengthDoc = startPos + length;
101 // If in a long distance lexical state, seek to the beginning to find quote characters
102 if (state == SCE_PL_HERE || state == SCE_PL_REGEX ||
103 state == SCE_PL_REGSUBST || state == SCE_PL_LONGQUOTE) {
104 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
105 startPos--;
106 }
107 state = SCE_PL_DEFAULT;
108 }
109 styler.StartAt(startPos);
110 char chPrev = styler.SafeGetCharAt(startPos - 1);
111 char chNext = styler[startPos];
112 styler.StartSegment(startPos);
113 for (int i = startPos; i < lengthDoc; i++) {
114 char ch = chNext;
115 chNext = styler.SafeGetCharAt(i + 1);
116 char chNext2 = styler.SafeGetCharAt(i + 2);
117
118 if (styler.IsLeadByte(ch)) {
119 chNext = styler.SafeGetCharAt(i + 2);
120 chPrev = ' ';
121 i += 1;
122 continue;
123 }
124
125 if (state == SCE_PL_DEFAULT) {
126 if (iswordstart(ch)) {
127 styler.ColourTo(i - 1, state);
128 if (ch == 's' && !isalnum(chNext)) {
129 state = SCE_PL_REGSUBST;
130 quotes = 0;
131 quoteUp = '\0';
132 quoteDown = '\0';
133 quoteRep = 2;
134 } else if (ch == 'm' && !isalnum(chNext)) {
135 state = SCE_PL_REGEX;
136 quotes = 0;
137 quoteUp = '\0';
138 quoteDown = '\0';
139 quoteRep = 1;
140 } else if (ch == 't' && chNext == 'r' && !isalnum(chNext2)) {
141 state = SCE_PL_REGSUBST;
142 quotes = 0;
143 quoteUp = '\0';
144 quoteDown = '\0';
145 quoteRep = 2;
146 i++;
147 chNext = chNext2;
148 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isalnum(chNext2)) {
149 state = SCE_PL_LONGQUOTE;
150 i++;
151 chNext = chNext2;
152 quotes = 0;
153 quoteUp = '\0';
154 quoteDown = '\0';
155 quoteRep = 1;
156 } else {
157 state = SCE_PL_WORD;
158 preferRE = false;
159 }
160 } else if (ch == '#') {
161 styler.ColourTo(i - 1, state);
162 state = SCE_PL_COMMENTLINE;
163 } else if (ch == '\"') {
164 styler.ColourTo(i - 1, state);
165 state = SCE_PL_STRING;
166 } else if (ch == '\'') {
167 if (chPrev == '&') {
168 // Archaic call
169 styler.ColourTo(i, state);
170 } else {
171 styler.ColourTo(i - 1, state);
172 state = SCE_PL_CHARACTER;
173 }
174 } else if (ch == '`') {
175 styler.ColourTo(i - 1, state);
176 state = SCE_PL_BACKTICKS;
177 } else if (ch == '$') {
178 preferRE = false;
179 styler.ColourTo(i - 1, state);
180 if (isalnum(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
181 state = SCE_PL_SCALAR;
182 } else if (chNext != '{' && chNext != '[') {
183 styler.ColourTo(i, SCE_PL_SCALAR);
184 i++;
185 ch = ' ';
186 chNext = ' ';
187 } else {
188 styler.ColourTo(i, SCE_PL_SCALAR);
189 }
190 } else if (ch == '@') {
191 preferRE = false;
192 styler.ColourTo(i - 1, state);
193 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
194 state = SCE_PL_ARRAY;
195 } else if (chNext != '{' && chNext != '[') {
196 styler.ColourTo(i, SCE_PL_ARRAY);
197 i++;
198 ch = ' ';
199 } else {
200 styler.ColourTo(i, SCE_PL_ARRAY);
201 }
202 } else if (ch == '%') {
203 preferRE = false;
204 styler.ColourTo(i - 1, state);
205 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
206 state = SCE_PL_HASH;
207 } else if (chNext != '{' && chNext != '[') {
208 styler.ColourTo(i, SCE_PL_HASH);
209 i++;
210 ch = ' ';
211 } else {
212 styler.ColourTo(i, SCE_PL_HASH);
213 }
214 } else if (ch == '*') {
215 styler.ColourTo(i - 1, state);
216 state = SCE_PL_SYMBOLTABLE;
217 } else if (ch == '/' && preferRE) {
218 styler.ColourTo(i - 1, state);
219 state = SCE_PL_REGEX;
220 quoteUp = '/';
221 quoteDown = '/';
222 quotes = 1;
223 quoteRep = 1;
224 } else if (ch == '<' && chNext == '<') {
225 styler.ColourTo(i - 1, state);
226 state = SCE_PL_HERE;
227 i++;
228 ch = chNext;
229 chNext = chNext2;
230 quotes = 0;
231 sookedpos = 0;
232 sooked[sookedpos] = '\0';
233 } else if (ch == '=' && (chPrev == '\r' || chPrev == '\n') && isalpha(chNext)) {
234 styler.ColourTo(i - 1, state);
235 state = SCE_PL_POD;
236 quotes = 0;
237 sookedpos = 0;
238 sooked[sookedpos] = '\0';
239 } else if (isPerlOperator(ch)) {
240 if (ch == ')' || ch == ']')
241 preferRE = false;
242 else
243 preferRE = true;
244 styler.ColourTo(i - 1, state);
245 styler.ColourTo(i, SCE_PL_OPERATOR);
246 }
247 } else if (state == SCE_PL_WORD) {
248 if (!iswordchar(ch) && ch != '\'') { // Archaic Perl has quotes inside names
249 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) {
250 styler.ColourTo(i, SCE_PL_DATASECTION);
251 state = SCE_PL_DATASECTION;
252 } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
253 styler.ColourTo(i, SCE_PL_DATASECTION);
254 state = SCE_PL_DATASECTION;
255 } else {
256 if (classifyWordPerl(styler.GetStartSegment(), i - 1, keywords, styler) == SCE_PL_WORD)
257 preferRE = true;
258 state = SCE_PL_DEFAULT;
259 if (ch == '#') {
260 state = SCE_PL_COMMENTLINE;
261 } else if (ch == '\"') {
262 state = SCE_PL_STRING;
263 } else if (ch == '\'') {
264 state = SCE_PL_CHARACTER;
265 } else if (ch == '<' && chNext == '<') {
266 state = SCE_PL_HERE;
267 quotes = 0;
268 sookedpos = 0;
269 sooked[sookedpos] = '\0';
270 } else if (isPerlOperator(ch)) {
271 if (ch == ')' || ch == ']')
272 preferRE = false;
273 else
274 preferRE = true;
275 styler.ColourTo(i, SCE_PL_OPERATOR);
276 state = SCE_PL_DEFAULT;
277 }
278 }
279 }
280 } else {
281 if (state == SCE_PL_COMMENTLINE) {
282 if (ch == '\r' || ch == '\n') {
283 styler.ColourTo(i - 1, state);
284 state = SCE_PL_DEFAULT;
285 }
286 } else if (state == SCE_PL_HERE) {
287 if ((isalnum(ch) || ch == '_') && quotes < 2) {
288 sooked[sookedpos++] = ch;
289 sooked[sookedpos] = '\0';
290 if (quotes == 0)
291 quotes = 1;
292 } else {
293 quotes++;
294 }
295 if ((quotes > 1) &&
296 (chPrev == '\n' || chPrev == '\r') &&
297 isMatch(styler, lengthDoc, i, sooked)) {
298 i += sookedpos;
299 chNext = styler.SafeGetCharAt(i);
300 if (chNext == '\n' || chNext == '\r') {
301 styler.ColourTo(i - 1, SCE_PL_HERE);
302 state = SCE_PL_DEFAULT;
303 }
304 ch = chNext;
305 chNext = styler.SafeGetCharAt(i + 1);
306 }
307 } else if (state == SCE_PL_STRING) {
308 if (ch == '\\') {
309 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
310 i++;
311 ch = chNext;
312 chNext = styler.SafeGetCharAt(i + 1);
313 }
314 } else if (ch == '\"') {
315 styler.ColourTo(i, state);
316 state = SCE_PL_DEFAULT;
317 i++;
318 ch = chNext;
319 chNext = styler.SafeGetCharAt(i + 1);
320 }
321 } else if (state == SCE_PL_CHARACTER) {
322 if (ch == '\\') {
323 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
324 i++;
325 ch = chNext;
326 chNext = styler.SafeGetCharAt(i + 1);
327 }
328 } else if (ch == '\'') {
329 styler.ColourTo(i, state);
330 state = SCE_PL_DEFAULT;
331 i++;
332 ch = chNext;
333 chNext = styler.SafeGetCharAt(i + 1);
334 }
335 } else if (state == SCE_PL_BACKTICKS) {
336 if (ch == '`') {
337 styler.ColourTo(i, state);
338 state = SCE_PL_DEFAULT;
339 i++;
340 ch = chNext;
341 chNext = styler.SafeGetCharAt(i + 1);
342 }
343 } else if (state == SCE_PL_POD) {
344 if (ch == '=' && (chPrev == '\r' || chPrev == '\n')) {
345 if (isMatch(styler, lengthDoc, i, "=cut")) {
346 styler.ColourTo(i - 1 + 4, state);
347 i += 4;
348 state = SCE_PL_DEFAULT;
349 ch = styler.SafeGetCharAt(i);
350 chNext = styler.SafeGetCharAt(i + 1);
351 }
352 }
353 } else if (state == SCE_PL_SCALAR) {
354 if (isEndVar(ch)) {
355 styler.ColourTo(i - 1, state);
356 state = SCE_PL_DEFAULT;
357 }
358 } else if (state == SCE_PL_ARRAY) {
359 if (isEndVar(ch)) {
360 styler.ColourTo(i - 1, state);
361 state = SCE_PL_DEFAULT;
362 }
363 } else if (state == SCE_PL_HASH) {
364 if (isEndVar(ch)) {
365 styler.ColourTo(i - 1, state);
366 state = SCE_PL_DEFAULT;
367 }
368 } else if (state == SCE_PL_SYMBOLTABLE) {
369 if (isEndVar(ch)) {
370 styler.ColourTo(i - 1, state);
371 state = SCE_PL_DEFAULT;
372 }
373 } else if (state == SCE_PL_REF) {
374 if (isEndVar(ch)) {
375 styler.ColourTo(i - 1, state);
376 state = SCE_PL_DEFAULT;
377 }
378 } else if (state == SCE_PL_REGEX) {
379 if (!quoteUp && !isspace(ch)) {
380 quoteUp = ch;
381 quoteDown = opposite(ch);
382 quotes++;
383 } else {
384 if (ch == quoteDown && chPrev != '\\') {
385 quotes--;
386 if (quotes == 0) {
387 quoteRep--;
388 if (quoteUp == quoteDown) {
389 quotes++;
390 }
391 }
392 if (!isalpha(chNext)) {
393 if (quoteRep <= 0) {
394 styler.ColourTo(i, state);
395 state = SCE_PL_DEFAULT;
396 ch = ' ';
397 }
398 }
399 } else if (ch == quoteUp && chPrev != '\\') {
400 quotes++;
401 } else if (!isalpha(chNext)) {
402 if (quoteRep <= 0) {
403 styler.ColourTo(i, state);
404 state = SCE_PL_DEFAULT;
405 ch = ' ';
406 }
407 }
408 }
409 } else if (state == SCE_PL_REGSUBST) {
410 if (!quoteUp && !isspace(ch)) {
411 quoteUp = ch;
412 quoteDown = opposite(ch);
413 quotes++;
414 } else {
415 if (quotes == 0 && quoteRep == 1) {
416 /* We matched something like s(...) or tr{...}
417 * and are looking for the next matcher characters,
418 * which could be either bracketed ({...}) or non-bracketed
419 * (/.../).
420 *
421 * Number-signs are problematic. If they occur after
422 * the close of the first part, treat them like
423 * a quoteUp char, even if they actually start comments.
424 *
425 * If we find an alnum, we end the regsubst, and punt.
426 *
427 * Eric Promislow ericp@activestate.com Aug 9,2000
428 */
429 if (isspace(ch)) {
430 // Keep going
431 } else if (isalnum(ch)) {
432 styler.ColourTo(i, state);
433 state = SCE_PL_DEFAULT;
434 ch = ' ';
435 } else {
436 quoteUp = ch;
437 quoteDown = opposite(ch);
438 quotes++;
439 }
440 } else if (ch == quoteDown && chPrev != '\\') {
441 quotes--;
442 if (quotes == 0) {
443 quoteRep--;
444 }
445 if (!isalpha(chNext)) {
446 if (quoteRep <= 0) {
447 styler.ColourTo(i, state);
448 state = SCE_PL_DEFAULT;
449 ch = ' ';
450 }
451 }
452 if (quoteUp == quoteDown) {
453 quotes++;
454 }
455 } else if (ch == quoteUp && chPrev != '\\') {
456 quotes++;
457 } else if (!isalpha(chNext)) {
458 if (quoteRep <= 0) {
459 styler.ColourTo(i, state);
460 state = SCE_PL_DEFAULT;
461 ch = ' ';
462 }
463 }
464 }
465 } else if (state == SCE_PL_LONGQUOTE) {
466 if (!quoteDown && !isspace(ch)) {
467 quoteUp = ch;
468 quoteDown = opposite(quoteUp);
469 quotes++;
470 } else if (ch == quoteDown) {
471 quotes--;
472 if (quotes == 0) {
473 quoteRep--;
474 if (quoteRep <= 0) {
475 styler.ColourTo(i, state);
476 state = SCE_PL_DEFAULT;
477 ch = ' ';
478 }
479 if (quoteUp == quoteDown) {
480 quotes++;
481 }
482 }
483 } else if (ch == quoteUp) {
484 quotes++;
485 }
486 }
487
488 if (state == SCE_PL_DEFAULT) { // One of the above succeeded
489 if (ch == '#') {
490 state = SCE_PL_COMMENTLINE;
491 } else if (ch == '\"') {
492 state = SCE_PL_STRING;
493 } else if (ch == '\'') {
494 state = SCE_PL_CHARACTER;
495 } else if (iswordstart(ch)) {
496 state = SCE_PL_WORD;
497 preferRE = false;
498 } else if (isoperator(ch)) {
499 styler.ColourTo(i, SCE_PL_OPERATOR);
500 }
501 }
502 }
503 chPrev = ch;
504 }
505 styler.ColourTo(lengthDoc, state);
506 }
507
// Defines the LexerModule object lmPerl, constructed from the SCLEX_PERL
// identifier and the ColourisePerlDoc colouring function.
// NOTE(review): presumably this is what makes the Perl lexer available to the
// rest of Scintilla - confirm against the LexerModule declaration.
508 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc);