]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexPerl.cxx
9ccb207d0f3ae44e28507357b0c2efec3fdbaee0
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexPerl.cxx
1 // SciTE - Scintilla based Text Editor
2 // LexPerl.cxx - lexer for subset of Perl
3 // Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
5
6 #include <stdlib.h>
7 #include <string.h>
8 #include <ctype.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11
12 #include "Platform.h"
13
14 #include "PropSet.h"
15 #include "Accessor.h"
16 #include "KeyWords.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19
// Return true when ch is one of the punctuation characters that the Perl
// lexer colours as an operator. Letters and digits are never operators.
inline bool isPerlOperator(char ch) {
	// The <ctype.h> classifiers are only defined for values representable as
	// unsigned char (or EOF). Plain char may be signed, so a document byte
	// above 0x7F must be converted before it is classified.
	if (isalnum(static_cast<unsigned char>(ch)))
		return false;
	// '.' is part of this set, so a '.' seen here is treated as an operator.
	switch (ch) {
	case '%': case '^': case '&': case '*': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
33
34 static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, StylingContext &styler) {
35 char s[100];
36 bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
37 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
38 s[i] = styler[start + i];
39 s[i + 1] = '\0';
40 }
41 char chAttr = SCE_PL_IDENTIFIER;
42 if (wordIsNumber)
43 chAttr = SCE_PL_NUMBER;
44 else {
45 if (keywords.InList(s))
46 chAttr = SCE_PL_WORD;
47 }
48 styler.ColourTo(end, chAttr);
49 return chAttr;
50 }
51
// Return true when ch cannot be part of a variable name, i.e. it is not
// alphanumeric and is none of '#', '$', '_' or '\''.
static bool isEndVar(char ch) {
	// Convert before classifying: isalnum() is undefined for negative values
	// and plain char may be signed for bytes above 0x7F.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
56
57 static bool isMatch(StylingContext &styler, int lengthDoc, int pos, const char *val) {
58 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
59 return false;
60 }
61 while (*val) {
62 if (*val != styler[pos++]) {
63 return false;
64 }
65 val++;
66 }
67 return true;
68 }
69
// Return true when ch is neither alphanumeric, whitespace nor a control
// character.
static bool isOKQuote(char ch) {
	// The <ctype.h> classifiers are undefined for negative arguments, and
	// plain char may be signed, so classify the unsigned value instead.
	const unsigned char uch = static_cast<unsigned char>(ch);
	if (isalnum(uch))
		return false;
	if (isspace(uch))
		return false;
	if (iscntrl(uch))
		return false;
	return true;
}
79
// Map an opening bracket to its closing partner: '(' -> ')', '[' -> ']',
// '{' -> '}', '<' -> '>'. Every other character is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
91
92 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
93 WordList *keywordlists[], StylingContext &styler) {
94
95 // Lexer for perl often has to backtrack to start of current style to determine
96 // which characters are being used as quotes, how deeply nested is the
97 // start position and what the termination string is for here documents
98
99 WordList &keywords = *keywordlists[0];
100
101 char sooked[100];
102 int quotes = 0;
103 char quoteDown = 'd';
104 char quoteUp = 'd';
105 int quoteRep = 1;
106 int sookedpos = 0;
107 bool preferRE = true;
108 sooked[sookedpos] = '\0';
109 int state = initStyle;
110 int lengthDoc = startPos + length;
111 // If in a long distance lexical state, seek to the beginning to find quote characters
112 if (state == SCE_PL_HERE || state == SCE_PL_REGEX ||
113 state == SCE_PL_REGSUBST || state == SCE_PL_LONGQUOTE) {
114 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
115 startPos--;
116 }
117 state = SCE_PL_DEFAULT;
118 }
119 styler.StartAt(startPos);
120 char chPrev = ' ';
121 char chNext = styler[startPos];
122 styler.StartSegment(startPos);
123 for (int i = startPos; i <= lengthDoc; i++) {
124 char ch = chNext;
125 chNext = styler.SafeGetCharAt(i + 1);
126 char chNext2 = styler.SafeGetCharAt(i + 2);
127
128 if (styler.IsLeadByte(ch)) {
129 chNext = styler.SafeGetCharAt(i + 2);
130 chPrev = ' ';
131 i += 1;
132 continue;
133 }
134
135 if (state == SCE_PL_DEFAULT) {
136 if (iswordstart(ch)) {
137 styler.ColourTo(i - 1, state);
138 if (ch == 's' && !isalnum(chNext)) {
139 state = SCE_PL_REGSUBST;
140 quotes = 0;
141 quoteUp = '\0';
142 quoteDown = '\0';
143 quoteRep = 2;
144 } else if (ch == 'm' && !isalnum(chNext)) {
145 state = SCE_PL_REGEX;
146 quotes = 0;
147 quoteUp = '\0';
148 quoteDown = '\0';
149 quoteRep = 1;
150 } else if (ch == 't' && chNext == 'r' && !isalnum(chNext2)) {
151 state = SCE_PL_REGSUBST;
152 quotes = 0;
153 quoteUp = '\0';
154 quoteDown = '\0';
155 quoteRep = 2;
156 i++;
157 chNext = chNext2;
158 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isalnum(chNext2)) {
159 state = SCE_PL_LONGQUOTE;
160 i++;
161 chNext = chNext2;
162 quotes = 0;
163 quoteUp = '\0';
164 quoteDown = '\0';
165 quoteRep = 1;
166 } else {
167 state = SCE_PL_WORD;
168 preferRE = false;
169 }
170 } else if (ch == '#') {
171 styler.ColourTo(i - 1, state);
172 state = SCE_PL_COMMENTLINE;
173 } else if (ch == '\"') {
174 styler.ColourTo(i - 1, state);
175 state = SCE_PL_STRING;
176 } else if (ch == '\'') {
177 if (chPrev == '&') {
178 // Archaic call
179 styler.ColourTo(i, state);
180 } else {
181 styler.ColourTo(i - 1, state);
182 state = SCE_PL_CHARACTER;
183 }
184 } else if (ch == '`') {
185 styler.ColourTo(i - 1, state);
186 state = SCE_PL_BACKTICKS;
187 } else if (ch == '$') {
188 preferRE = false;
189 styler.ColourTo(i - 1, state);
190 if (isalnum(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
191 state = SCE_PL_SCALAR;
192 } else if (chNext != '{' && chNext != '[') {
193 styler.ColourTo(i, SCE_PL_SCALAR);
194 i++;
195 ch = ' ';
196 chNext = ' ';
197 } else {
198 styler.ColourTo(i, SCE_PL_SCALAR);
199 }
200 } else if (ch == '@') {
201 preferRE = false;
202 styler.ColourTo(i - 1, state);
203 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
204 state = SCE_PL_ARRAY;
205 } else if (chNext != '{' && chNext != '[') {
206 styler.ColourTo(i, SCE_PL_ARRAY);
207 i++;
208 ch = ' ';
209 } else {
210 styler.ColourTo(i, SCE_PL_ARRAY);
211 }
212 } else if (ch == '%') {
213 preferRE = false;
214 styler.ColourTo(i - 1, state);
215 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
216 state = SCE_PL_HASH;
217 } else if (chNext != '{' && chNext != '[') {
218 styler.ColourTo(i, SCE_PL_HASH);
219 i++;
220 ch = ' ';
221 } else {
222 styler.ColourTo(i, SCE_PL_HASH);
223 }
224 } else if (ch == '*') {
225 styler.ColourTo(i - 1, state);
226 state = SCE_PL_SYMBOLTABLE;
227 } else if (ch == '/' && preferRE) {
228 styler.ColourTo(i - 1, state);
229 state = SCE_PL_REGEX;
230 quoteUp = '/';
231 quoteDown = '/';
232 quotes = 1;
233 quoteRep = 1;
234 } else if (ch == '<' && chNext == '<') {
235 styler.ColourTo(i - 1, state);
236 state = SCE_PL_HERE;
237 i++;
238 ch = chNext;
239 chNext = chNext2;
240 quotes = 0;
241 sookedpos = 0;
242 sooked[sookedpos] = '\0';
243 } else if (ch == '=' && isalpha(chNext)) {
244 styler.ColourTo(i - 1, state);
245 state = SCE_PL_POD;
246 quotes = 0;
247 sookedpos = 0;
248 sooked[sookedpos] = '\0';
249 } else if (isPerlOperator(ch)) {
250 if (ch == ')' || ch == ']')
251 preferRE = false;
252 else
253 preferRE = true;
254 styler.ColourTo(i - 1, state);
255 styler.ColourTo(i, SCE_PL_OPERATOR);
256 }
257 } else if (state == SCE_PL_WORD) {
258 if (!iswordchar(ch) && ch != '\'') { // Archaic Perl has quotes inside names
259 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) {
260 styler.ColourTo(i, SCE_PL_DATASECTION);
261 state = SCE_PL_DATASECTION;
262 } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
263 styler.ColourTo(i, SCE_PL_DATASECTION);
264 state = SCE_PL_DATASECTION;
265 } else {
266 if (classifyWordPerl(styler.GetStartSegment(), i - 1, keywords, styler) == SCE_PL_WORD)
267 preferRE = true;
268 state = SCE_PL_DEFAULT;
269 if (ch == '#') {
270 state = SCE_PL_COMMENTLINE;
271 } else if (ch == '\"') {
272 state = SCE_PL_STRING;
273 } else if (ch == '\'') {
274 state = SCE_PL_CHARACTER;
275 } else if (ch == '<' && chNext == '<') {
276 state = SCE_PL_HERE;
277 quotes = 0;
278 sookedpos = 0;
279 sooked[sookedpos] = '\0';
280 } else if (isPerlOperator(ch)) {
281 if (ch == ')' || ch == ']')
282 preferRE = false;
283 else
284 preferRE = true;
285 styler.ColourTo(i, SCE_PL_OPERATOR);
286 state = SCE_PL_DEFAULT;
287 }
288 }
289 }
290 } else {
291 if (state == SCE_PL_COMMENTLINE) {
292 if (ch == '\r' || ch == '\n') {
293 styler.ColourTo(i - 1, state);
294 state = SCE_PL_DEFAULT;
295 }
296 } else if (state == SCE_PL_HERE) {
297 if (isalnum(ch) && quotes < 2) {
298 sooked[sookedpos++] = ch;
299 sooked[sookedpos] = '\0';
300 if (quotes == 0)
301 quotes = 1;
302 } else {
303 quotes++;
304 }
305
306 if (quotes > 1 && isMatch(styler, lengthDoc, i, sooked)) {
307 styler.ColourTo(i + sookedpos - 1, SCE_PL_HERE);
308 state = SCE_PL_DEFAULT;
309 i += sookedpos;
310 chNext = ' ';
311 }
312 } else if (state == SCE_PL_STRING) {
313 if (ch == '\\') {
314 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
315 i++;
316 ch = chNext;
317 chNext = styler.SafeGetCharAt(i + 1);
318 }
319 } else if (ch == '\"') {
320 styler.ColourTo(i, state);
321 state = SCE_PL_DEFAULT;
322 i++;
323 ch = chNext;
324 chNext = styler.SafeGetCharAt(i + 1);
325 }
326 } else if (state == SCE_PL_CHARACTER) {
327 if (ch == '\\') {
328 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
329 i++;
330 ch = chNext;
331 chNext = styler.SafeGetCharAt(i + 1);
332 }
333 } else if (ch == '\'') {
334 styler.ColourTo(i, state);
335 state = SCE_PL_DEFAULT;
336 i++;
337 ch = chNext;
338 chNext = styler.SafeGetCharAt(i + 1);
339 }
340 } else if (state == SCE_PL_BACKTICKS) {
341 if (ch == '`') {
342 styler.ColourTo(i, state);
343 state = SCE_PL_DEFAULT;
344 i++;
345 ch = chNext;
346 chNext = styler.SafeGetCharAt(i + 1);
347 }
348 } else if (state == SCE_PL_POD) {
349 if (ch == '=') {
350 if (isMatch(styler, lengthDoc, i, "=cut")) {
351 styler.ColourTo(i - 1 + 4, state);
352 i += 4;
353 state = SCE_PL_DEFAULT;
354 chNext = ' ';
355 ch = ' ';
356 }
357 }
358 } else if (state == SCE_PL_SCALAR) {
359 if (isEndVar(ch)) {
360 styler.ColourTo(i - 1, state);
361 state = SCE_PL_DEFAULT;
362 }
363 } else if (state == SCE_PL_ARRAY) {
364 if (isEndVar(ch)) {
365 styler.ColourTo(i - 1, state);
366 state = SCE_PL_DEFAULT;
367 }
368 } else if (state == SCE_PL_HASH) {
369 if (isEndVar(ch)) {
370 styler.ColourTo(i - 1, state);
371 state = SCE_PL_DEFAULT;
372 }
373 } else if (state == SCE_PL_SYMBOLTABLE) {
374 if (isEndVar(ch)) {
375 styler.ColourTo(i - 1, state);
376 state = SCE_PL_DEFAULT;
377 }
378 } else if (state == SCE_PL_REF) {
379 if (isEndVar(ch)) {
380 styler.ColourTo(i - 1, state);
381 state = SCE_PL_DEFAULT;
382 }
383 } else if (state == SCE_PL_REGEX) {
384 if (!quoteUp && !isspace(ch)) {
385 quoteUp = ch;
386 quoteDown = opposite(ch);
387 quotes++;
388 } else {
389 if (ch == quoteDown && chPrev != '\\') {
390 quotes--;
391 if (quotes == 0) {
392 quoteRep--;
393 if (quoteUp == quoteDown) {
394 quotes++;
395 }
396 }
397 if (!isalpha(chNext)) {
398 if (quoteRep <= 0) {
399 styler.ColourTo(i, state);
400 state = SCE_PL_DEFAULT;
401 ch = ' ';
402 }
403 }
404 } else if (ch == quoteUp && chPrev != '\\') {
405 quotes++;
406 } else if (!isalpha(chNext)) {
407 if (quoteRep <= 0) {
408 styler.ColourTo(i, state);
409 state = SCE_PL_DEFAULT;
410 ch = ' ';
411 }
412 }
413 }
414 } else if (state == SCE_PL_REGSUBST) {
415 if (!quoteUp && !isspace(ch)) {
416 quoteUp = ch;
417 quoteDown = opposite(ch);
418 quotes++;
419 } else {
420 if (ch == quoteDown && chPrev != '\\') {
421 quotes--;
422 if (quotes == 0) {
423 quoteRep--;
424 }
425 if (!isalpha(chNext)) {
426 if (quoteRep <= 0) {
427 styler.ColourTo(i, state);
428 state = SCE_PL_DEFAULT;
429 ch = ' ';
430 }
431 }
432 if (quoteUp == quoteDown) {
433 quotes++;
434 }
435 } else if (ch == quoteUp && chPrev != '\\') {
436 quotes++;
437 } else if (!isalpha(chNext)) {
438 if (quoteRep <= 0) {
439 styler.ColourTo(i, state);
440 state = SCE_PL_DEFAULT;
441 ch = ' ';
442 }
443 }
444 }
445 } else if (state == SCE_PL_LONGQUOTE) {
446 if (!quoteDown && !isspace(ch)) {
447 quoteUp = ch;
448 quoteDown = opposite(quoteUp);
449 quotes++;
450 } else if (ch == quoteDown) {
451 quotes--;
452 if (quotes == 0) {
453 quoteRep--;
454 if (quoteRep <= 0) {
455 styler.ColourTo(i, state);
456 state = SCE_PL_DEFAULT;
457 ch = ' ';
458 }
459 if (quoteUp == quoteDown) {
460 quotes++;
461 }
462 }
463 } else if (ch == quoteUp) {
464 quotes++;
465 }
466 }
467
468 if (state == SCE_PL_DEFAULT) { // One of the above succeeded
469 if (ch == '#') {
470 state = SCE_PL_COMMENTLINE;
471 } else if (ch == '\"') {
472 state = SCE_PL_STRING;
473 } else if (ch == '\'') {
474 state = SCE_PL_CHARACTER;
475 } else if (iswordstart(ch)) {
476 state = SCE_PL_WORD;
477 preferRE = false;
478 } else if (isoperator(ch)) {
479 styler.ColourTo(i, SCE_PL_OPERATOR);
480 }
481 }
482 }
483 chPrev = ch;
484 }
485 styler.ColourTo(lengthDoc, state);
486 }
487
// Module object that pairs the SCLEX_PERL lexer identifier with ColourisePerlDoc.
// NOTE(review): presumably the LexerModule constructor registers this lexer so it
// can be looked up by SCLEX_PERL -- confirm against the LexerModule declaration.
488 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc);