]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPerl.cxx
3c9972e2d6877e31adc5a6787013964dc714b8f2
[wxWidgets.git] / src / stc / scintilla / src / LexPerl.cxx
1 // SciTE - Scintilla based Text Editor
2 // LexPerl.cxx - lexer for subset of Perl
3 // Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
5
6 #include <stdlib.h>
7 #include <string.h>
8 #include <ctype.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11
12 #include "Platform.h"
13
14 #include "PropSet.h"
15 #include "Accessor.h"
16 #include "KeyWords.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19
// Returns true when ch is one of the punctuation characters that the Perl
// lexer colours as an operator.
// Note that '.' IS treated as an operator here, while '@', '$', '#' and '_'
// are not part of the set.
inline bool isPerlOperator(char ch) {
	// The <ctype.h> classifiers are only defined for values representable as
	// unsigned char (or EOF), so convert before calling to stay well defined
	// for bytes >= 0x80 on platforms where char is signed.
	if (isalnum(static_cast<unsigned char>(ch)))
		return false;
	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || ch == '\\' ||
	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
		return true;
	return false;
}
33
34 static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
35 char s[100];
36 bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
37 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
38 s[i] = styler[start + i];
39 s[i + 1] = '\0';
40 }
41 char chAttr = SCE_PL_IDENTIFIER;
42 if (wordIsNumber)
43 chAttr = SCE_PL_NUMBER;
44 else {
45 if (keywords.InList(s))
46 chAttr = SCE_PL_WORD;
47 }
48 styler.ColourTo(end, chAttr);
49 return chAttr;
50 }
51
// Returns true when ch cannot be part of a variable name, i.e. it is not
// alphanumeric and is none of '#', '$', '_' or '\''.
static bool isEndVar(char ch) {
	// isalnum() is undefined for negative arguments, so go through unsigned char.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
	       ch != '_' && ch != '\'';
}
56
57 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
58 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
59 return false;
60 }
61 while (*val) {
62 if (*val != styler[pos++]) {
63 return false;
64 }
65 val++;
66 }
67 return true;
68 }
69
// Maps an opening bracket character to its closing partner; any other
// character is returned unchanged (it acts as its own closing delimiter).
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
81
82 static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
83 WordList *keywordlists[], Accessor &styler) {
84
85 // Lexer for perl often has to backtrack to start of current style to determine
86 // which characters are being used as quotes, how deeply nested is the
87 // start position and what the termination string is for here documents
88
89 WordList &keywords = *keywordlists[0];
90
91 char sooked[100];
92 int quotes = 0;
93 char quoteDown = 'd';
94 char quoteUp = 'd';
95 int quoteRep = 1;
96 int sookedpos = 0;
97 bool preferRE = true;
98 sooked[sookedpos] = '\0';
99 int state = initStyle;
100 int lengthDoc = startPos + length;
101 // If in a long distance lexical state, seek to the beginning to find quote characters
102 if (state == SCE_PL_HERE || state == SCE_PL_REGEX ||
103 state == SCE_PL_REGSUBST || state == SCE_PL_LONGQUOTE) {
104 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
105 startPos--;
106 }
107 state = SCE_PL_DEFAULT;
108 }
109 styler.StartAt(startPos);
110 char chPrev = ' ';
111 char chNext = styler[startPos];
112 styler.StartSegment(startPos);
113 for (int i = startPos; i < lengthDoc; i++) {
114 char ch = chNext;
115 chNext = styler.SafeGetCharAt(i + 1);
116 char chNext2 = styler.SafeGetCharAt(i + 2);
117
118 if (styler.IsLeadByte(ch)) {
119 chNext = styler.SafeGetCharAt(i + 2);
120 chPrev = ' ';
121 i += 1;
122 continue;
123 }
124
125 if (state == SCE_PL_DEFAULT) {
126 if (iswordstart(ch)) {
127 styler.ColourTo(i - 1, state);
128 if (ch == 's' && !isalnum(chNext)) {
129 state = SCE_PL_REGSUBST;
130 quotes = 0;
131 quoteUp = '\0';
132 quoteDown = '\0';
133 quoteRep = 2;
134 } else if (ch == 'm' && !isalnum(chNext)) {
135 state = SCE_PL_REGEX;
136 quotes = 0;
137 quoteUp = '\0';
138 quoteDown = '\0';
139 quoteRep = 1;
140 } else if (ch == 't' && chNext == 'r' && !isalnum(chNext2)) {
141 state = SCE_PL_REGSUBST;
142 quotes = 0;
143 quoteUp = '\0';
144 quoteDown = '\0';
145 quoteRep = 2;
146 i++;
147 chNext = chNext2;
148 } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isalnum(chNext2)) {
149 state = SCE_PL_LONGQUOTE;
150 i++;
151 chNext = chNext2;
152 quotes = 0;
153 quoteUp = '\0';
154 quoteDown = '\0';
155 quoteRep = 1;
156 } else {
157 state = SCE_PL_WORD;
158 preferRE = false;
159 }
160 } else if (ch == '#') {
161 styler.ColourTo(i - 1, state);
162 state = SCE_PL_COMMENTLINE;
163 } else if (ch == '\"') {
164 styler.ColourTo(i - 1, state);
165 state = SCE_PL_STRING;
166 } else if (ch == '\'') {
167 if (chPrev == '&') {
168 // Archaic call
169 styler.ColourTo(i, state);
170 } else {
171 styler.ColourTo(i - 1, state);
172 state = SCE_PL_CHARACTER;
173 }
174 } else if (ch == '`') {
175 styler.ColourTo(i - 1, state);
176 state = SCE_PL_BACKTICKS;
177 } else if (ch == '$') {
178 preferRE = false;
179 styler.ColourTo(i - 1, state);
180 if (isalnum(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
181 state = SCE_PL_SCALAR;
182 } else if (chNext != '{' && chNext != '[') {
183 styler.ColourTo(i, SCE_PL_SCALAR);
184 i++;
185 ch = ' ';
186 chNext = ' ';
187 } else {
188 styler.ColourTo(i, SCE_PL_SCALAR);
189 }
190 } else if (ch == '@') {
191 preferRE = false;
192 styler.ColourTo(i - 1, state);
193 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
194 state = SCE_PL_ARRAY;
195 } else if (chNext != '{' && chNext != '[') {
196 styler.ColourTo(i, SCE_PL_ARRAY);
197 i++;
198 ch = ' ';
199 } else {
200 styler.ColourTo(i, SCE_PL_ARRAY);
201 }
202 } else if (ch == '%') {
203 preferRE = false;
204 styler.ColourTo(i - 1, state);
205 if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
206 state = SCE_PL_HASH;
207 } else if (chNext != '{' && chNext != '[') {
208 styler.ColourTo(i, SCE_PL_HASH);
209 i++;
210 ch = ' ';
211 } else {
212 styler.ColourTo(i, SCE_PL_HASH);
213 }
214 } else if (ch == '*') {
215 styler.ColourTo(i - 1, state);
216 state = SCE_PL_SYMBOLTABLE;
217 } else if (ch == '/' && preferRE) {
218 styler.ColourTo(i - 1, state);
219 state = SCE_PL_REGEX;
220 quoteUp = '/';
221 quoteDown = '/';
222 quotes = 1;
223 quoteRep = 1;
224 } else if (ch == '<' && chNext == '<') {
225 styler.ColourTo(i - 1, state);
226 state = SCE_PL_HERE;
227 i++;
228 ch = chNext;
229 chNext = chNext2;
230 quotes = 0;
231 sookedpos = 0;
232 sooked[sookedpos] = '\0';
233 } else if (ch == '=' && isalpha(chNext)) {
234 styler.ColourTo(i - 1, state);
235 state = SCE_PL_POD;
236 quotes = 0;
237 sookedpos = 0;
238 sooked[sookedpos] = '\0';
239 } else if (isPerlOperator(ch)) {
240 if (ch == ')' || ch == ']')
241 preferRE = false;
242 else
243 preferRE = true;
244 styler.ColourTo(i - 1, state);
245 styler.ColourTo(i, SCE_PL_OPERATOR);
246 }
247 } else if (state == SCE_PL_WORD) {
248 if (!iswordchar(ch) && ch != '\'') { // Archaic Perl has quotes inside names
249 if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) {
250 styler.ColourTo(i, SCE_PL_DATASECTION);
251 state = SCE_PL_DATASECTION;
252 } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
253 styler.ColourTo(i, SCE_PL_DATASECTION);
254 state = SCE_PL_DATASECTION;
255 } else {
256 if (classifyWordPerl(styler.GetStartSegment(), i - 1, keywords, styler) == SCE_PL_WORD)
257 preferRE = true;
258 state = SCE_PL_DEFAULT;
259 if (ch == '#') {
260 state = SCE_PL_COMMENTLINE;
261 } else if (ch == '\"') {
262 state = SCE_PL_STRING;
263 } else if (ch == '\'') {
264 state = SCE_PL_CHARACTER;
265 } else if (ch == '<' && chNext == '<') {
266 state = SCE_PL_HERE;
267 quotes = 0;
268 sookedpos = 0;
269 sooked[sookedpos] = '\0';
270 } else if (isPerlOperator(ch)) {
271 if (ch == ')' || ch == ']')
272 preferRE = false;
273 else
274 preferRE = true;
275 styler.ColourTo(i, SCE_PL_OPERATOR);
276 state = SCE_PL_DEFAULT;
277 }
278 }
279 }
280 } else {
281 if (state == SCE_PL_COMMENTLINE) {
282 if (ch == '\r' || ch == '\n') {
283 styler.ColourTo(i - 1, state);
284 state = SCE_PL_DEFAULT;
285 }
286 } else if (state == SCE_PL_HERE) {
287 if (isalnum(ch) && quotes < 2) {
288 sooked[sookedpos++] = ch;
289 sooked[sookedpos] = '\0';
290 if (quotes == 0)
291 quotes = 1;
292 } else {
293 quotes++;
294 }
295 if (quotes > 1 && isMatch(styler, lengthDoc, i, sooked) && (chPrev == '\n' || chPrev == '\r') ) {
296 styler.ColourTo(i + sookedpos - 1, SCE_PL_HERE);
297 i += sookedpos;
298 chNext = styler.SafeGetCharAt(i);
299 if (chNext == '\n' || chNext == '\r') {
300 state = SCE_PL_DEFAULT;
301 }
302 chNext = ' ';
303 }
304 } else if (state == SCE_PL_STRING) {
305 if (ch == '\\') {
306 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
307 i++;
308 ch = chNext;
309 chNext = styler.SafeGetCharAt(i + 1);
310 }
311 } else if (ch == '\"') {
312 styler.ColourTo(i, state);
313 state = SCE_PL_DEFAULT;
314 i++;
315 ch = chNext;
316 chNext = styler.SafeGetCharAt(i + 1);
317 }
318 } else if (state == SCE_PL_CHARACTER) {
319 if (ch == '\\') {
320 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
321 i++;
322 ch = chNext;
323 chNext = styler.SafeGetCharAt(i + 1);
324 }
325 } else if (ch == '\'') {
326 styler.ColourTo(i, state);
327 state = SCE_PL_DEFAULT;
328 i++;
329 ch = chNext;
330 chNext = styler.SafeGetCharAt(i + 1);
331 }
332 } else if (state == SCE_PL_BACKTICKS) {
333 if (ch == '`') {
334 styler.ColourTo(i, state);
335 state = SCE_PL_DEFAULT;
336 i++;
337 ch = chNext;
338 chNext = styler.SafeGetCharAt(i + 1);
339 }
340 } else if (state == SCE_PL_POD) {
341 if (ch == '=') {
342 if (isMatch(styler, lengthDoc, i, "=cut")) {
343 styler.ColourTo(i - 1 + 4, state);
344 i += 4;
345 state = SCE_PL_DEFAULT;
346 chNext = ' ';
347 ch = ' ';
348 }
349 }
350 } else if (state == SCE_PL_SCALAR) {
351 if (isEndVar(ch)) {
352 styler.ColourTo(i - 1, state);
353 state = SCE_PL_DEFAULT;
354 }
355 } else if (state == SCE_PL_ARRAY) {
356 if (isEndVar(ch)) {
357 styler.ColourTo(i - 1, state);
358 state = SCE_PL_DEFAULT;
359 }
360 } else if (state == SCE_PL_HASH) {
361 if (isEndVar(ch)) {
362 styler.ColourTo(i - 1, state);
363 state = SCE_PL_DEFAULT;
364 }
365 } else if (state == SCE_PL_SYMBOLTABLE) {
366 if (isEndVar(ch)) {
367 styler.ColourTo(i - 1, state);
368 state = SCE_PL_DEFAULT;
369 }
370 } else if (state == SCE_PL_REF) {
371 if (isEndVar(ch)) {
372 styler.ColourTo(i - 1, state);
373 state = SCE_PL_DEFAULT;
374 }
375 } else if (state == SCE_PL_REGEX) {
376 if (!quoteUp && !isspace(ch)) {
377 quoteUp = ch;
378 quoteDown = opposite(ch);
379 quotes++;
380 } else {
381 if (ch == quoteDown && chPrev != '\\') {
382 quotes--;
383 if (quotes == 0) {
384 quoteRep--;
385 if (quoteUp == quoteDown) {
386 quotes++;
387 }
388 }
389 if (!isalpha(chNext)) {
390 if (quoteRep <= 0) {
391 styler.ColourTo(i, state);
392 state = SCE_PL_DEFAULT;
393 ch = ' ';
394 }
395 }
396 } else if (ch == quoteUp && chPrev != '\\') {
397 quotes++;
398 } else if (!isalpha(chNext)) {
399 if (quoteRep <= 0) {
400 styler.ColourTo(i, state);
401 state = SCE_PL_DEFAULT;
402 ch = ' ';
403 }
404 }
405 }
406 } else if (state == SCE_PL_REGSUBST) {
407 if (!quoteUp && !isspace(ch)) {
408 quoteUp = ch;
409 quoteDown = opposite(ch);
410 quotes++;
411 } else {
412 if (ch == quoteDown && chPrev != '\\') {
413 quotes--;
414 if (quotes == 0) {
415 quoteRep--;
416 }
417 if (!isalpha(chNext)) {
418 if (quoteRep <= 0) {
419 styler.ColourTo(i, state);
420 state = SCE_PL_DEFAULT;
421 ch = ' ';
422 }
423 }
424 if (quoteUp == quoteDown) {
425 quotes++;
426 }
427 } else if (ch == quoteUp && chPrev != '\\') {
428 quotes++;
429 } else if (!isalpha(chNext)) {
430 if (quoteRep <= 0) {
431 styler.ColourTo(i, state);
432 state = SCE_PL_DEFAULT;
433 ch = ' ';
434 }
435 }
436 }
437 } else if (state == SCE_PL_LONGQUOTE) {
438 if (!quoteDown && !isspace(ch)) {
439 quoteUp = ch;
440 quoteDown = opposite(quoteUp);
441 quotes++;
442 } else if (ch == quoteDown) {
443 quotes--;
444 if (quotes == 0) {
445 quoteRep--;
446 if (quoteRep <= 0) {
447 styler.ColourTo(i, state);
448 state = SCE_PL_DEFAULT;
449 ch = ' ';
450 }
451 if (quoteUp == quoteDown) {
452 quotes++;
453 }
454 }
455 } else if (ch == quoteUp) {
456 quotes++;
457 }
458 }
459
460 if (state == SCE_PL_DEFAULT) { // One of the above succeeded
461 if (ch == '#') {
462 state = SCE_PL_COMMENTLINE;
463 } else if (ch == '\"') {
464 state = SCE_PL_STRING;
465 } else if (ch == '\'') {
466 state = SCE_PL_CHARACTER;
467 } else if (iswordstart(ch)) {
468 state = SCE_PL_WORD;
469 preferRE = false;
470 } else if (isoperator(ch)) {
471 styler.ColourTo(i, SCE_PL_OPERATOR);
472 }
473 }
474 }
475 chPrev = ch;
476 }
477 styler.ColourTo(lengthDoc, state);
478 }
479
// File-scope LexerModule object built from the SCLEX_PERL identifier and the
// ColourisePerlDoc colouring function. Presumably constructing it is what makes
// the Perl lexer available to Scintilla; LexerModule is declared elsewhere, so
// confirm against its definition.
480 LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc);