]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexErlang.cxx
simplify code so it always returns the same object
[wxWidgets.git] / src / stc / scintilla / lexers / LexErlang.cxx
1 // Scintilla source code edit control
2 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
3 // The License.txt file describes the conditions under which this software may be distributed.
4 /** @file LexErlang.cxx
5 ** Lexer for Erlang.
6 ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
7 ** Originally wrote by Peter-Henry Mander,
8 ** based on Matlab lexer by José Fonseca.
9 **/
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32
/**
 * Tell whether ch is a valid digit in the given radix.
 *
 * Digits are '0'-'9' followed by the letters 'A'-'Z' (case-insensitive)
 * standing for the values 10-35.
 *
 * @param radix numeric base; anything outside 2..36 is rejected.
 * @param ch    character value to classify. Any int is accepted: values that
 *              are not ASCII digits or letters simply yield 0.
 * @return 1 when ch is a digit whose value is below radix, 0 otherwise.
 */
static int is_radix(int radix, int ch) {
	int digit;

	if (radix < 2 || radix > 36)
		return 0;

	// Explicit range checks instead of isdigit()/isalnum()/toupper(): those
	// have undefined behaviour when ch is not representable as unsigned char,
	// and the letter arithmetic below already assumes contiguous ASCII letters.
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else {
		return 0;
	}

	return (digit < radix);
}
49
/**
 * Sub-state of the Erlang colouriser, tracked alongside the Scintilla style
 * while a token is still being recognised. STATE_NULL means no token is in
 * progress and only the current style drives the lexer.
 */
typedef enum {
	STATE_NULL,

	/* Comments: one, two or three leading '%', plus '@' documentation tags */
	COMMENT,
	COMMENT_FUNCTION,
	COMMENT_MODULE,
	COMMENT_DOC,
	COMMENT_DOC_MACRO,

	/* Atoms, bare or enclosed in single quotes */
	ATOM_UNQUOTED,
	ATOM_QUOTED,

	/* Node names: an atom in which an '@' was seen */
	NODE_NAME_UNQUOTED,
	NODE_NAME_QUOTED,

	/* Macros, introduced by '?' */
	MACRO_START,
	MACRO_UNQUOTED,
	MACRO_QUOTED,

	/* Records, introduced by '#' */
	RECORD_START,
	RECORD_UNQUOTED,
	RECORD_QUOTED,

	/* Numbers: plain integer, radix#value, float, exponent part */
	NUMERAL_START,
	NUMERAL_BASE_VALUE,
	NUMERAL_FLOAT,
	NUMERAL_EXPONENT,

	/* Word following a '-' that is not the sign of a number */
	PREPROCESSOR
} atom_parse_state_t;
73
/**
 * Tell whether ch may appear inside an unquoted word (atom, macro, record or
 * preprocessor name): an ASCII letter, an ASCII digit or '_'.
 *
 * @param ch character value; any int is accepted.
 * @return true for ASCII alphanumerics and '_', false for everything else,
 *         including negative values and values of 0x80 and above.
 */
static inline bool IsAWordChar(const int ch) {
	// The range test comes first so that isalnum() is never called with a
	// value it is not defined for (negative or above the 7-bit range).
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
77
78 static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
79 WordList *keywordlists[], Accessor &styler) {
80
81 StyleContext sc(startPos, length, initStyle, styler);
82 WordList &reservedWords = *keywordlists[0];
83 WordList &erlangBIFs = *keywordlists[1];
84 WordList &erlangPreproc = *keywordlists[2];
85 WordList &erlangModulesAtt = *keywordlists[3];
86 WordList &erlangDoc = *keywordlists[4];
87 WordList &erlangDocMacro = *keywordlists[5];
88 int radix_digits = 0;
89 int exponent_digits = 0;
90 atom_parse_state_t parse_state = STATE_NULL;
91 atom_parse_state_t old_parse_state = STATE_NULL;
92 bool to_late_to_comment = false;
93 char cur[100];
94 int old_style = SCE_ERLANG_DEFAULT;
95
96 styler.StartAt(startPos);
97
98 for (; sc.More(); sc.Forward()) {
99 int style = SCE_ERLANG_DEFAULT;
100 if (STATE_NULL != parse_state) {
101
102 switch (parse_state) {
103
104 case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
105
106 /* COMMENTS ------------------------------------------------------*/
107 case COMMENT : {
108 if (sc.ch != '%') {
109 to_late_to_comment = true;
110 } else if (!to_late_to_comment && sc.ch == '%') {
111 // Switch to comment level 2 (Function)
112 sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
113 old_style = SCE_ERLANG_COMMENT_FUNCTION;
114 parse_state = COMMENT_FUNCTION;
115 sc.Forward();
116 }
117 }
118 // V--- Falling through!
119 case COMMENT_FUNCTION : {
120 if (sc.ch != '%') {
121 to_late_to_comment = true;
122 } else if (!to_late_to_comment && sc.ch == '%') {
123 // Switch to comment level 3 (Module)
124 sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
125 old_style = SCE_ERLANG_COMMENT_MODULE;
126 parse_state = COMMENT_MODULE;
127 sc.Forward();
128 }
129 }
130 // V--- Falling through!
131 case COMMENT_MODULE : {
132 if (parse_state != COMMENT) {
133 // Search for comment documentation
134 if (sc.chNext == '@') {
135 old_parse_state = parse_state;
136 parse_state = ('{' == sc.ch)
137 ? COMMENT_DOC_MACRO
138 : COMMENT_DOC;
139 sc.ForwardSetState(sc.state);
140 }
141 }
142
143 // All comments types fall here.
144 if (sc.atLineEnd) {
145 to_late_to_comment = false;
146 sc.SetState(SCE_ERLANG_DEFAULT);
147 parse_state = STATE_NULL;
148 }
149 } break;
150
151 case COMMENT_DOC :
152 // V--- Falling through!
153 case COMMENT_DOC_MACRO : {
154
155 if (!isalnum(sc.ch)) {
156 // Try to match documentation comment
157 sc.GetCurrent(cur, sizeof(cur));
158
159 if (parse_state == COMMENT_DOC_MACRO
160 && erlangDocMacro.InList(cur)) {
161 sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
162 while (sc.ch != '}' && !sc.atLineEnd)
163 sc.Forward();
164 } else if (erlangDoc.InList(cur)) {
165 sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
166 } else {
167 sc.ChangeState(old_style);
168 }
169
170 // Switch back to old state
171 sc.SetState(old_style);
172 parse_state = old_parse_state;
173 }
174
175 if (sc.atLineEnd) {
176 to_late_to_comment = false;
177 sc.ChangeState(old_style);
178 sc.SetState(SCE_ERLANG_DEFAULT);
179 parse_state = STATE_NULL;
180 }
181 } break;
182
183 /* -------------------------------------------------------------- */
184 /* Atoms ---------------------------------------------------------*/
185 case ATOM_UNQUOTED : {
186 if ('@' == sc.ch){
187 parse_state = NODE_NAME_UNQUOTED;
188 } else if (sc.ch == ':') {
189 // Searching for module name
190 if (sc.chNext == ' ') {
191 // error
192 sc.ChangeState(SCE_ERLANG_UNKNOWN);
193 parse_state = STATE_NULL;
194 } else {
195 sc.Forward();
196 if (isalnum(sc.ch)) {
197 sc.GetCurrent(cur, sizeof(cur));
198 sc.ChangeState(SCE_ERLANG_MODULES);
199 sc.SetState(SCE_ERLANG_MODULES);
200 }
201 }
202 } else if (!IsAWordChar(sc.ch)) {
203
204 sc.GetCurrent(cur, sizeof(cur));
205 if (reservedWords.InList(cur)) {
206 style = SCE_ERLANG_KEYWORD;
207 } else if (erlangBIFs.InList(cur)
208 && strcmp(cur,"erlang:")){
209 style = SCE_ERLANG_BIFS;
210 } else if (sc.ch == '(' || '/' == sc.ch){
211 style = SCE_ERLANG_FUNCTION_NAME;
212 } else {
213 style = SCE_ERLANG_ATOM;
214 }
215
216 sc.ChangeState(style);
217 sc.SetState(SCE_ERLANG_DEFAULT);
218 parse_state = STATE_NULL;
219 }
220
221 } break;
222
223 case ATOM_QUOTED : {
224 if ( '@' == sc.ch ){
225 parse_state = NODE_NAME_QUOTED;
226 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
227 sc.ChangeState(SCE_ERLANG_ATOM);
228 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
229 parse_state = STATE_NULL;
230 }
231 } break;
232
233 /* -------------------------------------------------------------- */
234 /* Node names ----------------------------------------------------*/
235 case NODE_NAME_UNQUOTED : {
236 if ('@' == sc.ch) {
237 sc.SetState(SCE_ERLANG_DEFAULT);
238 parse_state = STATE_NULL;
239 } else if (!IsAWordChar(sc.ch)) {
240 sc.ChangeState(SCE_ERLANG_NODE_NAME);
241 sc.SetState(SCE_ERLANG_DEFAULT);
242 parse_state = STATE_NULL;
243 }
244 } break;
245
246 case NODE_NAME_QUOTED : {
247 if ('@' == sc.ch) {
248 sc.SetState(SCE_ERLANG_DEFAULT);
249 parse_state = STATE_NULL;
250 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
251 sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
252 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
253 parse_state = STATE_NULL;
254 }
255 } break;
256
257 /* -------------------------------------------------------------- */
258 /* Records -------------------------------------------------------*/
259 case RECORD_START : {
260 if ('\'' == sc.ch) {
261 parse_state = RECORD_QUOTED;
262 } else if (isalpha(sc.ch) && islower(sc.ch)) {
263 parse_state = RECORD_UNQUOTED;
264 } else { // error
265 sc.SetState(SCE_ERLANG_DEFAULT);
266 parse_state = STATE_NULL;
267 }
268 } break;
269
270 case RECORD_UNQUOTED : {
271 if (!IsAWordChar(sc.ch)) {
272 sc.ChangeState(SCE_ERLANG_RECORD);
273 sc.SetState(SCE_ERLANG_DEFAULT);
274 parse_state = STATE_NULL;
275 }
276 } break;
277
278 case RECORD_QUOTED : {
279 if ('\'' == sc.ch && '\\' != sc.chPrev) {
280 sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
281 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
282 parse_state = STATE_NULL;
283 }
284 } break;
285
286 /* -------------------------------------------------------------- */
287 /* Macros --------------------------------------------------------*/
288 case MACRO_START : {
289 if ('\'' == sc.ch) {
290 parse_state = MACRO_QUOTED;
291 } else if (isalpha(sc.ch)) {
292 parse_state = MACRO_UNQUOTED;
293 } else { // error
294 sc.SetState(SCE_ERLANG_DEFAULT);
295 parse_state = STATE_NULL;
296 }
297 } break;
298
299 case MACRO_UNQUOTED : {
300 if (!IsAWordChar(sc.ch)) {
301 sc.ChangeState(SCE_ERLANG_MACRO);
302 sc.SetState(SCE_ERLANG_DEFAULT);
303 parse_state = STATE_NULL;
304 }
305 } break;
306
307 case MACRO_QUOTED : {
308 if ('\'' == sc.ch && '\\' != sc.chPrev) {
309 sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
310 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
311 parse_state = STATE_NULL;
312 }
313 } break;
314
315 /* -------------------------------------------------------------- */
316 /* Numerics ------------------------------------------------------*/
317 /* Simple integer */
318 case NUMERAL_START : {
319 if (isdigit(sc.ch)) {
320 radix_digits *= 10;
321 radix_digits += sc.ch - '0'; // Assuming ASCII here!
322 } else if ('#' == sc.ch) {
323 if (2 > radix_digits || 36 < radix_digits) {
324 sc.SetState(SCE_ERLANG_DEFAULT);
325 parse_state = STATE_NULL;
326 } else {
327 parse_state = NUMERAL_BASE_VALUE;
328 }
329 } else if ('.' == sc.ch && isdigit(sc.chNext)) {
330 radix_digits = 0;
331 parse_state = NUMERAL_FLOAT;
332 } else if ('e' == sc.ch || 'E' == sc.ch) {
333 exponent_digits = 0;
334 parse_state = NUMERAL_EXPONENT;
335 } else {
336 radix_digits = 0;
337 sc.ChangeState(SCE_ERLANG_NUMBER);
338 sc.SetState(SCE_ERLANG_DEFAULT);
339 parse_state = STATE_NULL;
340 }
341 } break;
342
343 /* Integer in other base than 10 (x#yyy) */
344 case NUMERAL_BASE_VALUE : {
345 if (!is_radix(radix_digits,sc.ch)) {
346 radix_digits = 0;
347
348 if (!isalnum(sc.ch))
349 sc.ChangeState(SCE_ERLANG_NUMBER);
350
351 sc.SetState(SCE_ERLANG_DEFAULT);
352 parse_state = STATE_NULL;
353 }
354 } break;
355
356 /* Float (x.yyy) */
357 case NUMERAL_FLOAT : {
358 if ('e' == sc.ch || 'E' == sc.ch) {
359 exponent_digits = 0;
360 parse_state = NUMERAL_EXPONENT;
361 } else if (!isdigit(sc.ch)) {
362 sc.ChangeState(SCE_ERLANG_NUMBER);
363 sc.SetState(SCE_ERLANG_DEFAULT);
364 parse_state = STATE_NULL;
365 }
366 } break;
367
368 /* Exponent, either integer or float (xEyy, x.yyEzzz) */
369 case NUMERAL_EXPONENT : {
370 if (('-' == sc.ch || '+' == sc.ch)
371 && (isdigit(sc.chNext))) {
372 sc.Forward();
373 } else if (!isdigit(sc.ch)) {
374 if (0 < exponent_digits)
375 sc.ChangeState(SCE_ERLANG_NUMBER);
376 sc.SetState(SCE_ERLANG_DEFAULT);
377 parse_state = STATE_NULL;
378 } else {
379 ++exponent_digits;
380 }
381 } break;
382
383 /* -------------------------------------------------------------- */
384 /* Preprocessor --------------------------------------------------*/
385 case PREPROCESSOR : {
386 if (!IsAWordChar(sc.ch)) {
387
388 sc.GetCurrent(cur, sizeof(cur));
389 if (erlangPreproc.InList(cur)) {
390 style = SCE_ERLANG_PREPROC;
391 } else if (erlangModulesAtt.InList(cur)) {
392 style = SCE_ERLANG_MODULES_ATT;
393 }
394
395 sc.ChangeState(style);
396 sc.SetState(SCE_ERLANG_DEFAULT);
397 parse_state = STATE_NULL;
398 }
399 } break;
400
401 }
402
403 } /* End of : STATE_NULL != parse_state */
404 else
405 {
406 switch (sc.state) {
407 case SCE_ERLANG_VARIABLE : {
408 if (!IsAWordChar(sc.ch))
409 sc.SetState(SCE_ERLANG_DEFAULT);
410 } break;
411 case SCE_ERLANG_STRING : {
412 if (sc.ch == '\"' && sc.chPrev != '\\')
413 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
414 } break;
415 case SCE_ERLANG_COMMENT : {
416 if (sc.atLineEnd)
417 sc.SetState(SCE_ERLANG_DEFAULT);
418 } break;
419 case SCE_ERLANG_CHARACTER : {
420 if (sc.chPrev == '\\') {
421 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
422 } else if (sc.ch != '\\') {
423 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
424 }
425 } break;
426 case SCE_ERLANG_OPERATOR : {
427 if (sc.chPrev == '.') {
428 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
429 || sc.ch == '^') {
430 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
431 } else if (sc.ch == '\'') {
432 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
433 } else {
434 sc.SetState(SCE_ERLANG_DEFAULT);
435 }
436 } else {
437 sc.SetState(SCE_ERLANG_DEFAULT);
438 }
439 } break;
440 }
441 }
442
443 if (sc.state == SCE_ERLANG_DEFAULT) {
444 bool no_new_state = false;
445
446 switch (sc.ch) {
447 case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
448 case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
449 case '%' : {
450 parse_state = COMMENT;
451 sc.SetState(SCE_ERLANG_COMMENT);
452 } break;
453 case '#' : {
454 parse_state = RECORD_START;
455 sc.SetState(SCE_ERLANG_UNKNOWN);
456 } break;
457 case '?' : {
458 parse_state = MACRO_START;
459 sc.SetState(SCE_ERLANG_UNKNOWN);
460 } break;
461 case '\'' : {
462 parse_state = ATOM_QUOTED;
463 sc.SetState(SCE_ERLANG_UNKNOWN);
464 } break;
465 case '+' :
466 case '-' : {
467 if (IsADigit(sc.chNext)) {
468 parse_state = NUMERAL_START;
469 radix_digits = 0;
470 sc.SetState(SCE_ERLANG_UNKNOWN);
471 } else if (sc.ch != '+') {
472 parse_state = PREPROCESSOR;
473 sc.SetState(SCE_ERLANG_UNKNOWN);
474 }
475 } break;
476 default : no_new_state = true;
477 }
478
479 if (no_new_state) {
480 if (isdigit(sc.ch)) {
481 parse_state = NUMERAL_START;
482 radix_digits = sc.ch - '0';
483 sc.SetState(SCE_ERLANG_UNKNOWN);
484 } else if (isupper(sc.ch) || '_' == sc.ch) {
485 sc.SetState(SCE_ERLANG_VARIABLE);
486 } else if (isalpha(sc.ch)) {
487 parse_state = ATOM_UNQUOTED;
488 sc.SetState(SCE_ERLANG_UNKNOWN);
489 } else if (isoperator(static_cast<char>(sc.ch))
490 || sc.ch == '\\') {
491 sc.SetState(SCE_ERLANG_OPERATOR);
492 }
493 }
494 }
495
496 }
497 sc.Complete();
498 }
499
500 static int ClassifyErlangFoldPoint(
501 Accessor &styler,
502 int styleNext,
503 int keyword_start
504 ) {
505 int lev = 0;
506 if (styler.Match(keyword_start,"case")
507 || (
508 styler.Match(keyword_start,"fun")
509 && (SCE_ERLANG_FUNCTION_NAME != styleNext)
510 )
511 || styler.Match(keyword_start,"if")
512 || styler.Match(keyword_start,"query")
513 || styler.Match(keyword_start,"receive")
514 ) {
515 ++lev;
516 } else if (styler.Match(keyword_start,"end")) {
517 --lev;
518 }
519
520 return lev;
521 }
522
523 static void FoldErlangDoc(
524 unsigned int startPos, int length, int initStyle,
525 WordList** /*keywordlists*/, Accessor &styler
526 ) {
527 unsigned int endPos = startPos + length;
528 int currentLine = styler.GetLine(startPos);
529 int lev;
530 int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
531 int currentLevel = previousLevel;
532 int styleNext = styler.StyleAt(startPos);
533 int style = initStyle;
534 int stylePrev;
535 int keyword_start = 0;
536 char ch;
537 char chNext = styler.SafeGetCharAt(startPos);
538 bool atEOL;
539
540 for (unsigned int i = startPos; i < endPos; i++) {
541 ch = chNext;
542 chNext = styler.SafeGetCharAt(i + 1);
543
544 // Get styles
545 stylePrev = style;
546 style = styleNext;
547 styleNext = styler.StyleAt(i + 1);
548 atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
549
550 if (stylePrev != SCE_ERLANG_KEYWORD
551 && style == SCE_ERLANG_KEYWORD) {
552 keyword_start = i;
553 }
554
555 // Fold on keywords
556 if (stylePrev == SCE_ERLANG_KEYWORD
557 && style != SCE_ERLANG_KEYWORD
558 && style != SCE_ERLANG_ATOM
559 ) {
560 currentLevel += ClassifyErlangFoldPoint(styler,
561 styleNext,
562 keyword_start);
563 }
564
565 // Fold on comments
566 if (style == SCE_ERLANG_COMMENT
567 || style == SCE_ERLANG_COMMENT_MODULE
568 || style == SCE_ERLANG_COMMENT_FUNCTION) {
569
570 if (ch == '%' && chNext == '{') {
571 currentLevel++;
572 } else if (ch == '%' && chNext == '}') {
573 currentLevel--;
574 }
575 }
576
577 // Fold on braces
578 if (style == SCE_ERLANG_OPERATOR) {
579 if (ch == '{' || ch == '(' || ch == '[') {
580 currentLevel++;
581 } else if (ch == '}' || ch == ')' || ch == ']') {
582 currentLevel--;
583 }
584 }
585
586
587 if (atEOL) {
588 lev = previousLevel;
589
590 if (currentLevel > previousLevel)
591 lev |= SC_FOLDLEVELHEADERFLAG;
592
593 if (lev != styler.LevelAt(currentLine))
594 styler.SetLevel(currentLine, lev);
595
596 currentLine++;
597 previousLevel = currentLevel;
598 }
599
600 }
601
602 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
603 styler.SetLevel(currentLine,
604 previousLevel
605 | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
606 }
607
/*
 * Descriptions of the keyword lists, in the order ColouriseErlangDoc reads
 * them from its keywordlists argument. The list ends with a null entry.
 */
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",       /* keywordlists[0] */
	"Erlang BIFs",                 /* keywordlists[1] */
	"Erlang Preprocessor",         /* keywordlists[2] */
	"Erlang Module Attributes",    /* keywordlists[3] */
	"Erlang Documentation",        /* keywordlists[4] */
	"Erlang Documentation Macro",  /* keywordlists[5] */
	0                              /* terminator */
};
617
618 LexerModule lmErlang(
619 SCLEX_ERLANG,
620 ColouriseErlangDoc,
621 "erlang",
622 FoldErlangDoc,
623 erlangWordListDesc);