]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexErlang.cxx
Apply patch (plus some additional changes) upgrading Scintilla to version 2.03. ...
[wxWidgets.git] / src / stc / scintilla / src / LexErlang.cxx
1 // Scintilla source code edit control
2 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
3 // The License.txt file describes the conditions under which this software may be distributed.
4 /** @file LexErlang.cxx
5 ** Lexer for Erlang.
6 ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
7 ** Originally written by Peter-Henry Mander,
8 ** based on Matlab lexer by José Fonseca.
9 **/
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16
17 #include "Platform.h"
18 #include "PropSet.h"
19 #include "Accessor.h"
20 #include "StyleContext.h"
21 #include "KeyWords.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24
25 #ifdef SCI_NAMESPACE
26 using namespace Scintilla;
27 #endif
28
/**
 * Tell whether ch is a valid digit of a number written in base radix.
 *
 * Digits are '0'-'9' (values 0..9) followed by the letters 'a'-'z' or
 * 'A'-'Z' (values 10..35), compared case-insensitively.
 *
 * @param radix  numeric base; only 2..36 is accepted.
 * @param ch     character code to test. Any int is accepted, including
 *               negative values and codes above 255, which simply yield 0.
 * @return 1 when ch is a digit whose value is below radix, 0 otherwise
 *         (including when radix itself is out of range).
 */
static int is_radix(int radix, int ch) {
	int digit;

	if (radix < 2 || radix > 36)
		return 0;

	// Explicit range tests instead of <ctype.h>: those functions are only
	// defined for values representable as unsigned char (or EOF), and their
	// result depends on the current locale. Like the arithmetic below, this
	// assumes an ASCII-compatible execution character set.
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else {
		return 0;
	}

	return (digit < radix) ? 1 : 0;
}
45
/**
 * Parser states used by ColouriseErlangDoc in addition to the style held by
 * the StyleContext. STATE_NULL means that no token is being parsed.
 * The enumerators keep their implicit, consecutive values starting at 0.
 */
typedef enum {
	/* No token in progress */
	STATE_NULL = 0,

	/* Comments and documentation tags found inside them */
	COMMENT,
	COMMENT_FUNCTION,
	COMMENT_MODULE,
	COMMENT_DOC,
	COMMENT_DOC_MACRO,

	/* Atoms */
	ATOM_UNQUOTED,
	ATOM_QUOTED,

	/* Node names */
	NODE_NAME_UNQUOTED,
	NODE_NAME_QUOTED,

	/* Macros */
	MACRO_START,
	MACRO_UNQUOTED,
	MACRO_QUOTED,

	/* Records */
	RECORD_START,
	RECORD_UNQUOTED,
	RECORD_QUOTED,

	/* Numbers */
	NUMERAL_START,
	NUMERAL_BASE_VALUE,
	NUMERAL_FLOAT,
	NUMERAL_EXPONENT,

	/* Preprocessor directives and module attributes */
	PREPROCESSOR
} atom_parse_state_t;
69
/**
 * Tell whether ch can be part of an unquoted Erlang word.
 *
 * @param ch  character code to test; any int is accepted.
 * @return true for alphanumeric characters below 0x80 and for '_',
 *         false for everything else, including negative values and
 *         codes of 0x80 and above.
 */
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for values representable as unsigned char
	// (or EOF), so reject anything outside 0..0x7F before calling it.
	if (ch < 0 || ch >= 0x80)
		return false;

	// A space is neither alphanumeric nor '_', so it needs no separate test.
	return isalnum(ch) || ch == '_';
}
73
// Colourise (assign lexical styles to) a range of an Erlang document.
//
// Parameters:
//   startPos, length, initStyle - handed unchanged to the StyleContext that
//                                 walks the range character by character.
//   keywordlists - six word lists, read by index:
//                  [0] reserved words, [1] BIFs, [2] preprocessor words,
//                  [3] module attributes, [4] documentation tags,
//                  [5] documentation macros.
//   styler       - accessor on which the StyleContext is built.
//
// Besides the style kept in sc.state, the function keeps its own
// parse_state (atom_parse_state_t). Each loop iteration first lets the
// token in progress react to the current character (first big switch when
// parse_state is set, second switch on sc.state otherwise), and then, if
// the style is back to SCE_ERLANG_DEFAULT, looks at the current character
// to start a new token.
//
// NOTE(review): many <ctype.h> calls below (isalnum, isalpha, islower,
// isupper, isdigit) receive sc.ch directly. If sc.ch can hold a value that
// is not representable as unsigned char, those calls are undefined
// behaviour — confirm the range StyleContext guarantees.
74 static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
75 WordList *keywordlists[], Accessor &styler) {
76
77 StyleContext sc(startPos, length, initStyle, styler);
78 WordList &reservedWords = *keywordlists[0];
79 WordList &erlangBIFs = *keywordlists[1];
80 WordList &erlangPreproc = *keywordlists[2];
81 WordList &erlangModulesAtt = *keywordlists[3];
82 WordList &erlangDoc = *keywordlists[4];
83 WordList &erlangDocMacro = *keywordlists[5];
// radix_digits: decimal value of the digits read so far in NUMERAL_START;
// it becomes the radix when a '#' follows.
84 int radix_digits = 0;
// exponent_digits: number of digits counted in NUMERAL_EXPONENT.
85 int exponent_digits = 0;
// NOTE(review): parse_state always starts at STATE_NULL, whatever initStyle
// is — confirm that colouring never restarts in the middle of a token that
// needs one of the other states.
86 atom_parse_state_t parse_state = STATE_NULL;
// old_parse_state / old_style: comment state and style to go back to once a
// documentation tag inside a comment has been handled.
87 atom_parse_state_t old_parse_state = STATE_NULL;
// to_late_to_comment: set as soon as a non-'%' character is seen in a
// comment, so that later '%' characters no longer raise the comment level.
88 bool to_late_to_comment = false;
// cur: receives the text of the current token from sc.GetCurrent().
89 char cur[100];
90 int old_style = SCE_ERLANG_DEFAULT;
91
92 styler.StartAt(startPos);
93
94 for (; sc.More(); sc.Forward()) {
// style: classification chosen for the token that ends on this character
// (used by the ATOM_UNQUOTED and PREPROCESSOR cases).
95 int style = SCE_ERLANG_DEFAULT;
96 if (STATE_NULL != parse_state) {
97
98 switch (parse_state) {
99
// Not reachable: this switch is only entered when parse_state != STATE_NULL.
100 case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
101
102 /* COMMENTS ------------------------------------------------------*/
// The three comment cases fall through into each other, so that a run of
// leading '%' characters is examined within a single iteration: a second
// '%' switches to the function-level style, a third one to the module-level
// style.
103 case COMMENT : {
104 if (sc.ch != '%') {
105 to_late_to_comment = true;
// The sc.ch == '%' test below is always true here (else-branch of the test above).
106 } else if (!to_late_to_comment && sc.ch == '%') {
107 // Switch to comment level 2 (Function)
108 sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
109 old_style = SCE_ERLANG_COMMENT_FUNCTION;
110 parse_state = COMMENT_FUNCTION;
111 sc.Forward();
112 }
113 }
114 // V--- Falling through!
115 case COMMENT_FUNCTION : {
116 if (sc.ch != '%') {
117 to_late_to_comment = true;
118 } else if (!to_late_to_comment && sc.ch == '%') {
119 // Switch to comment level 3 (Module)
120 sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
121 old_style = SCE_ERLANG_COMMENT_MODULE;
122 parse_state = COMMENT_MODULE;
123 sc.Forward();
124 }
125 }
126 // V--- Falling through!
127 case COMMENT_MODULE : {
// Documentation tags are only looked for in function- and module-level
// comments, not in plain single-'%' comments.
128 if (parse_state != COMMENT) {
129 // Search for comment documentation
// A '@' as the NEXT character starts a tag; if the current character is '{'
// the tag is treated as a documentation macro. The comment state is saved
// so it can be restored after the tag.
130 if (sc.chNext == '@') {
131 old_parse_state = parse_state;
132 parse_state = ('{' == sc.ch)
133 ? COMMENT_DOC_MACRO
134 : COMMENT_DOC;
135 sc.ForwardSetState(sc.state);
136 }
137 }
138
139 // All comments types fall here.
// End of line ends the comment and resets the comment-level tracking.
140 if (sc.atLineEnd) {
141 to_late_to_comment = false;
142 sc.SetState(SCE_ERLANG_DEFAULT);
143 parse_state = STATE_NULL;
144 }
145 } break;
146
147 case COMMENT_DOC :
148 // V--- Falling through!
149 case COMMENT_DOC_MACRO : {
150
// The tag ends at the first non-alphanumeric character; its text is then
// looked up in the documentation word lists.
151 if (!isalnum(sc.ch)) {
152 // Try to match documentation comment
153 sc.GetCurrent(cur, sizeof(cur));
154
155 if (parse_state == COMMENT_DOC_MACRO
156 && erlangDocMacro.InList(cur)) {
157 sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
// A recognised macro extends up to the closing '}' or the end of the line.
158 while (sc.ch != '}' && !sc.atLineEnd)
159 sc.Forward();
160 } else if (erlangDoc.InList(cur)) {
161 sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
162 } else {
// Unknown tag: give it the style of the surrounding comment.
163 sc.ChangeState(old_style);
164 }
165
166 // Switch back to old state
167 sc.SetState(old_style);
168 parse_state = old_parse_state;
169 }
170
171 if (sc.atLineEnd) {
172 to_late_to_comment = false;
173 sc.ChangeState(old_style);
174 sc.SetState(SCE_ERLANG_DEFAULT);
175 parse_state = STATE_NULL;
176 }
177 } break;
178
179 /* -------------------------------------------------------------- */
180 /* Atoms ---------------------------------------------------------*/
181 case ATOM_UNQUOTED : {
// '@' turns the atom into a node name; ':' makes the text read so far a
// module name; any other non-word character ends the atom, which is then
// classified against the word lists.
182 if ('@' == sc.ch){
183 parse_state = NODE_NAME_UNQUOTED;
184 } else if (sc.ch == ':') {
185 // Searching for module name
186 if (sc.chNext == ' ') {
187 // error
188 sc.ChangeState(SCE_ERLANG_UNKNOWN);
189 parse_state = STATE_NULL;
190 } else {
191 sc.Forward();
192 if (isalnum(sc.ch)) {
// NOTE(review): cur is filled here but not read before it is overwritten
// by a later GetCurrent() call.
193 sc.GetCurrent(cur, sizeof(cur));
194 sc.ChangeState(SCE_ERLANG_MODULES);
195 sc.SetState(SCE_ERLANG_MODULES);
196 }
197 }
198 } else if (!IsAWordChar(sc.ch)) {
199
200 sc.GetCurrent(cur, sizeof(cur));
// Classification priority: reserved word, then BIF, then function name
// (followed by '(' or '/'), otherwise a plain atom.
201 if (reservedWords.InList(cur)) {
202 style = SCE_ERLANG_KEYWORD;
// strcmp() is non-zero when cur differs from "erlang:", so a BIF list
// entry spelled exactly "erlang:" is not styled as a BIF.
203 } else if (erlangBIFs.InList(cur)
204 && strcmp(cur,"erlang:")){
205 style = SCE_ERLANG_BIFS;
206 } else if (sc.ch == '(' || '/' == sc.ch){
207 style = SCE_ERLANG_FUNCTION_NAME;
208 } else {
209 style = SCE_ERLANG_ATOM;
210 }
211
212 sc.ChangeState(style);
213 sc.SetState(SCE_ERLANG_DEFAULT);
214 parse_state = STATE_NULL;
215 }
216
217 } break;
218
219 case ATOM_QUOTED : {
// A quoted atom ends at a single quote that is not preceded by a backslash.
220 if ( '@' == sc.ch ){
221 parse_state = NODE_NAME_QUOTED;
222 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
223 sc.ChangeState(SCE_ERLANG_ATOM);
224 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
225 parse_state = STATE_NULL;
226 }
227 } break;
228
229 /* -------------------------------------------------------------- */
230 /* Node names ----------------------------------------------------*/
231 case NODE_NAME_UNQUOTED : {
// A second '@' abandons the node name without giving it the node-name style.
232 if ('@' == sc.ch) {
233 sc.SetState(SCE_ERLANG_DEFAULT);
234 parse_state = STATE_NULL;
235 } else if (!IsAWordChar(sc.ch)) {
236 sc.ChangeState(SCE_ERLANG_NODE_NAME);
237 sc.SetState(SCE_ERLANG_DEFAULT);
238 parse_state = STATE_NULL;
239 }
240 } break;
241
242 case NODE_NAME_QUOTED : {
243 if ('@' == sc.ch) {
244 sc.SetState(SCE_ERLANG_DEFAULT);
245 parse_state = STATE_NULL;
246 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
247 sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
248 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
249 parse_state = STATE_NULL;
250 }
251 } break;
252
253 /* -------------------------------------------------------------- */
254 /* Records -------------------------------------------------------*/
// Entered after a '#': a quote starts a quoted record name, a lower-case
// letter an unquoted one; anything else is treated as an error.
255 case RECORD_START : {
256 if ('\'' == sc.ch) {
257 parse_state = RECORD_QUOTED;
258 } else if (isalpha(sc.ch) && islower(sc.ch)) {
259 parse_state = RECORD_UNQUOTED;
260 } else { // error
261 sc.SetState(SCE_ERLANG_DEFAULT);
262 parse_state = STATE_NULL;
263 }
264 } break;
265
266 case RECORD_UNQUOTED : {
267 if (!IsAWordChar(sc.ch)) {
268 sc.ChangeState(SCE_ERLANG_RECORD);
269 sc.SetState(SCE_ERLANG_DEFAULT);
270 parse_state = STATE_NULL;
271 }
272 } break;
273
274 case RECORD_QUOTED : {
275 if ('\'' == sc.ch && '\\' != sc.chPrev) {
276 sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
277 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
278 parse_state = STATE_NULL;
279 }
280 } break;
281
282 /* -------------------------------------------------------------- */
283 /* Macros --------------------------------------------------------*/
// Entered after a '?': same scheme as records, except that any letter
// (either case) starts an unquoted macro name.
284 case MACRO_START : {
285 if ('\'' == sc.ch) {
286 parse_state = MACRO_QUOTED;
287 } else if (isalpha(sc.ch)) {
288 parse_state = MACRO_UNQUOTED;
289 } else { // error
290 sc.SetState(SCE_ERLANG_DEFAULT);
291 parse_state = STATE_NULL;
292 }
293 } break;
294
295 case MACRO_UNQUOTED : {
296 if (!IsAWordChar(sc.ch)) {
297 sc.ChangeState(SCE_ERLANG_MACRO);
298 sc.SetState(SCE_ERLANG_DEFAULT);
299 parse_state = STATE_NULL;
300 }
301 } break;
302
303 case MACRO_QUOTED : {
304 if ('\'' == sc.ch && '\\' != sc.chPrev) {
305 sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
306 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
307 parse_state = STATE_NULL;
308 }
309 } break;
310
311 /* -------------------------------------------------------------- */
312 /* Numerics ------------------------------------------------------*/
313 /* Simple integer */
314 case NUMERAL_START : {
// Digits are accumulated as a decimal value in case they turn out to be the
// radix of a radix#value number.
// NOTE(review): radix_digits is not bounded while accumulating; a very long
// digit run could overflow the int — confirm whether that matters here.
315 if (isdigit(sc.ch)) {
316 radix_digits *= 10;
317 radix_digits += sc.ch - '0'; // Assuming ASCII here!
318 } else if ('#' == sc.ch) {
// Only radices 2..36 are accepted; otherwise the token is dropped back to
// the default state without being styled as a number.
319 if (2 > radix_digits || 36 < radix_digits) {
320 sc.SetState(SCE_ERLANG_DEFAULT);
321 parse_state = STATE_NULL;
322 } else {
323 parse_state = NUMERAL_BASE_VALUE;
324 }
// A '.' only continues the number when a digit follows it.
325 } else if ('.' == sc.ch && isdigit(sc.chNext)) {
326 radix_digits = 0;
327 parse_state = NUMERAL_FLOAT;
328 } else if ('e' == sc.ch || 'E' == sc.ch) {
329 exponent_digits = 0;
330 parse_state = NUMERAL_EXPONENT;
331 } else {
332 radix_digits = 0;
333 sc.ChangeState(SCE_ERLANG_NUMBER);
334 sc.SetState(SCE_ERLANG_DEFAULT);
335 parse_state = STATE_NULL;
336 }
337 } break;
338
339 /* Integer in other base than 10 (x#yyy) */
340 case NUMERAL_BASE_VALUE : {
// The value ends at the first character that is not a digit of the radix.
// It is styled as a number only when that character is not alphanumeric.
341 if (!is_radix(radix_digits,sc.ch)) {
342 radix_digits = 0;
343
344 if (!isalnum(sc.ch))
345 sc.ChangeState(SCE_ERLANG_NUMBER);
346
347 sc.SetState(SCE_ERLANG_DEFAULT);
348 parse_state = STATE_NULL;
349 }
350 } break;
351
352 /* Float (x.yyy) */
353 case NUMERAL_FLOAT : {
354 if ('e' == sc.ch || 'E' == sc.ch) {
355 exponent_digits = 0;
356 parse_state = NUMERAL_EXPONENT;
357 } else if (!isdigit(sc.ch)) {
358 sc.ChangeState(SCE_ERLANG_NUMBER);
359 sc.SetState(SCE_ERLANG_DEFAULT);
360 parse_state = STATE_NULL;
361 }
362 } break;
363
364 /* Exponent, either integer or float (xEyy, x.yyEzzz) */
365 case NUMERAL_EXPONENT : {
// A sign is accepted when a digit follows it. The token is styled as a
// number only if at least one exponent digit was counted.
// NOTE(review): the digit reached by the Forward() below is then skipped
// by the loop's own Forward() and is never counted in exponent_digits —
// check inputs such as 1e+5. The sign test is also not limited to the
// first exponent character.
366 if (('-' == sc.ch || '+' == sc.ch)
367 && (isdigit(sc.chNext))) {
368 sc.Forward();
369 } else if (!isdigit(sc.ch)) {
370 if (0 < exponent_digits)
371 sc.ChangeState(SCE_ERLANG_NUMBER);
372 sc.SetState(SCE_ERLANG_DEFAULT);
373 parse_state = STATE_NULL;
374 } else {
375 ++exponent_digits;
376 }
377 } break;
378
379 /* -------------------------------------------------------------- */
380 /* Preprocessor --------------------------------------------------*/
381 case PREPROCESSOR : {
// Entered after a '-' that is not followed by a digit. At the end of the
// word the text is looked up in the preprocessor and module-attribute
// lists; if neither matches, style keeps its SCE_ERLANG_DEFAULT value.
382 if (!IsAWordChar(sc.ch)) {
383
384 sc.GetCurrent(cur, sizeof(cur));
385 if (erlangPreproc.InList(cur)) {
386 style = SCE_ERLANG_PREPROC;
387 } else if (erlangModulesAtt.InList(cur)) {
388 style = SCE_ERLANG_MODULES_ATT;
389 }
390
391 sc.ChangeState(style);
392 sc.SetState(SCE_ERLANG_DEFAULT);
393 parse_state = STATE_NULL;
394 }
395 } break;
396
397 }
398
399 } /* End of : STATE_NULL != parse_state */
400 else
401 {
// No parse_state in progress: tokens tracked through sc.state alone.
402 switch (sc.state) {
403 case SCE_ERLANG_VARIABLE : {
404 if (!IsAWordChar(sc.ch))
405 sc.SetState(SCE_ERLANG_DEFAULT);
406 } break;
407 case SCE_ERLANG_STRING : {
// A string ends at a double quote that is not preceded by a backslash.
408 if (sc.ch == '\"' && sc.chPrev != '\\')
409 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
410 } break;
411 case SCE_ERLANG_COMMENT : {
412 if (sc.atLineEnd)
413 sc.SetState(SCE_ERLANG_DEFAULT);
414 } break;
415 case SCE_ERLANG_CHARACTER : {
// Both branches end the character literal after the current character; the
// literal is only kept open when the current character is a backslash that
// does not itself follow a backslash.
416 if (sc.chPrev == '\\') {
417 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
418 } else if (sc.ch != '\\') {
419 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
420 }
421 } break;
422 case SCE_ERLANG_OPERATOR : {
// Operators are one character long, except that after a '.' one of
// '*', '/', '\\', '^' or '\'' is included in the operator.
423 if (sc.chPrev == '.') {
424 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
425 || sc.ch == '^') {
426 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
427 } else if (sc.ch == '\'') {
428 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
429 } else {
430 sc.SetState(SCE_ERLANG_DEFAULT);
431 }
432 } else {
433 sc.SetState(SCE_ERLANG_DEFAULT);
434 }
435 } break;
436 }
437 }
438
// With the style back to default, the current character may start a new token.
439 if (sc.state == SCE_ERLANG_DEFAULT) {
// no_new_state is set only by the default label of the switch below, i.e.
// when the character is none of the special punctuation handled there.
440 bool no_new_state = false;
441
442 switch (sc.ch) {
443 case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
444 case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
445 case '%' : {
446 parse_state = COMMENT;
447 sc.SetState(SCE_ERLANG_COMMENT);
448 } break;
449 case '#' : {
450 parse_state = RECORD_START;
451 sc.SetState(SCE_ERLANG_UNKNOWN);
452 } break;
453 case '?' : {
454 parse_state = MACRO_START;
455 sc.SetState(SCE_ERLANG_UNKNOWN);
456 } break;
457 case '\'' : {
458 parse_state = ATOM_QUOTED;
459 sc.SetState(SCE_ERLANG_UNKNOWN);
460 } break;
// A sign directly followed by a digit starts a number. A '-' followed by
// anything else starts a preprocessor/attribute word. A '+' followed by
// anything else starts nothing and the style stays default (the operator
// test further down is not reached, because no_new_state stays false).
461 case '+' :
462 case '-' : {
463 if (IsADigit(sc.chNext)) {
464 parse_state = NUMERAL_START;
465 radix_digits = 0;
466 sc.SetState(SCE_ERLANG_UNKNOWN);
467 } else if (sc.ch != '+') {
468 parse_state = PREPROCESSOR;
469 sc.SetState(SCE_ERLANG_UNKNOWN);
470 }
471 } break;
472 default : no_new_state = true;
473 }
474
475 if (no_new_state) {
// A leading digit seeds radix_digits with its own value.
476 if (isdigit(sc.ch)) {
477 parse_state = NUMERAL_START;
478 radix_digits = sc.ch - '0';
479 sc.SetState(SCE_ERLANG_UNKNOWN);
// Upper-case letter or '_' starts a variable; any other letter an atom.
480 } else if (isupper(sc.ch) || '_' == sc.ch) {
481 sc.SetState(SCE_ERLANG_VARIABLE);
482 } else if (isalpha(sc.ch)) {
483 parse_state = ATOM_UNQUOTED;
484 sc.SetState(SCE_ERLANG_UNKNOWN);
485 } else if (isoperator(static_cast<char>(sc.ch))
486 || sc.ch == '\\') {
487 sc.SetState(SCE_ERLANG_OPERATOR);
488 }
489 }
490 }
491
492 }
493 sc.Complete();
494 }
495
496 static int ClassifyErlangFoldPoint(
497 Accessor &styler,
498 int styleNext,
499 int keyword_start
500 ) {
501 int lev = 0;
502 if (styler.Match(keyword_start,"case")
503 || (
504 styler.Match(keyword_start,"fun")
505 && (SCE_ERLANG_FUNCTION_NAME != styleNext)
506 )
507 || styler.Match(keyword_start,"if")
508 || styler.Match(keyword_start,"query")
509 || styler.Match(keyword_start,"receive")
510 ) {
511 ++lev;
512 } else if (styler.Match(keyword_start,"end")) {
513 --lev;
514 }
515
516 return lev;
517 }
518
519 static void FoldErlangDoc(
520 unsigned int startPos, int length, int initStyle,
521 WordList** /*keywordlists*/, Accessor &styler
522 ) {
523 unsigned int endPos = startPos + length;
524 int currentLine = styler.GetLine(startPos);
525 int lev;
526 int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
527 int currentLevel = previousLevel;
528 int styleNext = styler.StyleAt(startPos);
529 int style = initStyle;
530 int stylePrev;
531 int keyword_start = 0;
532 char ch;
533 char chNext = styler.SafeGetCharAt(startPos);
534 bool atEOL;
535
536 for (unsigned int i = startPos; i < endPos; i++) {
537 ch = chNext;
538 chNext = styler.SafeGetCharAt(i + 1);
539
540 // Get styles
541 stylePrev = style;
542 style = styleNext;
543 styleNext = styler.StyleAt(i + 1);
544 atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
545
546 if (stylePrev != SCE_ERLANG_KEYWORD
547 && style == SCE_ERLANG_KEYWORD) {
548 keyword_start = i;
549 }
550
551 // Fold on keywords
552 if (stylePrev == SCE_ERLANG_KEYWORD
553 && style != SCE_ERLANG_KEYWORD
554 && style != SCE_ERLANG_ATOM
555 ) {
556 currentLevel += ClassifyErlangFoldPoint(styler,
557 styleNext,
558 keyword_start);
559 }
560
561 // Fold on comments
562 if (style == SCE_ERLANG_COMMENT
563 || style == SCE_ERLANG_COMMENT_MODULE
564 || style == SCE_ERLANG_COMMENT_FUNCTION) {
565
566 if (ch == '%' && chNext == '{') {
567 currentLevel++;
568 } else if (ch == '%' && chNext == '}') {
569 currentLevel--;
570 }
571 }
572
573 // Fold on braces
574 if (style == SCE_ERLANG_OPERATOR) {
575 if (ch == '{' || ch == '(' || ch == '[') {
576 currentLevel++;
577 } else if (ch == '}' || ch == ')' || ch == ']') {
578 currentLevel--;
579 }
580 }
581
582
583 if (atEOL) {
584 lev = previousLevel;
585
586 if (currentLevel > previousLevel)
587 lev |= SC_FOLDLEVELHEADERFLAG;
588
589 if (lev != styler.LevelAt(currentLine))
590 styler.SetLevel(currentLine, lev);
591
592 currentLine++;
593 previousLevel = currentLevel;
594 }
595
596 }
597
598 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
599 styler.SetLevel(currentLine,
600 previousLevel
601 | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
602 }
603
/*
 * Human-readable descriptions of the keyword lists, in the same order in
 * which ColouriseErlangDoc reads keywordlists[0] to keywordlists[5].
 * The array ends with a null pointer.
 */
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",        /* [0] */
	"Erlang BIFs",                  /* [1] */
	"Erlang Preprocessor",          /* [2] */
	"Erlang Module Attributes",     /* [3] */
	"Erlang Documentation",         /* [4] */
	"Erlang Documentation Macro",   /* [5] */
	NULL                            /* terminator */
};
613
614 LexerModule lmErlang(
615 SCLEX_ERLANG,
616 ColouriseErlangDoc,
617 "erlang",
618 FoldErlangDoc,
619 erlangWordListDesc);