]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexBash.cxx
Cast to void* before casting to Node** to make the compiler do no strict-aliasing...
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexBash.cxx
1 // Scintilla source code edit control
2 /** @file LexBash.cxx
3 ** Lexer for Bash.
4 **/
5 // Copyright 2004 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18 #include "Accessor.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
// Markers for the kind of number currently being lexed. All of them are
// greater than 64, the largest base getBashNumberBase() accepts, so they can
// share one int variable with an explicit base taken from a "base#digits"
// literal. BASH_BASE_ERROR is also the "not a digit" result of
// translateBashDigit().
23 #define BASH_BASE_ERROR 65
24 #define BASH_BASE_DECIMAL 66
25 #define BASH_BASE_HEX 67
26 #define BASH_BASE_OCTAL 68
27 #define BASH_BASE_OCTAL_ERROR 69
28
// Size, in chars, of the buffer that stores a here-document delimiter.
29 #define HERE_DELIM_MAX 256
30
31 static inline int translateBashDigit(char ch) {
32 if (ch >= '0' && ch <= '9') {
33 return ch - '0';
34 } else if (ch >= 'a' && ch <= 'z') {
35 return ch - 'a' + 10;
36 } else if (ch >= 'A' && ch <= 'Z') {
37 return ch - 'A' + 36;
38 } else if (ch == '@') {
39 return 62;
40 } else if (ch == '_') {
41 return 63;
42 }
43 return BASH_BASE_ERROR;
44 }
45
// True when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
49
// Returns true when ch is one of the letters accepted after '-' to form a
// single-character test operator (for example the 'r' of "-r").
// NUL is rejected explicitly: it is never an operator letter, and strchr()
// would otherwise match it against the terminator of the letter set.
static bool isSingleCharOp(char ch) {
	if (ch == '\0')
		return false;
	return NULL != strchr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", ch);
}
56
// True when ch is one of the punctuation characters this lexer colours as an
// operator. '*' and '$' are not listed here.
static inline bool isBashOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\': case '%':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',': case '/': case '<':
	case '?': case '!': case '.': case '~':
	case '@':
		return true;
	default:
		return false;
	}
}
68
69 static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
70 char s[100];
71 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
72 s[i] = styler[start + i];
73 s[i + 1] = '\0';
74 }
75 char chAttr = SCE_SH_IDENTIFIER;
76 if (keywords.InList(s))
77 chAttr = SCE_SH_WORD;
78 styler.ColourTo(end, chAttr);
79 return chAttr;
80 }
81
82 static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) {
83 int base = 0;
84 for (unsigned int i = 0; i < end - start + 1 && i < 10; i++) {
85 base = base * 10 + (styler[start + i] - '0');
86 }
87 if (base > 64 || (end - start) > 1) {
88 return BASH_BASE_ERROR;
89 }
90 return base;
91 }
92
// True when ch cannot be part of a variable name introduced by '$', i.e. it
// is not alphanumeric, not '$' and not '_'.
static inline bool isEndVar(char ch) {
	// <ctype.h> functions require a value representable as unsigned char;
	// a plain char holding a byte >= 0x80 may be negative.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '$' && ch != '_';
}
96
// True when ch is alphanumeric or an underscore.
static inline bool isNonQuote(char ch) {
	// <ctype.h> functions require a value representable as unsigned char;
	// a plain char holding a byte >= 0x80 may be negative.
	return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
}
100
101 static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
102 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
103 return false;
104 }
105 while (*val) {
106 if (*val != styler[pos++]) {
107 return false;
108 }
109 val++;
110 }
111 return true;
112 }
113
// Gives the closing bracket matching an opening '(', '[', '{' or '<'.
// Any other character is its own counterpart and is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
125
// Colourises (assigns lexical styles to) a range of a Bash document.
//   startPos     - position where styling is requested to start; it is moved
//                  backwards when initStyle is a state that has to be
//                  re-lexed from its beginning (see below)
//   length       - number of characters to style, counted from the original
//                  startPos
//   initStyle    - style in effect at startPos
//   keywordlists - keywordlists[0] holds the words coloured as SCE_SH_WORD
//   styler       - used to read document characters and to record styles
// The lexer is a character-by-character state machine; `state` is the style
// of the token currently being scanned.
126 static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
127 WordList *keywordlists[], Accessor &styler) {
128
129 // Lexer for bash often has to backtrack to start of current style to determine
130 // which characters are being used as quotes, how deeply nested is the
131 // start position and what the termination string is for here documents
132
133 WordList &keywords = *keywordlists[0];
134
// Bookkeeping for the here-document currently being recognised.
135 class HereDocCls {
136 public:
137 int State; // 0: '<<' encountered
138 // 1: collect the delimiter
139 // 2: here doc text (lines after the delimiter)
140 char Quote; // the char after '<<'
141 bool Quoted; // true if Quote in ('\'','"','`')
142 bool Indent; // indented delimiter (for <<-)
143 int DelimiterLength; // strlen(Delimiter)
144 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
145 HereDocCls() {
146 State = 0;
147 DelimiterLength = 0;
148 Delimiter = new char[HERE_DELIM_MAX];
149 Delimiter[0] = '\0';
150 }
151 ~HereDocCls() {
152 delete []Delimiter;
153 }
154 };
155 HereDocCls HereDoc;
156
// Bookkeeping for the quoted construct currently being scanned: its opening
// (Up) and closing (Down) characters and the current nesting count.
157 class QuoteCls {
158 public:
159 int Rep;
160 int Count;
161 char Up;
162 char Down;
163 QuoteCls() {
164 this->New(1);
165 }
166 void New(int r) {
167 Rep = r;
168 Count = 0;
169 Up = '\0';
170 Down = '\0';
171 }
172 void Open(char u) {
173 Count++;
174 Up = u;
175 Down = opposite(Up);
176 }
177 };
178 QuoteCls Quote;
179
180 int state = initStyle;
181 int numBase = 0;
182 unsigned int lengthDoc = startPos + length;
183
184 // If in a long distance lexical state, seek to the beginning to find quote characters
185 // Bash strings can be multi-line with embedded newlines, so backtrack.
186 // Bash numbers have additional state during lexing, so backtrack too.
187 if (state == SCE_SH_HERE_Q) {
// Inside here-doc text: go back to the start of the line holding the
// here-doc delimiter and resume with the style found just before that line.
188 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
189 startPos--;
190 }
191 startPos = styler.LineStart(styler.GetLine(startPos));
192 state = styler.StyleAt(startPos - 1);
193 }
194 if (state == SCE_SH_STRING
195 || state == SCE_SH_BACKTICKS
196 || state == SCE_SH_CHARACTER
197 || state == SCE_SH_NUMBER
198 || state == SCE_SH_IDENTIFIER
199 || state == SCE_SH_COMMENTLINE
200 ) {
// Rewind to the first character of the run carrying this style and lex it
// again starting from the default state.
201 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
202 startPos--;
203 }
204 state = SCE_SH_DEFAULT;
205 }
206
207 styler.StartAt(startPos);
208 char chPrev = styler.SafeGetCharAt(startPos - 1);
// At the very start of the document behave as if a line feed preceded it.
209 if (startPos == 0)
210 chPrev = '\n';
211 char chNext = styler[startPos];
212 styler.StartSegment(startPos);
213
214 for (unsigned int i = startPos; i < lengthDoc; i++) {
215 char ch = chNext;
216 // if the current character is not consumed due to the completion of an
217 // earlier style, lexing can be restarted via a simple goto
218 restartLexer:
219 chNext = styler.SafeGetCharAt(i + 1);
220 char chNext2 = styler.SafeGetCharAt(i + 2);
221
// Lead byte reported by the accessor: step over it together with the byte
// that follows (the loop increment supplies the second step).
222 if (styler.IsLeadByte(ch)) {
223 chNext = styler.SafeGetCharAt(i + 2);
224 chPrev = ' ';
225 i += 1;
226 continue;
227 }
228
229 if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
230 styler.ColourTo(i, state);
231 chPrev = ch;
232 continue;
233 }
234
235 if (HereDoc.State == 1 && isEOLChar(ch)) {
236 // Begin of here-doc (the line after the here-doc delimiter):
237 // Lexically, the here-doc starts from the next line after the <<, but the
238 // first line of here-doc seem to follow the style of the last EOL sequence
239 HereDoc.State = 2;
240 if (HereDoc.Quoted) {
241 if (state == SCE_SH_HERE_DELIM) {
242 // Missing quote at end of string! We are stricter than bash.
243 // Colour here-doc anyway while marking this bit as an error.
244 state = SCE_SH_ERROR;
245 }
246 styler.ColourTo(i - 1, state);
247 // HereDoc.Quote always == '\''
248 state = SCE_SH_HERE_Q;
249 } else {
250 styler.ColourTo(i - 1, state);
251 // always switch
252 state = SCE_SH_HERE_Q;
253 }
254 }
255
// Default state: the current character decides which token starts here.
256 if (state == SCE_SH_DEFAULT) {
257 if (ch == '\\') { // escaped character
258 i++;
259 ch = chNext;
260 chNext = chNext2;
261 styler.ColourTo(i, SCE_SH_IDENTIFIER);
262 } else if (isdigit(ch)) {
263 state = SCE_SH_NUMBER;
264 numBase = BASH_BASE_DECIMAL;
265 if (ch == '0') { // hex,octal
266 if (chNext == 'x' || chNext == 'X') {
267 numBase = BASH_BASE_HEX;
268 i++;
269 ch = chNext;
270 chNext = chNext2;
271 } else if (isdigit(chNext)) {
272 numBase = BASH_BASE_OCTAL;
273 }
274 }
275 } else if (iswordstart(ch)) {
276 state = SCE_SH_WORD;
277 if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
278 // We need that if length of word == 1!
279 // This test is copied from the SCE_SH_WORD handler.
280 classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
281 state = SCE_SH_DEFAULT;
282 }
283 } else if (ch == '#') {
284 state = SCE_SH_COMMENTLINE;
285 } else if (ch == '\"') {
286 state = SCE_SH_STRING;
287 Quote.New(1);
288 Quote.Open(ch);
289 } else if (ch == '\'') {
290 state = SCE_SH_CHARACTER;
291 Quote.New(1);
292 Quote.Open(ch);
293 } else if (ch == '`') {
294 state = SCE_SH_BACKTICKS;
295 Quote.New(1);
296 Quote.Open(ch);
297 } else if (ch == '$') {
// '$' introduces several constructs; the following character selects which.
// The startQuote / skipChar labels let the branches share their tail code.
298 if (chNext == '{') {
299 state = SCE_SH_PARAM;
300 goto startQuote;
301 } else if (chNext == '\'') {
302 state = SCE_SH_CHARACTER;
303 goto startQuote;
304 } else if (chNext == '"') {
305 state = SCE_SH_STRING;
306 goto startQuote;
307 } else if (chNext == '(' && chNext2 == '(') {
308 styler.ColourTo(i, SCE_SH_OPERATOR);
309 state = SCE_SH_DEFAULT;
310 goto skipChar;
311 } else if (chNext == '(' || chNext == '`') {
312 state = SCE_SH_BACKTICKS;
313 startQuote:
314 Quote.New(1);
315 Quote.Open(chNext);
316 goto skipChar;
317 } else {
318 state = SCE_SH_SCALAR;
319 skipChar:
320 i++;
321 ch = chNext;
322 chNext = chNext2;
323 }
324 } else if (ch == '*') {
325 if (chNext == '*') { // exponentiation
326 i++;
327 ch = chNext;
328 chNext = chNext2;
329 }
330 styler.ColourTo(i, SCE_SH_OPERATOR);
331 } else if (ch == '<' && chNext == '<') {
332 state = SCE_SH_HERE_DELIM;
333 HereDoc.State = 0;
334 HereDoc.Indent = false;
335 } else if (ch == '-' // file test operators
336 && isSingleCharOp(chNext)
337 && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
338 styler.ColourTo(i + 1, SCE_SH_WORD);
339 state = SCE_SH_DEFAULT;
340 i++;
341 ch = chNext;
342 chNext = chNext2;
343 } else if (isBashOperator(ch)) {
344 styler.ColourTo(i, SCE_SH_OPERATOR);
345 } else {
346 // keep colouring defaults to make restart easier
347 styler.ColourTo(i, SCE_SH_DEFAULT);
348 }
// Inside a number: check ch against the digits permitted by numBase.
349 } else if (state == SCE_SH_NUMBER) {
350 int digit = translateBashDigit(ch);
351 if (numBase == BASH_BASE_DECIMAL) {
352 if (ch == '#') {
353 numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler);
354 if (numBase == BASH_BASE_ERROR) // take the rest as comment
355 goto numAtEnd;
356 } else if (!isdigit(ch))
357 goto numAtEnd;
358 } else if (numBase == BASH_BASE_HEX) {
359 if ((digit < 16) || (digit >= 36 && digit <= 41)) {
360 // hex digit 0-9a-fA-F
361 } else
362 goto numAtEnd;
363 } else if (numBase == BASH_BASE_OCTAL ||
364 numBase == BASH_BASE_OCTAL_ERROR) {
365 if (digit > 7) {
366 if (digit <= 9) {
367 numBase = BASH_BASE_OCTAL_ERROR;
368 } else
369 goto numAtEnd;
370 }
371 } else if (numBase == BASH_BASE_ERROR) {
372 if (digit > 9)
373 goto numAtEnd;
374 } else { // DD#DDDD number style handling
375 if (digit != BASH_BASE_ERROR) {
376 if (numBase <= 36) {
377 // case-insensitive if base<=36
378 if (digit >= 36) digit -= 26;
379 }
380 if (digit >= numBase) {
381 if (digit <= 9) {
382 numBase = BASH_BASE_ERROR;
383 } else
384 goto numAtEnd;
385 }
386 } else {
// End of the number: colour it (as SCE_SH_ERROR when the base or an octal
// digit was invalid), then lex the current character again from the
// default state.
387 numAtEnd:
388 if (numBase == BASH_BASE_ERROR ||
389 numBase == BASH_BASE_OCTAL_ERROR)
390 state = SCE_SH_ERROR;
391 styler.ColourTo(i - 1, state);
392 state = SCE_SH_DEFAULT;
393 goto restartLexer;
394 }
395 }
396 } else if (state == SCE_SH_WORD) {
397 if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
398 // "." never used in Bash variable names
399 // but used in file names
400 classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
401 state = SCE_SH_DEFAULT;
402 ch = ' ';
403 }
404 } else if (state == SCE_SH_IDENTIFIER) {
405 if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
406 styler.ColourTo(i, SCE_SH_IDENTIFIER);
407 state = SCE_SH_DEFAULT;
408 ch = ' ';
409 }
410 } else {
411 if (state == SCE_SH_COMMENTLINE) {
412 if (ch == '\\' && isEOLChar(chNext)) {
413 // comment continuation
414 if (chNext == '\r' && chNext2 == '\n') {
415 i += 2;
416 ch = styler.SafeGetCharAt(i);
417 chNext = styler.SafeGetCharAt(i + 1);
418 } else {
419 i++;
420 ch = chNext;
421 chNext = chNext2;
422 }
423 } else if (isEOLChar(ch)) {
424 styler.ColourTo(i - 1, state);
425 state = SCE_SH_DEFAULT;
426 goto restartLexer;
427 } else if (isEOLChar(chNext)) {
428 styler.ColourTo(i, state);
429 state = SCE_SH_DEFAULT;
430 }
431 } else if (state == SCE_SH_HERE_DELIM) {
432 //
433 // From Bash info:
434 // ---------------
435 // Specifier format is: <<[-]WORD
436 // Optional '-' is for removal of leading tabs from here-doc.
437 // Whitespace acceptable after <<[-] operator
438 //
439 if (HereDoc.State == 0) { // '<<' encountered
440 HereDoc.State = 1;
441 HereDoc.Quote = chNext;
442 HereDoc.Quoted = false;
443 HereDoc.DelimiterLength = 0;
444 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
445 if (chNext == '\'') { // a quoted here-doc delimiter (' only)
446 i++;
447 ch = chNext;
448 chNext = chNext2;
449 HereDoc.Quoted = true;
450 } else if (!HereDoc.Indent && chNext == '-') { // <<- indent case
451 HereDoc.Indent = true;
452 HereDoc.State = 0;
453 } else if (isalpha(chNext) || chNext == '_' || chNext == '\\'
454 || chNext == '-' || chNext == '+') {
455 // an unquoted here-doc delimiter, no special handling
456 } else if (chNext == '<') { // HERE string <<<
457 i++;
458 ch = chNext;
459 chNext = chNext2;
460 styler.ColourTo(i, SCE_SH_HERE_DELIM);
461 state = SCE_SH_DEFAULT;
462 HereDoc.State = 0;
463 } else if (isspacechar(chNext)) {
464 // eat whitespace
465 HereDoc.State = 0;
466 } else if (isdigit(chNext) || chNext == '=' || chNext == '$') {
467 // left shift << or <<= operator cases
468 styler.ColourTo(i, SCE_SH_OPERATOR);
469 state = SCE_SH_DEFAULT;
470 HereDoc.State = 0;
471 } else {
472 // symbols terminates; deprecated zero-length delimiter
473 }
474 } else if (HereDoc.State == 1) { // collect the delimiter
475 if (HereDoc.Quoted) { // a quoted here-doc delimiter
476 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
477 styler.ColourTo(i, state);
478 state = SCE_SH_DEFAULT;
479 } else {
480 if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
481 i++;
482 ch = chNext;
483 chNext = chNext2;
484 }
485 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
486 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
487 }
488 } else { // an unquoted here-doc delimiter
489 if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+') {
490 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
491 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
492 } else if (ch == '\\') {
493 // skip escape prefix
494 } else {
495 styler.ColourTo(i - 1, state);
496 state = SCE_SH_DEFAULT;
497 goto restartLexer;
498 }
499 }
// Delimiter buffer is full: switch to the error state.
500 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
501 styler.ColourTo(i - 1, state);
502 state = SCE_SH_ERROR;
503 goto restartLexer;
504 }
505 }
506 } else if (HereDoc.State == 2) {
507 // state == SCE_SH_HERE_Q
// Here-doc text: look for the collected delimiter at the current position.
508 if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
509 if (!HereDoc.Indent && isEOLChar(chPrev)) {
510 endHereDoc:
511 // standard HERE delimiter
512 i += HereDoc.DelimiterLength;
513 chPrev = styler.SafeGetCharAt(i - 1);
514 ch = styler.SafeGetCharAt(i);
515 if (isEOLChar(ch)) {
516 styler.ColourTo(i - 1, state);
517 state = SCE_SH_DEFAULT;
518 HereDoc.State = 0;
519 goto restartLexer;
520 }
521 chNext = styler.SafeGetCharAt(i + 1);
522 } else if (HereDoc.Indent) {
523 // indented HERE delimiter
// Scan backwards from the match: it ends the here-doc only when nothing
// but whitespace lies between it and the preceding end of line.
524 unsigned int bk = (i > 0)? i - 1: 0;
525 while (i > 0) {
526 ch = styler.SafeGetCharAt(bk--);
527 if (isEOLChar(ch)) {
528 goto endHereDoc;
529 } else if (!isspacechar(ch)) {
530 break; // got leading non-whitespace
531 }
532 }
533 }
534 }
535 } else if (state == SCE_SH_SCALAR) { // variable names
536 if (isEndVar(ch)) {
537 if ((state == SCE_SH_SCALAR)
538 && i == (styler.GetStartSegment() + 1)) {
539 // Special variable: $(, $_ etc.
540 styler.ColourTo(i, state);
541 state = SCE_SH_DEFAULT;
542 } else {
543 styler.ColourTo(i - 1, state);
544 state = SCE_SH_DEFAULT;
545 goto restartLexer;
546 }
547 }
548 } else if (state == SCE_SH_STRING
549 || state == SCE_SH_CHARACTER
550 || state == SCE_SH_BACKTICKS
551 || state == SCE_SH_PARAM
552 ) {
// Quoted constructs: follow the Up/Down delimiters until the count of open
// delimiters drops back to zero.
// No closing delimiter recorded yet: the first non-space character becomes
// the opening delimiter.
553 if (!Quote.Down && !isspacechar(ch)) {
554 Quote.Open(ch);
// A backslash makes the next character literal, unless backslash is itself
// the opening delimiter.
555 } else if (ch == '\\' && Quote.Up != '\\') {
556 i++;
557 ch = chNext;
558 chNext = styler.SafeGetCharAt(i + 1);
559 } else if (ch == Quote.Down) {
560 Quote.Count--;
561 if (Quote.Count == 0) {
562 Quote.Rep--;
563 if (Quote.Rep <= 0) {
564 styler.ColourTo(i, state);
565 state = SCE_SH_DEFAULT;
566 ch = ' ';
567 }
568 if (Quote.Up == Quote.Down) {
569 Quote.Count++;
570 }
571 }
572 } else if (ch == Quote.Up) {
573 Quote.Count++;
574 }
575 }
576 }
// On an error stop scanning; the ColourTo after the loop applies the error
// style to the rest of the range.
577 if (state == SCE_SH_ERROR) {
578 break;
579 }
580 chPrev = ch;
581 }
// Colour whatever is left up to the end of the range with the final state.
582 styler.ColourTo(lengthDoc - 1, state);
583 }
584
585 static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
586 Accessor &styler) {
587 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
588 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
589 unsigned int endPos = startPos + length;
590 int visibleChars = 0;
591 int lineCurrent = styler.GetLine(startPos);
592 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
593 int levelCurrent = levelPrev;
594 char chNext = styler[startPos];
595 int styleNext = styler.StyleAt(startPos);
596 for (unsigned int i = startPos; i < endPos; i++) {
597 char ch = chNext;
598 chNext = styler.SafeGetCharAt(i + 1);
599 int style = styleNext;
600 styleNext = styler.StyleAt(i + 1);
601 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
602 if (foldComment && (style == SCE_SH_COMMENTLINE)) {
603 if ((ch == '/') && (chNext == '/')) {
604 char chNext2 = styler.SafeGetCharAt(i + 2);
605 if (chNext2 == '{') {
606 levelCurrent++;
607 } else if (chNext2 == '}') {
608 levelCurrent--;
609 }
610 }
611 }
612 if (style == SCE_C_OPERATOR) {
613 if (ch == '{') {
614 levelCurrent++;
615 } else if (ch == '}') {
616 levelCurrent--;
617 }
618 }
619 if (atEOL) {
620 int lev = levelPrev;
621 if (visibleChars == 0 && foldCompact)
622 lev |= SC_FOLDLEVELWHITEFLAG;
623 if ((levelCurrent > levelPrev) && (visibleChars > 0))
624 lev |= SC_FOLDLEVELHEADERFLAG;
625 if (lev != styler.LevelAt(lineCurrent)) {
626 styler.SetLevel(lineCurrent, lev);
627 }
628 lineCurrent++;
629 levelPrev = levelCurrent;
630 visibleChars = 0;
631 }
632 if (!isspacechar(ch))
633 visibleChars++;
634 }
635 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
636 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
637 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
638 }
639
// Descriptions of the keyword lists of this lexer, terminated by 0.
// The single entry corresponds to keywordlists[0] read by ColouriseBashDoc.
640 static const char * const bashWordListDesc[] = {
641 "Keywords",
642 0
643 };
644
// LexerModule instance for SCLEX_BASH, built from the colouring function, the
// name "bash", the folding function and the word list descriptions.
645 LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);