]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexBash.cxx
e9c31d6b38fad7f73ec121ac1bf8c2cee4eff234
1 // Scintilla source code edit control
5 // Copyright 2004-2005 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
23 #define BASH_BASE_ERROR 65
24 #define BASH_BASE_DECIMAL 66
25 #define BASH_BASE_HEX 67
26 #define BASH_BASE_OCTAL 68
27 #define BASH_BASE_OCTAL_ERROR 69
29 #define HERE_DELIM_MAX 256
31 static inline int translateBashDigit(char ch
) {
32 if (ch
>= '0' && ch
<= '9') {
34 } else if (ch
>= 'a' && ch
<= 'z') {
36 } else if (ch
>= 'A' && ch
<= 'Z') {
38 } else if (ch
== '@') {
40 } else if (ch
== '_') {
43 return BASH_BASE_ERROR
;
// Return true when ch is a line terminator: carriage return or line feed.
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}
// Return true when ch is one of the letters accepted after '-' as a
// single-character test operator (as in "-r", "-f", "-n").
//
// NUL is rejected explicitly: searching the set for the string terminator
// would otherwise report a match.
static bool isSingleCharOp(char ch) {
	if (ch == '\0') {
		return false;
	}
	return strchr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", ch) != NULL;
}
57 static inline bool isBashOperator(char ch
) {
58 if (ch
== '^' || ch
== '&' || ch
== '\\' || ch
== '%' ||
59 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
60 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
61 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
62 ch
== '>' || ch
== ',' || ch
== '/' || ch
== '<' ||
63 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~' ||
69 static int classifyWordBash(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
71 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
72 s
[i
] = styler
[start
+ i
];
75 char chAttr
= SCE_SH_IDENTIFIER
;
76 if (keywords
.InList(s
))
78 styler
.ColourTo(end
, chAttr
);
82 static inline int getBashNumberBase(unsigned int start
, unsigned int end
, Accessor
&styler
) {
84 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 10; i
++) {
85 base
= base
* 10 + (styler
[start
+ i
] - '0');
87 if (base
> 64 || (end
- start
) > 1) {
88 return BASH_BASE_ERROR
;
// Return true when ch cannot be part of a variable reference, i.e. it is
// neither alphanumeric nor '$' nor '_'.
static inline bool isEndVar(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char;
	// handing them a negative plain char is undefined behaviour.
	const unsigned char uch = ch;
	return !isalnum(uch) && ch != '$' && ch != '_';
}
// Return true when ch is alphanumeric or an underscore.
static inline bool isNonQuote(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char;
	// handing them a negative plain char is undefined behaviour.
	const unsigned char uch = ch;
	return isalnum(uch) || ch == '_';
}
101 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
102 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
106 if (*val
!= styler
[pos
++]) {
114 static char opposite(char ch
) {
126 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
127 WordList
*keywordlists
[], Accessor
&styler
) {
129 // The bash lexer often has to backtrack to the start of the current style to determine
130 // which characters are being used as quotes, how deeply nested the
131 // start position is, and what the termination string is for here-documents
133 WordList
&keywords
= *keywordlists
[0];
137 int State
; // 0: '<<' encountered
138 // 1: collect the delimiter
139 // 2: here doc text (lines after the delimiter)
140 char Quote
; // the char after '<<'
141 bool Quoted
; // true if Quote in ('\'','"','`')
142 bool Indent
; // indented delimiter (for <<-)
143 int DelimiterLength
; // strlen(Delimiter)
144 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
151 Delimiter
= new char[HERE_DELIM_MAX
];
183 int state
= initStyle
;
185 unsigned int lengthDoc
= startPos
+ length
;
187 // If in a long distance lexical state, seek to the beginning to find quote characters
188 // Bash strings can be multi-line with embedded newlines, so backtrack.
189 // Bash numbers have additional state during lexing, so backtrack too.
190 if (state
== SCE_SH_HERE_Q
) {
191 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
194 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
195 state
= styler
.StyleAt(startPos
- 1);
197 if (state
== SCE_SH_STRING
198 || state
== SCE_SH_BACKTICKS
199 || state
== SCE_SH_CHARACTER
200 || state
== SCE_SH_NUMBER
201 || state
== SCE_SH_IDENTIFIER
202 || state
== SCE_SH_COMMENTLINE
204 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
207 state
= SCE_SH_DEFAULT
;
210 styler
.StartAt(startPos
);
211 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
214 char chNext
= styler
[startPos
];
215 styler
.StartSegment(startPos
);
217 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
219 // if the current character is not consumed due to the completion of an
220 // earlier style, lexing can be restarted via a simple goto
222 chNext
= styler
.SafeGetCharAt(i
+ 1);
223 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
225 if (styler
.IsLeadByte(ch
)) {
226 chNext
= styler
.SafeGetCharAt(i
+ 2);
232 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
233 styler
.ColourTo(i
, state
);
238 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
239 // Begin of here-doc (the line after the here-doc delimiter):
240 // Lexically, the here-doc starts from the next line after the <<, but the
241 // first line of here-doc seems to follow the style of the last EOL sequence
243 if (HereDoc
.Quoted
) {
244 if (state
== SCE_SH_HERE_DELIM
) {
245 // Missing quote at end of string! We are stricter than bash.
246 // Colour here-doc anyway while marking this bit as an error.
247 state
= SCE_SH_ERROR
;
249 styler
.ColourTo(i
- 1, state
);
250 // HereDoc.Quote always == '\''
251 state
= SCE_SH_HERE_Q
;
253 styler
.ColourTo(i
- 1, state
);
255 state
= SCE_SH_HERE_Q
;
259 if (state
== SCE_SH_DEFAULT
) {
260 if (ch
== '\\') { // escaped character
261 if (i
< lengthDoc
- 1)
265 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
266 } else if (isdigit(ch
)) {
267 state
= SCE_SH_NUMBER
;
268 numBase
= BASH_BASE_DECIMAL
;
269 if (ch
== '0') { // hex,octal
270 if (chNext
== 'x' || chNext
== 'X') {
271 numBase
= BASH_BASE_HEX
;
275 } else if (isdigit(chNext
)) {
276 numBase
= BASH_BASE_OCTAL
;
279 } else if (iswordstart(ch
)) {
281 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
282 // We need this check when the word is only one character long!
283 // This test is copied from the SCE_SH_WORD handler.
284 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
285 state
= SCE_SH_DEFAULT
;
287 } else if (ch
== '#') {
288 state
= SCE_SH_COMMENTLINE
;
289 } else if (ch
== '\"') {
290 state
= SCE_SH_STRING
;
293 } else if (ch
== '\'') {
294 state
= SCE_SH_CHARACTER
;
297 } else if (ch
== '`') {
298 state
= SCE_SH_BACKTICKS
;
301 } else if (ch
== '$') {
303 state
= SCE_SH_PARAM
;
305 } else if (chNext
== '\'') {
306 state
= SCE_SH_CHARACTER
;
308 } else if (chNext
== '"') {
309 state
= SCE_SH_STRING
;
311 } else if (chNext
== '(' && chNext2
== '(') {
312 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
313 state
= SCE_SH_DEFAULT
;
315 } else if (chNext
== '(' || chNext
== '`') {
316 state
= SCE_SH_BACKTICKS
;
322 state
= SCE_SH_SCALAR
;
328 } else if (ch
== '*') {
329 if (chNext
== '*') { // exponentiation
334 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
335 } else if (ch
== '<' && chNext
== '<') {
336 state
= SCE_SH_HERE_DELIM
;
338 HereDoc
.Indent
= false;
339 } else if (ch
== '-' // file test operators
340 && isSingleCharOp(chNext
)
341 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
342 styler
.ColourTo(i
+ 1, SCE_SH_WORD
);
343 state
= SCE_SH_DEFAULT
;
347 } else if (isBashOperator(ch
)) {
348 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
350 // keep colouring defaults to make restart easier
351 styler
.ColourTo(i
, SCE_SH_DEFAULT
);
353 } else if (state
== SCE_SH_NUMBER
) {
354 int digit
= translateBashDigit(ch
);
355 if (numBase
== BASH_BASE_DECIMAL
) {
357 numBase
= getBashNumberBase(styler
.GetStartSegment(), i
- 1, styler
);
358 if (numBase
== BASH_BASE_ERROR
) // take the rest as comment
360 } else if (!isdigit(ch
))
362 } else if (numBase
== BASH_BASE_HEX
) {
363 if ((digit
< 16) || (digit
>= 36 && digit
<= 41)) {
364 // hex digit 0-9a-fA-F
367 } else if (numBase
== BASH_BASE_OCTAL
||
368 numBase
== BASH_BASE_OCTAL_ERROR
) {
371 numBase
= BASH_BASE_OCTAL_ERROR
;
375 } else if (numBase
== BASH_BASE_ERROR
) {
378 } else { // DD#DDDD number style handling
379 if (digit
!= BASH_BASE_ERROR
) {
381 // case-insensitive if base<=36
382 if (digit
>= 36) digit
-= 26;
384 if (digit
>= numBase
) {
386 numBase
= BASH_BASE_ERROR
;
392 if (numBase
== BASH_BASE_ERROR
||
393 numBase
== BASH_BASE_OCTAL_ERROR
)
394 state
= SCE_SH_ERROR
;
395 styler
.ColourTo(i
- 1, state
);
396 state
= SCE_SH_DEFAULT
;
400 } else if (state
== SCE_SH_WORD
) {
401 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
402 // "." never used in Bash variable names
403 // but used in file names
404 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
405 state
= SCE_SH_DEFAULT
;
408 } else if (state
== SCE_SH_IDENTIFIER
) {
409 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
410 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
411 state
= SCE_SH_DEFAULT
;
415 if (state
== SCE_SH_COMMENTLINE
) {
416 if (ch
== '\\' && isEOLChar(chNext
)) {
417 // comment continuation
418 if (chNext
== '\r' && chNext2
== '\n') {
420 ch
= styler
.SafeGetCharAt(i
);
421 chNext
= styler
.SafeGetCharAt(i
+ 1);
427 } else if (isEOLChar(ch
)) {
428 styler
.ColourTo(i
- 1, state
);
429 state
= SCE_SH_DEFAULT
;
431 } else if (isEOLChar(chNext
)) {
432 styler
.ColourTo(i
, state
);
433 state
= SCE_SH_DEFAULT
;
435 } else if (state
== SCE_SH_HERE_DELIM
) {
439 // Specifier format is: <<[-]WORD
440 // Optional '-' is for removal of leading tabs from here-doc.
441 // Whitespace acceptable after <<[-] operator
443 if (HereDoc
.State
== 0) { // '<<' encountered
445 HereDoc
.Quote
= chNext
;
446 HereDoc
.Quoted
= false;
447 HereDoc
.DelimiterLength
= 0;
448 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
449 if (chNext
== '\'' || chNext
== '\"') { // a quoted here-doc delimiter (' or ")
453 HereDoc
.Quoted
= true;
454 } else if (!HereDoc
.Indent
&& chNext
== '-') { // <<- indent case
455 HereDoc
.Indent
= true;
457 } else if (isalpha(chNext
) || chNext
== '_' || chNext
== '\\'
458 || chNext
== '-' || chNext
== '+' || chNext
== '!') {
459 // an unquoted here-doc delimiter, no special handling
460 // TODO check what exactly bash considers part of the delim
461 } else if (chNext
== '<') { // HERE string <<<
465 styler
.ColourTo(i
, SCE_SH_HERE_DELIM
);
466 state
= SCE_SH_DEFAULT
;
468 } else if (isspacechar(chNext
)) {
471 } else if (isdigit(chNext
) || chNext
== '=' || chNext
== '$') {
472 // left shift << or <<= operator cases
473 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
474 state
= SCE_SH_DEFAULT
;
477 // symbols terminates; deprecated zero-length delimiter
479 } else if (HereDoc
.State
== 1) { // collect the delimiter
480 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
481 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
482 styler
.ColourTo(i
, state
);
483 state
= SCE_SH_DEFAULT
;
485 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
490 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
491 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
493 } else { // an unquoted here-doc delimiter
494 if (isalnum(ch
) || ch
== '_' || ch
== '-' || ch
== '+' || ch
== '!') {
495 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
496 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
497 } else if (ch
== '\\') {
498 // skip escape prefix
500 styler
.ColourTo(i
- 1, state
);
501 state
= SCE_SH_DEFAULT
;
505 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
506 styler
.ColourTo(i
- 1, state
);
507 state
= SCE_SH_ERROR
;
511 } else if (HereDoc
.State
== 2) {
512 // state == SCE_SH_HERE_Q
513 if (isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
514 if (!HereDoc
.Indent
&& isEOLChar(chPrev
)) {
516 // standard HERE delimiter
517 i
+= HereDoc
.DelimiterLength
;
518 chPrev
= styler
.SafeGetCharAt(i
- 1);
519 ch
= styler
.SafeGetCharAt(i
);
521 styler
.ColourTo(i
- 1, state
);
522 state
= SCE_SH_DEFAULT
;
526 chNext
= styler
.SafeGetCharAt(i
+ 1);
527 } else if (HereDoc
.Indent
) {
528 // indented HERE delimiter
529 unsigned int bk
= (i
> 0)? i
- 1: 0;
531 ch
= styler
.SafeGetCharAt(bk
--);
534 } else if (!isspacechar(ch
)) {
535 break; // got leading non-whitespace
540 } else if (state
== SCE_SH_SCALAR
) { // variable names
542 if ((state
== SCE_SH_SCALAR
)
543 && i
== (styler
.GetStartSegment() + 1)) {
544 // Special variable: $(, $_ etc.
545 styler
.ColourTo(i
, state
);
546 state
= SCE_SH_DEFAULT
;
548 styler
.ColourTo(i
- 1, state
);
549 state
= SCE_SH_DEFAULT
;
553 } else if (state
== SCE_SH_STRING
554 || state
== SCE_SH_CHARACTER
555 || state
== SCE_SH_BACKTICKS
556 || state
== SCE_SH_PARAM
558 if (!Quote
.Down
&& !isspacechar(ch
)) {
560 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
563 chNext
= styler
.SafeGetCharAt(i
+ 1);
564 } else if (ch
== Quote
.Down
) {
566 if (Quote
.Count
== 0) {
568 if (Quote
.Rep
<= 0) {
569 styler
.ColourTo(i
, state
);
570 state
= SCE_SH_DEFAULT
;
573 if (Quote
.Up
== Quote
.Down
) {
577 } else if (ch
== Quote
.Up
) {
582 if (state
== SCE_SH_ERROR
) {
587 styler
.ColourTo(lengthDoc
- 1, state
);
590 static bool IsCommentLine(int line
, Accessor
&styler
) {
591 int pos
= styler
.LineStart(line
);
592 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
593 for (int i
= pos
; i
< eol_pos
; i
++) {
597 else if (ch
!= ' ' && ch
!= '\t')
603 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
605 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
606 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
607 unsigned int endPos
= startPos
+ length
;
608 int visibleChars
= 0;
609 int lineCurrent
= styler
.GetLine(startPos
);
610 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
611 int levelCurrent
= levelPrev
;
612 char chNext
= styler
[startPos
];
613 int styleNext
= styler
.StyleAt(startPos
);
614 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
616 chNext
= styler
.SafeGetCharAt(i
+ 1);
617 int style
= styleNext
;
618 styleNext
= styler
.StyleAt(i
+ 1);
619 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
621 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
623 if (!IsCommentLine(lineCurrent
- 1, styler
)
624 && IsCommentLine(lineCurrent
+ 1, styler
))
626 else if (IsCommentLine(lineCurrent
- 1, styler
)
627 && !IsCommentLine(lineCurrent
+1, styler
))
630 if (style
== SCE_SH_OPERATOR
) {
633 } else if (ch
== '}') {
639 if (visibleChars
== 0 && foldCompact
)
640 lev
|= SC_FOLDLEVELWHITEFLAG
;
641 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
642 lev
|= SC_FOLDLEVELHEADERFLAG
;
643 if (lev
!= styler
.LevelAt(lineCurrent
)) {
644 styler
.SetLevel(lineCurrent
, lev
);
647 levelPrev
= levelCurrent
;
650 if (!isspacechar(ch
))
653 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
654 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
655 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
658 static const char * const bashWordListDesc
[] = {
663 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);