]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexBash.cxx
1 // Scintilla source code edit control
5 // Copyright 2004-2010 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
16 #include "Scintilla.h"
20 #include "LexAccessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
27 using namespace Scintilla
;
30 #define HERE_DELIM_MAX 256
// Define PEDANTIC_OCTAL if you want 'invalid octals' to be marked as errors.
// Usually this is not a good idea; permissive lexing is better.
36 #define BASH_BASE_ERROR 65
37 #define BASH_BASE_DECIMAL 66
38 #define BASH_BASE_HEX 67
40 #define BASH_BASE_OCTAL 68
41 #define BASH_BASE_OCTAL_ERROR 69
44 // state constants for parts of a bash command segment
45 #define BASH_CMD_BODY 0
46 #define BASH_CMD_START 1
47 #define BASH_CMD_WORD 2
48 #define BASH_CMD_TEST 3
49 #define BASH_CMD_ARITH 4
50 #define BASH_CMD_DELIM 5
52 static inline int translateBashDigit(int ch
) {
53 if (ch
>= '0' && ch
<= '9') {
55 } else if (ch
>= 'a' && ch
<= 'z') {
57 } else if (ch
>= 'A' && ch
<= 'Z') {
59 } else if (ch
== '@') {
61 } else if (ch
== '_') {
64 return BASH_BASE_ERROR
;
67 static inline int getBashNumberBase(char *s
) {
71 base
= base
* 10 + (*s
++ - '0');
74 if (base
> 64 || i
> 2) {
75 return BASH_BASE_ERROR
;
// Return the closing delimiter that pairs with an opening bracket character.
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
// Any other character (e.g. a quote) closes itself and is returned unchanged.
// NOTE(review): the final fallback return was absent from the damaged text
// and was restored -- confirm against the upstream file.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
88 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
89 WordList
*keywordlists
[], Accessor
&styler
) {
91 WordList
&keywords
= *keywordlists
[0];
92 WordList cmdDelimiter
, bashStruct
, bashStruct_in
;
93 cmdDelimiter
.Set("| || |& & && ; ;; ( ) { }");
94 bashStruct
.Set("if elif fi while until else then do done esac eval");
95 bashStruct_in
.Set("for case select");
97 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_");
98 // note that [+-] are often parts of identifiers in shell scripts
99 CharacterSet
setWord(CharacterSet::setAlphaNum
, "._+-");
100 CharacterSet
setBashOperator(CharacterSet::setNone
, "^&%()-+=|{}[]:;>,*/<?!.~@");
101 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
102 CharacterSet
setParam(CharacterSet::setAlphaNum
, "$_");
103 CharacterSet
setHereDoc(CharacterSet::setAlpha
, "_\\-+!");
104 CharacterSet
setHereDoc2(CharacterSet::setAlphaNum
, "_-+!");
105 CharacterSet
setLeftShift(CharacterSet::setDigits
, "=$");
107 class HereDocCls
{ // Class to manage HERE document elements
109 int State
; // 0: '<<' encountered
110 // 1: collect the delimiter
111 // 2: here doc text (lines after the delimiter)
112 int Quote
; // the char after '<<'
113 bool Quoted
; // true if Quote in ('\'','"','`')
114 bool Indent
; // indented delimiter (for <<-)
115 int DelimiterLength
; // strlen(Delimiter)
116 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
123 Delimiter
= new char[HERE_DELIM_MAX
];
126 void Append(int ch
) {
127 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
128 Delimiter
[DelimiterLength
] = '\0';
136 class QuoteCls
{ // Class to manage quote pairs (simplified vs LexPerl)
159 unsigned int endPos
= startPos
+ length
;
160 int cmdState
= BASH_CMD_START
;
161 int testExprType
= 0;
163 // Always backtracks to the start of a line that is not a continuation
164 // of the previous line (i.e. start of a bash command segment)
165 int ln
= styler
.GetLine(startPos
);
167 startPos
= styler
.LineStart(ln
);
168 if (ln
== 0 || styler
.GetLineState(ln
) == BASH_CMD_START
)
172 initStyle
= SCE_SH_DEFAULT
;
174 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
176 for (; sc
.More(); sc
.Forward()) {
178 // handle line continuation, updates per-line stored state
179 if (sc
.atLineStart
) {
180 ln
= styler
.GetLine(sc
.currentPos
);
181 if (sc
.state
== SCE_SH_STRING
182 || sc
.state
== SCE_SH_BACKTICKS
183 || sc
.state
== SCE_SH_CHARACTER
184 || sc
.state
== SCE_SH_HERE_Q
185 || sc
.state
== SCE_SH_COMMENTLINE
186 || sc
.state
== SCE_SH_PARAM
) {
187 // force backtrack while retaining cmdState
188 styler
.SetLineState(ln
, BASH_CMD_BODY
);
191 if ((sc
.GetRelative(-3) == '\\' && sc
.GetRelative(-2) == '\r' && sc
.chPrev
== '\n')
192 || sc
.GetRelative(-2) == '\\') { // handle '\' line continuation
193 // retain last line's state
195 cmdState
= BASH_CMD_START
;
197 styler
.SetLineState(ln
, cmdState
);
// Controls the change of cmdState at the end of a non-whitespace element.
// States BODY|TEST|ARITH persist until the end of a command segment.
// State WORD persists, but ends with the 'in' or 'do' construct keywords.
204 int cmdStateNew
= BASH_CMD_BODY
;
205 if (cmdState
== BASH_CMD_TEST
|| cmdState
== BASH_CMD_ARITH
|| cmdState
== BASH_CMD_WORD
)
206 cmdStateNew
= cmdState
;
207 int stylePrev
= sc
.state
;
209 // Determine if the current state should terminate.
211 case SCE_SH_OPERATOR
:
212 sc
.SetState(SCE_SH_DEFAULT
);
213 if (cmdState
== BASH_CMD_DELIM
) // if command delimiter, start new command
214 cmdStateNew
= BASH_CMD_START
;
215 else if (sc
.chPrev
== '\\') // propagate command state if line continued
216 cmdStateNew
= cmdState
;
219 // "." never used in Bash variable names but used in file names
220 if (!setWord
.Contains(sc
.ch
)) {
223 sc
.GetCurrent(s
, sizeof(s
));
224 // allow keywords ending in a whitespace or command delimiter
225 s2
[0] = static_cast<char>(sc
.ch
);
227 bool keywordEnds
= IsASpace(sc
.ch
) || cmdDelimiter
.InList(s2
);
228 // 'in' or 'do' may be construct keywords
229 if (cmdState
== BASH_CMD_WORD
) {
230 if (strcmp(s
, "in") == 0 && keywordEnds
)
231 cmdStateNew
= BASH_CMD_BODY
;
232 else if (strcmp(s
, "do") == 0 && keywordEnds
)
233 cmdStateNew
= BASH_CMD_START
;
235 sc
.ChangeState(SCE_SH_IDENTIFIER
);
236 sc
.SetState(SCE_SH_DEFAULT
);
239 // a 'test' keyword starts a test expression
240 if (strcmp(s
, "test") == 0) {
241 if (cmdState
== BASH_CMD_START
&& keywordEnds
) {
242 cmdStateNew
= BASH_CMD_TEST
;
245 sc
.ChangeState(SCE_SH_IDENTIFIER
);
247 // detect bash construct keywords
248 else if (bashStruct
.InList(s
)) {
249 if (cmdState
== BASH_CMD_START
&& keywordEnds
)
250 cmdStateNew
= BASH_CMD_START
;
252 sc
.ChangeState(SCE_SH_IDENTIFIER
);
254 // 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
255 else if (bashStruct_in
.InList(s
)) {
256 if (cmdState
== BASH_CMD_START
&& keywordEnds
)
257 cmdStateNew
= BASH_CMD_WORD
;
259 sc
.ChangeState(SCE_SH_IDENTIFIER
);
261 // disambiguate option items and file test operators
262 else if (s
[0] == '-') {
263 if (cmdState
!= BASH_CMD_TEST
)
264 sc
.ChangeState(SCE_SH_IDENTIFIER
);
266 // disambiguate keywords and identifiers
267 else if (cmdState
!= BASH_CMD_START
268 || !(keywords
.InList(s
) && keywordEnds
)) {
269 sc
.ChangeState(SCE_SH_IDENTIFIER
);
271 sc
.SetState(SCE_SH_DEFAULT
);
274 case SCE_SH_IDENTIFIER
:
275 if (sc
.chPrev
== '\\') { // for escaped chars
276 sc
.ForwardSetState(SCE_SH_DEFAULT
);
277 } else if (!setWord
.Contains(sc
.ch
)) {
278 sc
.SetState(SCE_SH_DEFAULT
);
282 digit
= translateBashDigit(sc
.ch
);
283 if (numBase
== BASH_BASE_DECIMAL
) {
286 sc
.GetCurrent(s
, sizeof(s
));
287 numBase
= getBashNumberBase(s
);
288 if (numBase
!= BASH_BASE_ERROR
)
290 } else if (IsADigit(sc
.ch
))
292 } else if (numBase
== BASH_BASE_HEX
) {
293 if (IsADigit(sc
.ch
, 16))
295 #ifdef PEDANTIC_OCTAL
296 } else if (numBase
== BASH_BASE_OCTAL
||
297 numBase
== BASH_BASE_OCTAL_ERROR
) {
301 numBase
= BASH_BASE_OCTAL_ERROR
;
305 } else if (numBase
== BASH_BASE_ERROR
) {
308 } else { // DD#DDDD number style handling
309 if (digit
!= BASH_BASE_ERROR
) {
311 // case-insensitive if base<=36
312 if (digit
>= 36) digit
-= 26;
317 numBase
= BASH_BASE_ERROR
;
322 // fallthrough when number is at an end or error
323 if (numBase
== BASH_BASE_ERROR
324 #ifdef PEDANTIC_OCTAL
325 || numBase
== BASH_BASE_OCTAL_ERROR
328 sc
.ChangeState(SCE_SH_ERROR
);
330 sc
.SetState(SCE_SH_DEFAULT
);
332 case SCE_SH_COMMENTLINE
:
333 if (sc
.atLineEnd
&& sc
.chPrev
!= '\\') {
334 sc
.SetState(SCE_SH_DEFAULT
);
337 case SCE_SH_HERE_DELIM
:
340 // Specifier format is: <<[-]WORD
341 // Optional '-' is for removal of leading tabs from here-doc.
342 // Whitespace acceptable after <<[-] operator
344 if (HereDoc
.State
== 0) { // '<<' encountered
345 HereDoc
.Quote
= sc
.chNext
;
346 HereDoc
.Quoted
= false;
347 HereDoc
.DelimiterLength
= 0;
348 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
349 if (sc
.chNext
== '\'' || sc
.chNext
== '\"') { // a quoted here-doc delimiter (' or ")
351 HereDoc
.Quoted
= true;
353 } else if (!HereDoc
.Indent
&& sc
.chNext
== '-') { // <<- indent case
354 HereDoc
.Indent
= true;
355 } else if (setHereDoc
.Contains(sc
.chNext
)) {
356 // an unquoted here-doc delimiter, no special handling
357 // TODO check what exactly bash considers part of the delim
359 } else if (sc
.chNext
== '<') { // HERE string <<<
361 sc
.ForwardSetState(SCE_SH_DEFAULT
);
362 } else if (IsASpace(sc
.chNext
)) {
364 } else if (setLeftShift
.Contains(sc
.chNext
)) {
365 // left shift << or <<= operator cases
366 sc
.ChangeState(SCE_SH_OPERATOR
);
367 sc
.ForwardSetState(SCE_SH_DEFAULT
);
369 // symbols terminates; deprecated zero-length delimiter
372 } else if (HereDoc
.State
== 1) { // collect the delimiter
373 if (setHereDoc2
.Contains(sc
.ch
) || sc
.chPrev
== '\\') {
374 HereDoc
.Append(sc
.ch
);
375 } else if (HereDoc
.Quoted
&& sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
376 sc
.ForwardSetState(SCE_SH_DEFAULT
);
377 } else if (sc
.ch
== '\\') {
378 // skip escape prefix
380 sc
.SetState(SCE_SH_DEFAULT
);
382 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) { // force blowup
383 sc
.SetState(SCE_SH_ERROR
);
389 // HereDoc.State == 2
390 if (sc
.atLineStart
) {
391 sc
.SetState(SCE_SH_HERE_Q
);
393 while (IsASpace(sc
.ch
) && !sc
.atLineEnd
) { // whitespace prefix
398 sc
.SetState(SCE_SH_HERE_Q
);
399 while (!sc
.atLineEnd
) {
402 char s
[HERE_DELIM_MAX
];
403 sc
.GetCurrent(s
, sizeof(s
));
404 if (sc
.LengthCurrent() == 0)
406 if (s
[strlen(s
) - 1] == '\r')
407 s
[strlen(s
) - 1] = '\0';
408 if (strcmp(HereDoc
.Delimiter
, s
) == 0) {
409 if ((prefixws
== 0) || // indentation rule
410 (prefixws
> 0 && HereDoc
.Indent
)) {
411 sc
.SetState(SCE_SH_DEFAULT
);
417 case SCE_SH_SCALAR
: // variable names
418 if (!setParam
.Contains(sc
.ch
)) {
419 if (sc
.LengthCurrent() == 1) {
420 // Special variable: $(, $_ etc.
421 sc
.ForwardSetState(SCE_SH_DEFAULT
);
423 sc
.SetState(SCE_SH_DEFAULT
);
427 case SCE_SH_STRING
: // delimited styles
428 case SCE_SH_BACKTICKS
:
430 if (sc
.ch
== '\\' && Quote
.Up
!= '\\') {
432 } else if (sc
.ch
== Quote
.Down
) {
434 if (Quote
.Count
== 0) {
435 sc
.ForwardSetState(SCE_SH_DEFAULT
);
437 } else if (sc
.ch
== Quote
.Up
) {
441 case SCE_SH_CHARACTER
: // singly-quoted strings
442 if (sc
.ch
== Quote
.Down
) {
444 if (Quote
.Count
== 0) {
445 sc
.ForwardSetState(SCE_SH_DEFAULT
);
451 // Must check end of HereDoc state 1 before default state is handled
452 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
// Begin of here-doc (the line after the here-doc delimiter):
// Lexically, the here-doc starts on the line after the '<<' operator, but the
// first line of the here-doc seems to follow the style of the last EOL sequence.
457 if (HereDoc
.Quoted
) {
458 if (sc
.state
== SCE_SH_HERE_DELIM
) {
459 // Missing quote at end of string! We are stricter than bash.
460 // Colour here-doc anyway while marking this bit as an error.
461 sc
.ChangeState(SCE_SH_ERROR
);
463 // HereDoc.Quote always == '\''
465 sc
.SetState(SCE_SH_HERE_Q
);
468 // update cmdState about the current command segment
469 if (stylePrev
!= SCE_SH_DEFAULT
&& sc
.state
== SCE_SH_DEFAULT
) {
470 cmdState
= cmdStateNew
;
472 // Determine if a new state should be entered.
473 if (sc
.state
== SCE_SH_DEFAULT
) {
475 // Bash can escape any non-newline as a literal
476 sc
.SetState(SCE_SH_IDENTIFIER
);
477 if (sc
.chNext
== '\r' || sc
.chNext
== '\n')
478 sc
.SetState(SCE_SH_OPERATOR
);
479 } else if (IsADigit(sc
.ch
)) {
480 sc
.SetState(SCE_SH_NUMBER
);
481 numBase
= BASH_BASE_DECIMAL
;
482 if (sc
.ch
== '0') { // hex,octal
483 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
484 numBase
= BASH_BASE_HEX
;
486 } else if (IsADigit(sc
.chNext
)) {
487 #ifdef PEDANTIC_OCTAL
488 numBase
= BASH_BASE_OCTAL
;
490 numBase
= BASH_BASE_HEX
;
494 } else if (setWordStart
.Contains(sc
.ch
)) {
495 sc
.SetState(SCE_SH_WORD
);
496 } else if (sc
.ch
== '#') {
497 sc
.SetState(SCE_SH_COMMENTLINE
);
498 } else if (sc
.ch
== '\"') {
499 sc
.SetState(SCE_SH_STRING
);
501 } else if (sc
.ch
== '\'') {
502 sc
.SetState(SCE_SH_CHARACTER
);
504 } else if (sc
.ch
== '`') {
505 sc
.SetState(SCE_SH_BACKTICKS
);
507 } else if (sc
.ch
== '$') {
508 if (sc
.Match("$((")) {
509 sc
.SetState(SCE_SH_OPERATOR
); // handle '((' later
512 sc
.SetState(SCE_SH_SCALAR
);
515 sc
.ChangeState(SCE_SH_PARAM
);
516 } else if (sc
.ch
== '\'') {
517 sc
.ChangeState(SCE_SH_STRING
);
518 } else if (sc
.ch
== '"') {
519 sc
.ChangeState(SCE_SH_STRING
);
520 } else if (sc
.ch
== '(' || sc
.ch
== '`') {
521 sc
.ChangeState(SCE_SH_BACKTICKS
);
523 continue; // scalar has no delimiter pair
525 // fallthrough, open delim for $[{'"(`]
527 } else if (sc
.Match('<', '<')) {
528 sc
.SetState(SCE_SH_HERE_DELIM
);
530 HereDoc
.Indent
= false;
531 } else if (sc
.ch
== '-' && // one-char file test operators
532 setSingleCharOp
.Contains(sc
.chNext
) &&
533 !setWord
.Contains(sc
.GetRelative(2)) &&
534 IsASpace(sc
.chPrev
)) {
535 sc
.SetState(SCE_SH_WORD
);
537 } else if (setBashOperator
.Contains(sc
.ch
)) {
539 bool isCmdDelim
= false;
540 sc
.SetState(SCE_SH_OPERATOR
);
541 // handle opening delimiters for test/arithmetic expressions - ((,[[,[
542 if (cmdState
== BASH_CMD_START
543 || cmdState
== BASH_CMD_BODY
) {
544 if (sc
.Match('(', '(')) {
545 cmdState
= BASH_CMD_ARITH
;
547 } else if (sc
.Match('[', '[') && IsASpace(sc
.GetRelative(2))) {
548 cmdState
= BASH_CMD_TEST
;
551 } else if (sc
.ch
== '[' && IsASpace(sc
.chNext
)) {
552 cmdState
= BASH_CMD_TEST
;
556 // special state -- for ((x;y;z)) in ... looping
557 if (cmdState
== BASH_CMD_WORD
&& sc
.Match('(', '(')) {
558 cmdState
= BASH_CMD_ARITH
;
562 // handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
563 if (cmdState
== BASH_CMD_START
564 || cmdState
== BASH_CMD_BODY
565 || cmdState
== BASH_CMD_WORD
566 || (cmdState
== BASH_CMD_TEST
&& testExprType
== 0)) {
567 s
[0] = static_cast<char>(sc
.ch
);
568 if (setBashOperator
.Contains(sc
.chNext
)) {
569 s
[1] = static_cast<char>(sc
.chNext
);
571 isCmdDelim
= cmdDelimiter
.InList(s
);
577 isCmdDelim
= cmdDelimiter
.InList(s
);
580 cmdState
= BASH_CMD_DELIM
;
584 // handle closing delimiters for test/arithmetic expressions - )),]],]
585 if (cmdState
== BASH_CMD_ARITH
&& sc
.Match(')', ')')) {
586 cmdState
= BASH_CMD_BODY
;
588 } else if (cmdState
== BASH_CMD_TEST
&& IsASpace(sc
.chPrev
)) {
589 if (sc
.Match(']', ']') && testExprType
== 1) {
591 cmdState
= BASH_CMD_BODY
;
592 } else if (sc
.ch
== ']' && testExprType
== 2) {
593 cmdState
= BASH_CMD_BODY
;
602 static bool IsCommentLine(int line
, Accessor
&styler
) {
603 int pos
= styler
.LineStart(line
);
604 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
605 for (int i
= pos
; i
< eol_pos
; i
++) {
609 else if (ch
!= ' ' && ch
!= '\t')
615 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
617 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
618 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
619 unsigned int endPos
= startPos
+ length
;
620 int visibleChars
= 0;
621 int lineCurrent
= styler
.GetLine(startPos
);
622 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
623 int levelCurrent
= levelPrev
;
624 char chNext
= styler
[startPos
];
625 int styleNext
= styler
.StyleAt(startPos
);
626 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
628 chNext
= styler
.SafeGetCharAt(i
+ 1);
629 int style
= styleNext
;
630 styleNext
= styler
.StyleAt(i
+ 1);
631 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
633 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
635 if (!IsCommentLine(lineCurrent
- 1, styler
)
636 && IsCommentLine(lineCurrent
+ 1, styler
))
638 else if (IsCommentLine(lineCurrent
- 1, styler
)
639 && !IsCommentLine(lineCurrent
+ 1, styler
))
642 if (style
== SCE_SH_OPERATOR
) {
645 } else if (ch
== '}') {
649 // Here Document folding
650 if (style
== SCE_SH_HERE_DELIM
) {
651 if (ch
== '<' && chNext
== '<') {
654 } else if (style
== SCE_SH_HERE_Q
&& styler
.StyleAt(i
+1) == SCE_PL_DEFAULT
) {
659 if (visibleChars
== 0 && foldCompact
)
660 lev
|= SC_FOLDLEVELWHITEFLAG
;
661 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
662 lev
|= SC_FOLDLEVELHEADERFLAG
;
663 if (lev
!= styler
.LevelAt(lineCurrent
)) {
664 styler
.SetLevel(lineCurrent
, lev
);
667 levelPrev
= levelCurrent
;
670 if (!isspacechar(ch
))
673 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
674 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
675 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
678 static const char * const bashWordListDesc
[] = {
683 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);