]>
git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexBash.cxx
903d793bb0b7b5551cbbcabe39eda665ffb083f1
1 // Scintilla source code edit control
5 // Copyright 2004-2005 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
23 #define BASH_BASE_ERROR 65
24 #define BASH_BASE_DECIMAL 66
25 #define BASH_BASE_HEX 67
26 #define BASH_BASE_OCTAL 68
27 #define BASH_BASE_OCTAL_ERROR 69
29 #define HERE_DELIM_MAX 256
31 static inline int translateBashDigit(char ch
) {
32 if (ch
>= '0' && ch
<= '9') {
34 } else if (ch
>= 'a' && ch
<= 'z') {
36 } else if (ch
>= 'A' && ch
<= 'Z') {
38 } else if (ch
== '@') {
40 } else if (ch
== '_') {
43 return BASH_BASE_ERROR
;
46 static inline bool isEOLChar(char ch
) {
47 return (ch
== '\r') || (ch
== '\n');
50 static bool isSingleCharOp(char ch
) {
54 return (NULL
!= strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet
));
57 static inline bool isBashOperator(char ch
) {
58 if (ch
== '^' || ch
== '&' || ch
== '\\' || ch
== '%' ||
59 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
60 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
61 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
62 ch
== '>' || ch
== ',' || ch
== '/' || ch
== '<' ||
63 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~' ||
69 static int classifyWordBash(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
71 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
72 s
[i
] = styler
[start
+ i
];
75 char chAttr
= SCE_SH_IDENTIFIER
;
76 if (keywords
.InList(s
))
78 styler
.ColourTo(end
, chAttr
);
82 static inline int getBashNumberBase(unsigned int start
, unsigned int end
, Accessor
&styler
) {
84 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 10; i
++) {
85 base
= base
* 10 + (styler
[start
+ i
] - '0');
87 if (base
> 64 || (end
- start
) > 1) {
88 return BASH_BASE_ERROR
;
93 static inline bool isEndVar(char ch
) {
94 return !isalnum(ch
) && ch
!= '$' && ch
!= '_';
97 static inline bool isNonQuote(char ch
) {
98 return isalnum(ch
) || ch
== '_';
101 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
102 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
106 if (*val
!= styler
[pos
++]) {
114 static char opposite(char ch
) {
126 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
127 WordList
*keywordlists
[], Accessor
&styler
) {
129 // Lexer for bash often has to backtrack to start of current style to determine
130 // which characters are being used as quotes, how deeply nested is the
131 // start position and what the termination string is for here documents
133 WordList
&keywords
= *keywordlists
[0];
137 int State
; // 0: '<<' encountered
138 // 1: collect the delimiter
139 // 2: here doc text (lines after the delimiter)
140 char Quote
; // the char after '<<'
141 bool Quoted
; // true if Quote in ('\'','"','`')
142 bool Indent
; // indented delimiter (for <<-)
143 int DelimiterLength
; // strlen(Delimiter)
144 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
151 Delimiter
= new char[HERE_DELIM_MAX
];
183 int state
= initStyle
;
185 unsigned int lengthDoc
= startPos
+ length
;
187 // If in a long distance lexical state, seek to the beginning to find quote characters
188 // Bash strings can be multi-line with embedded newlines, so backtrack.
189 // Bash numbers have additional state during lexing, so backtrack too.
190 if (state
== SCE_SH_HERE_Q
) {
191 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
194 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
195 state
= styler
.StyleAt(startPos
- 1);
197 if (state
== SCE_SH_STRING
198 || state
== SCE_SH_BACKTICKS
199 || state
== SCE_SH_CHARACTER
200 || state
== SCE_SH_NUMBER
201 || state
== SCE_SH_IDENTIFIER
202 || state
== SCE_SH_COMMENTLINE
204 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
207 state
= SCE_SH_DEFAULT
;
210 styler
.StartAt(startPos
);
211 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
214 char chNext
= styler
[startPos
];
215 styler
.StartSegment(startPos
);
217 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
219 // if the current character is not consumed due to the completion of an
220 // earlier style, lexing can be restarted via a simple goto
222 chNext
= styler
.SafeGetCharAt(i
+ 1);
223 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
225 if (styler
.IsLeadByte(ch
)) {
226 chNext
= styler
.SafeGetCharAt(i
+ 2);
232 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
233 styler
.ColourTo(i
, state
);
238 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
239 // Begin of here-doc (the line after the here-doc delimiter):
240 // Lexically, the here-doc starts from the next line after the >>, but the
241 // first line of here-doc seem to follow the style of the last EOL sequence
243 if (HereDoc
.Quoted
) {
244 if (state
== SCE_SH_HERE_DELIM
) {
245 // Missing quote at end of string! We are stricter than bash.
246 // Colour here-doc anyway while marking this bit as an error.
247 state
= SCE_SH_ERROR
;
249 styler
.ColourTo(i
- 1, state
);
250 // HereDoc.Quote always == '\''
251 state
= SCE_SH_HERE_Q
;
253 styler
.ColourTo(i
- 1, state
);
255 state
= SCE_SH_HERE_Q
;
259 if (state
== SCE_SH_DEFAULT
) {
260 if (ch
== '\\') { // escaped character
264 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
265 } else if (isdigit(ch
)) {
266 state
= SCE_SH_NUMBER
;
267 numBase
= BASH_BASE_DECIMAL
;
268 if (ch
== '0') { // hex,octal
269 if (chNext
== 'x' || chNext
== 'X') {
270 numBase
= BASH_BASE_HEX
;
274 } else if (isdigit(chNext
)) {
275 numBase
= BASH_BASE_OCTAL
;
278 } else if (iswordstart(ch
)) {
280 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
281 // We need that if length of word == 1!
282 // This test is copied from the SCE_SH_WORD handler.
283 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
284 state
= SCE_SH_DEFAULT
;
286 } else if (ch
== '#') {
287 state
= SCE_SH_COMMENTLINE
;
288 } else if (ch
== '\"') {
289 state
= SCE_SH_STRING
;
292 } else if (ch
== '\'') {
293 state
= SCE_SH_CHARACTER
;
296 } else if (ch
== '`') {
297 state
= SCE_SH_BACKTICKS
;
300 } else if (ch
== '$') {
302 state
= SCE_SH_PARAM
;
304 } else if (chNext
== '\'') {
305 state
= SCE_SH_CHARACTER
;
307 } else if (chNext
== '"') {
308 state
= SCE_SH_STRING
;
310 } else if (chNext
== '(' && chNext2
== '(') {
311 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
312 state
= SCE_SH_DEFAULT
;
314 } else if (chNext
== '(' || chNext
== '`') {
315 state
= SCE_SH_BACKTICKS
;
321 state
= SCE_SH_SCALAR
;
327 } else if (ch
== '*') {
328 if (chNext
== '*') { // exponentiation
333 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
334 } else if (ch
== '<' && chNext
== '<') {
335 state
= SCE_SH_HERE_DELIM
;
337 HereDoc
.Indent
= false;
338 } else if (ch
== '-' // file test operators
339 && isSingleCharOp(chNext
)
340 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
341 styler
.ColourTo(i
+ 1, SCE_SH_WORD
);
342 state
= SCE_SH_DEFAULT
;
346 } else if (isBashOperator(ch
)) {
347 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
349 // keep colouring defaults to make restart easier
350 styler
.ColourTo(i
, SCE_SH_DEFAULT
);
352 } else if (state
== SCE_SH_NUMBER
) {
353 int digit
= translateBashDigit(ch
);
354 if (numBase
== BASH_BASE_DECIMAL
) {
356 numBase
= getBashNumberBase(styler
.GetStartSegment(), i
- 1, styler
);
357 if (numBase
== BASH_BASE_ERROR
) // take the rest as comment
359 } else if (!isdigit(ch
))
361 } else if (numBase
== BASH_BASE_HEX
) {
362 if ((digit
< 16) || (digit
>= 36 && digit
<= 41)) {
363 // hex digit 0-9a-fA-F
366 } else if (numBase
== BASH_BASE_OCTAL
||
367 numBase
== BASH_BASE_OCTAL_ERROR
) {
370 numBase
= BASH_BASE_OCTAL_ERROR
;
374 } else if (numBase
== BASH_BASE_ERROR
) {
377 } else { // DD#DDDD number style handling
378 if (digit
!= BASH_BASE_ERROR
) {
380 // case-insensitive if base<=36
381 if (digit
>= 36) digit
-= 26;
383 if (digit
>= numBase
) {
385 numBase
= BASH_BASE_ERROR
;
391 if (numBase
== BASH_BASE_ERROR
||
392 numBase
== BASH_BASE_OCTAL_ERROR
)
393 state
= SCE_SH_ERROR
;
394 styler
.ColourTo(i
- 1, state
);
395 state
= SCE_SH_DEFAULT
;
399 } else if (state
== SCE_SH_WORD
) {
400 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
401 // "." never used in Bash variable names
402 // but used in file names
403 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
404 state
= SCE_SH_DEFAULT
;
407 } else if (state
== SCE_SH_IDENTIFIER
) {
408 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
409 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
410 state
= SCE_SH_DEFAULT
;
414 if (state
== SCE_SH_COMMENTLINE
) {
415 if (ch
== '\\' && isEOLChar(chNext
)) {
416 // comment continuation
417 if (chNext
== '\r' && chNext2
== '\n') {
419 ch
= styler
.SafeGetCharAt(i
);
420 chNext
= styler
.SafeGetCharAt(i
+ 1);
426 } else if (isEOLChar(ch
)) {
427 styler
.ColourTo(i
- 1, state
);
428 state
= SCE_SH_DEFAULT
;
430 } else if (isEOLChar(chNext
)) {
431 styler
.ColourTo(i
, state
);
432 state
= SCE_SH_DEFAULT
;
434 } else if (state
== SCE_SH_HERE_DELIM
) {
438 // Specifier format is: <<[-]WORD
439 // Optional '-' is for removal of leading tabs from here-doc.
440 // Whitespace acceptable after <<[-] operator
442 if (HereDoc
.State
== 0) { // '<<' encountered
444 HereDoc
.Quote
= chNext
;
445 HereDoc
.Quoted
= false;
446 HereDoc
.DelimiterLength
= 0;
447 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
448 if (chNext
== '\'' || chNext
== '\"') { // a quoted here-doc delimiter (' or ")
452 HereDoc
.Quoted
= true;
453 } else if (!HereDoc
.Indent
&& chNext
== '-') { // <<- indent case
454 HereDoc
.Indent
= true;
456 } else if (isalpha(chNext
) || chNext
== '_' || chNext
== '\\'
457 || chNext
== '-' || chNext
== '+' || chNext
== '!') {
458 // an unquoted here-doc delimiter, no special handling
459 // TODO check what exactly bash considers part of the delim
460 } else if (chNext
== '<') { // HERE string <<<
464 styler
.ColourTo(i
, SCE_SH_HERE_DELIM
);
465 state
= SCE_SH_DEFAULT
;
467 } else if (isspacechar(chNext
)) {
470 } else if (isdigit(chNext
) || chNext
== '=' || chNext
== '$') {
471 // left shift << or <<= operator cases
472 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
473 state
= SCE_SH_DEFAULT
;
476 // symbols terminates; deprecated zero-length delimiter
478 } else if (HereDoc
.State
== 1) { // collect the delimiter
479 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
480 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
481 styler
.ColourTo(i
, state
);
482 state
= SCE_SH_DEFAULT
;
484 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
489 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
490 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
492 } else { // an unquoted here-doc delimiter
493 if (isalnum(ch
) || ch
== '_' || ch
== '-' || ch
== '+' || ch
== '!') {
494 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
495 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
496 } else if (ch
== '\\') {
497 // skip escape prefix
499 styler
.ColourTo(i
- 1, state
);
500 state
= SCE_SH_DEFAULT
;
504 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
505 styler
.ColourTo(i
- 1, state
);
506 state
= SCE_SH_ERROR
;
510 } else if (HereDoc
.State
== 2) {
511 // state == SCE_SH_HERE_Q
512 if (isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
513 if (!HereDoc
.Indent
&& isEOLChar(chPrev
)) {
515 // standard HERE delimiter
516 i
+= HereDoc
.DelimiterLength
;
517 chPrev
= styler
.SafeGetCharAt(i
- 1);
518 ch
= styler
.SafeGetCharAt(i
);
520 styler
.ColourTo(i
- 1, state
);
521 state
= SCE_SH_DEFAULT
;
525 chNext
= styler
.SafeGetCharAt(i
+ 1);
526 } else if (HereDoc
.Indent
) {
527 // indented HERE delimiter
528 unsigned int bk
= (i
> 0)? i
- 1: 0;
530 ch
= styler
.SafeGetCharAt(bk
--);
533 } else if (!isspacechar(ch
)) {
534 break; // got leading non-whitespace
539 } else if (state
== SCE_SH_SCALAR
) { // variable names
541 if ((state
== SCE_SH_SCALAR
)
542 && i
== (styler
.GetStartSegment() + 1)) {
543 // Special variable: $(, $_ etc.
544 styler
.ColourTo(i
, state
);
545 state
= SCE_SH_DEFAULT
;
547 styler
.ColourTo(i
- 1, state
);
548 state
= SCE_SH_DEFAULT
;
552 } else if (state
== SCE_SH_STRING
553 || state
== SCE_SH_CHARACTER
554 || state
== SCE_SH_BACKTICKS
555 || state
== SCE_SH_PARAM
557 if (!Quote
.Down
&& !isspacechar(ch
)) {
559 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
562 chNext
= styler
.SafeGetCharAt(i
+ 1);
563 } else if (ch
== Quote
.Down
) {
565 if (Quote
.Count
== 0) {
567 if (Quote
.Rep
<= 0) {
568 styler
.ColourTo(i
, state
);
569 state
= SCE_SH_DEFAULT
;
572 if (Quote
.Up
== Quote
.Down
) {
576 } else if (ch
== Quote
.Up
) {
581 if (state
== SCE_SH_ERROR
) {
586 styler
.ColourTo(lengthDoc
- 1, state
);
589 static bool IsCommentLine(int line
, Accessor
&styler
) {
590 int pos
= styler
.LineStart(line
);
591 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
592 for (int i
= pos
; i
< eol_pos
; i
++) {
596 else if (ch
!= ' ' && ch
!= '\t')
602 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
604 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
605 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
606 unsigned int endPos
= startPos
+ length
;
607 int visibleChars
= 0;
608 int lineCurrent
= styler
.GetLine(startPos
);
609 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
610 int levelCurrent
= levelPrev
;
611 char chNext
= styler
[startPos
];
612 int styleNext
= styler
.StyleAt(startPos
);
613 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
615 chNext
= styler
.SafeGetCharAt(i
+ 1);
616 int style
= styleNext
;
617 styleNext
= styler
.StyleAt(i
+ 1);
618 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
620 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
622 if (!IsCommentLine(lineCurrent
- 1, styler
)
623 && IsCommentLine(lineCurrent
+ 1, styler
))
625 else if (IsCommentLine(lineCurrent
- 1, styler
)
626 && !IsCommentLine(lineCurrent
+1, styler
))
629 if (style
== SCE_C_OPERATOR
) {
632 } else if (ch
== '}') {
638 if (visibleChars
== 0 && foldCompact
)
639 lev
|= SC_FOLDLEVELWHITEFLAG
;
640 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
641 lev
|= SC_FOLDLEVELHEADERFLAG
;
642 if (lev
!= styler
.LevelAt(lineCurrent
)) {
643 styler
.SetLevel(lineCurrent
, lev
);
646 levelPrev
= levelCurrent
;
649 if (!isspacechar(ch
))
652 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
653 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
654 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
657 static const char * const bashWordListDesc
[] = {
662 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);