]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexBash.cxx
f0376b9474b010b093a0cb637da72b2f2e208898
// Scintilla source code edit control
// Copyright 2004-2007 by Neil Hodgson <neilh@scintilla.org>
// Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
// The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
// Define PEDANTIC_OCTAL if you want 'invalid octals' to be marked as errors.
// Usually this is not a good idea; permissive lexing is better.
// Sentinel values stored in the lexer's number-base variable alongside real
// numeric bases. Real bases are at most 64 (anything larger is rejected as
// BASH_BASE_ERROR), so the sentinels start at 65 and cannot collide with one.
#define BASH_BASE_ERROR 65
#define BASH_BASE_DECIMAL 66
#define BASH_BASE_HEX 67
#define BASH_BASE_OCTAL 68
#define BASH_BASE_OCTAL_ERROR 69

// Size in bytes of the buffer allocated for a here-document delimiter.
#define HERE_DELIM_MAX 256
38 using namespace Scintilla
;
41 static inline int translateBashDigit(char ch
) {
42 if (ch
>= '0' && ch
<= '9') {
44 } else if (ch
>= 'a' && ch
<= 'z') {
46 } else if (ch
>= 'A' && ch
<= 'Z') {
48 } else if (ch
== '@') {
50 } else if (ch
== '_') {
53 return BASH_BASE_ERROR
;
// Return true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	return (ch == '\r') || (ch == '\n');
}
// Return true when ch is one of the single-letter operators that may follow
// '-' (the caller uses this to recognise file test operators such as -r, -w).
static bool isSingleCharOp(char ch) {
	// A NUL would turn the needle into an empty string, which strstr()
	// reports as found in every haystack; reject it explicitly.
	if (ch == '\0')
		return false;
	// Build a one-character C string so that strstr() can search for ch.
	const char strCharSet[2] = { ch, '\0' };
	return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet));
}
67 static inline bool isBashOperator(char ch
) {
68 if (ch
== '^' || ch
== '&' || ch
== '\\' || ch
== '%' ||
69 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
70 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
71 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
72 ch
== '>' || ch
== ',' || ch
== '/' || ch
== '<' ||
73 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~' ||
79 static int classifyWordBash(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
81 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
82 s
[i
] = styler
[start
+ i
];
85 char chAttr
= SCE_SH_IDENTIFIER
;
86 if (keywords
.InList(s
))
88 styler
.ColourTo(end
, chAttr
);
92 static inline int getBashNumberBase(unsigned int start
, unsigned int end
, Accessor
&styler
) {
94 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 10; i
++) {
95 base
= base
* 10 + (styler
[start
+ i
] - '0');
97 if (base
> 64 || (end
- start
) > 1) {
98 return BASH_BASE_ERROR
;
// Return true when ch cannot be part of a variable name, i.e. it is not
// alphanumeric, not '$' and not '_'.
static inline bool isEndVar(char ch) {
	// isalnum() requires a value representable as unsigned char (or EOF);
	// a plain char holding a non-ASCII byte may be negative, so convert first.
	return !isalnum(static_cast<unsigned char>(ch)) && ch != '$' && ch != '_';
}
// Return true when ch is alphanumeric or '_'.
static inline bool isNonQuote(char ch) {
	// isalnum() requires a value representable as unsigned char (or EOF);
	// a plain char holding a non-ASCII byte may be negative, so convert first.
	return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
}
111 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
112 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
116 if (*val
!= styler
[pos
++]) {
124 static char opposite(char ch
) {
136 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
137 WordList
*keywordlists
[], Accessor
&styler
) {
139 // Lexer for bash often has to backtrack to start of current style to determine
140 // which characters are being used as quotes, how deeply nested is the
141 // start position and what the termination string is for here documents
143 WordList
&keywords
= *keywordlists
[0];
147 int State
; // 0: '<<' encountered
148 // 1: collect the delimiter
149 // 2: here doc text (lines after the delimiter)
150 char Quote
; // the char after '<<'
151 bool Quoted
; // true if Quote in ('\'','"','`')
152 bool Indent
; // indented delimiter (for <<-)
153 int DelimiterLength
; // strlen(Delimiter)
154 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
161 Delimiter
= new char[HERE_DELIM_MAX
];
193 int state
= initStyle
;
195 unsigned int lengthDoc
= startPos
+ length
;
197 // If in a long distance lexical state, seek to the beginning to find quote characters
198 // Bash strings can be multi-line with embedded newlines, so backtrack.
199 // Bash numbers have additional state during lexing, so backtrack too.
200 if (state
== SCE_SH_HERE_Q
) {
201 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
204 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
205 state
= styler
.StyleAt(startPos
- 1);
207 if (state
== SCE_SH_STRING
208 || state
== SCE_SH_BACKTICKS
209 || state
== SCE_SH_CHARACTER
210 || state
== SCE_SH_NUMBER
211 || state
== SCE_SH_IDENTIFIER
212 || state
== SCE_SH_COMMENTLINE
214 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
217 state
= SCE_SH_DEFAULT
;
220 styler
.StartAt(startPos
);
221 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
224 char chNext
= styler
[startPos
];
225 styler
.StartSegment(startPos
);
227 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
229 // if the current character is not consumed due to the completion of an
230 // earlier style, lexing can be restarted via a simple goto
232 chNext
= styler
.SafeGetCharAt(i
+ 1);
233 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
235 if (styler
.IsLeadByte(ch
)) {
236 chNext
= styler
.SafeGetCharAt(i
+ 2);
242 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
243 styler
.ColourTo(i
, state
);
248 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
249 // Begin of here-doc (the line after the here-doc delimiter):
250 // Lexically, the here-doc starts from the next line after the >>, but the
251 // first line of here-doc seem to follow the style of the last EOL sequence
253 if (HereDoc
.Quoted
) {
254 if (state
== SCE_SH_HERE_DELIM
) {
255 // Missing quote at end of string! We are stricter than bash.
256 // Colour here-doc anyway while marking this bit as an error.
257 state
= SCE_SH_ERROR
;
259 styler
.ColourTo(i
- 1, state
);
260 // HereDoc.Quote always == '\''
261 state
= SCE_SH_HERE_Q
;
263 styler
.ColourTo(i
- 1, state
);
265 state
= SCE_SH_HERE_Q
;
269 if (state
== SCE_SH_DEFAULT
) {
270 if (ch
== '\\') { // escaped character
271 if (i
< lengthDoc
- 1)
275 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
276 } else if (isdigit(ch
)) {
277 state
= SCE_SH_NUMBER
;
278 numBase
= BASH_BASE_DECIMAL
;
279 if (ch
== '0') { // hex,octal
280 if (chNext
== 'x' || chNext
== 'X') {
281 numBase
= BASH_BASE_HEX
;
285 } else if (isdigit(chNext
)) {
286 #ifdef PEDANTIC_OCTAL
287 numBase
= BASH_BASE_OCTAL
;
289 numBase
= BASH_BASE_HEX
;
293 } else if (iswordstart(ch
)) {
295 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
296 // We need that if length of word == 1!
297 // This test is copied from the SCE_SH_WORD handler.
298 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
299 state
= SCE_SH_DEFAULT
;
301 } else if (ch
== '#') {
302 state
= SCE_SH_COMMENTLINE
;
303 } else if (ch
== '\"') {
304 state
= SCE_SH_STRING
;
307 } else if (ch
== '\'') {
308 state
= SCE_SH_CHARACTER
;
311 } else if (ch
== '`') {
312 state
= SCE_SH_BACKTICKS
;
315 } else if (ch
== '$') {
317 state
= SCE_SH_PARAM
;
319 } else if (chNext
== '\'') {
320 state
= SCE_SH_CHARACTER
;
322 } else if (chNext
== '"') {
323 state
= SCE_SH_STRING
;
325 } else if (chNext
== '(' && chNext2
== '(') {
326 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
327 state
= SCE_SH_DEFAULT
;
329 } else if (chNext
== '(' || chNext
== '`') {
330 state
= SCE_SH_BACKTICKS
;
336 state
= SCE_SH_SCALAR
;
342 } else if (ch
== '*') {
343 if (chNext
== '*') { // exponentiation
348 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
349 } else if (ch
== '<' && chNext
== '<') {
350 state
= SCE_SH_HERE_DELIM
;
352 HereDoc
.Indent
= false;
353 } else if (ch
== '-' // file test operators
354 && isSingleCharOp(chNext
)
355 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))
356 && isspace(chPrev
)) {
357 styler
.ColourTo(i
+ 1, SCE_SH_WORD
);
358 state
= SCE_SH_DEFAULT
;
362 } else if (isBashOperator(ch
)) {
363 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
365 // keep colouring defaults to make restart easier
366 styler
.ColourTo(i
, SCE_SH_DEFAULT
);
368 } else if (state
== SCE_SH_NUMBER
) {
369 int digit
= translateBashDigit(ch
);
370 if (numBase
== BASH_BASE_DECIMAL
) {
372 numBase
= getBashNumberBase(styler
.GetStartSegment(), i
- 1, styler
);
373 if (numBase
== BASH_BASE_ERROR
) // take the rest as comment
375 } else if (!isdigit(ch
))
377 } else if (numBase
== BASH_BASE_HEX
) {
378 if ((digit
< 16) || (digit
>= 36 && digit
<= 41)) {
379 // hex digit 0-9a-fA-F
382 #ifdef PEDANTIC_OCTAL
383 } else if (numBase
== BASH_BASE_OCTAL
||
384 numBase
== BASH_BASE_OCTAL_ERROR
) {
387 numBase
= BASH_BASE_OCTAL_ERROR
;
392 } else if (numBase
== BASH_BASE_ERROR
) {
395 } else { // DD#DDDD number style handling
396 if (digit
!= BASH_BASE_ERROR
) {
398 // case-insensitive if base<=36
399 if (digit
>= 36) digit
-= 26;
401 if (digit
>= numBase
) {
403 numBase
= BASH_BASE_ERROR
;
409 if (numBase
== BASH_BASE_ERROR
410 #ifdef PEDANTIC_OCTAL
411 || numBase
== BASH_BASE_OCTAL_ERROR
414 state
= SCE_SH_ERROR
;
415 styler
.ColourTo(i
- 1, state
);
416 state
= SCE_SH_DEFAULT
;
420 } else if (state
== SCE_SH_WORD
) {
421 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
422 // "." never used in Bash variable names
423 // but used in file names
424 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
425 state
= SCE_SH_DEFAULT
;
428 } else if (state
== SCE_SH_IDENTIFIER
) {
429 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
430 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
431 state
= SCE_SH_DEFAULT
;
435 if (state
== SCE_SH_COMMENTLINE
) {
436 if (ch
== '\\' && isEOLChar(chNext
)) {
437 // comment continuation
438 if (chNext
== '\r' && chNext2
== '\n') {
440 ch
= styler
.SafeGetCharAt(i
);
441 chNext
= styler
.SafeGetCharAt(i
+ 1);
447 } else if (isEOLChar(ch
)) {
448 styler
.ColourTo(i
- 1, state
);
449 state
= SCE_SH_DEFAULT
;
451 } else if (isEOLChar(chNext
)) {
452 styler
.ColourTo(i
, state
);
453 state
= SCE_SH_DEFAULT
;
455 } else if (state
== SCE_SH_HERE_DELIM
) {
459 // Specifier format is: <<[-]WORD
460 // Optional '-' is for removal of leading tabs from here-doc.
461 // Whitespace acceptable after <<[-] operator
463 if (HereDoc
.State
== 0) { // '<<' encountered
465 HereDoc
.Quote
= chNext
;
466 HereDoc
.Quoted
= false;
467 HereDoc
.DelimiterLength
= 0;
468 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
469 if (chNext
== '\'' || chNext
== '\"') { // a quoted here-doc delimiter (' or ")
473 HereDoc
.Quoted
= true;
474 } else if (!HereDoc
.Indent
&& chNext
== '-') { // <<- indent case
475 HereDoc
.Indent
= true;
477 } else if (isalpha(chNext
) || chNext
== '_' || chNext
== '\\'
478 || chNext
== '-' || chNext
== '+' || chNext
== '!') {
479 // an unquoted here-doc delimiter, no special handling
480 // TODO check what exactly bash considers part of the delim
481 } else if (chNext
== '<') { // HERE string <<<
485 styler
.ColourTo(i
, SCE_SH_HERE_DELIM
);
486 state
= SCE_SH_DEFAULT
;
488 } else if (isspacechar(chNext
)) {
491 } else if (isdigit(chNext
) || chNext
== '=' || chNext
== '$') {
492 // left shift << or <<= operator cases
493 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
494 state
= SCE_SH_DEFAULT
;
497 // symbols terminates; deprecated zero-length delimiter
499 } else if (HereDoc
.State
== 1) { // collect the delimiter
500 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
501 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
502 styler
.ColourTo(i
, state
);
503 state
= SCE_SH_DEFAULT
;
505 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
510 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
511 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
513 } else { // an unquoted here-doc delimiter
514 if (isalnum(ch
) || ch
== '_' || ch
== '-' || ch
== '+' || ch
== '!') {
515 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
516 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
517 } else if (ch
== '\\') {
518 // skip escape prefix
520 styler
.ColourTo(i
- 1, state
);
521 state
= SCE_SH_DEFAULT
;
525 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
526 styler
.ColourTo(i
- 1, state
);
527 state
= SCE_SH_ERROR
;
531 } else if (HereDoc
.State
== 2) {
532 // state == SCE_SH_HERE_Q
533 if (isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
534 if (!HereDoc
.Indent
&& isEOLChar(chPrev
)) {
536 // standard HERE delimiter
537 i
+= HereDoc
.DelimiterLength
;
538 chPrev
= styler
.SafeGetCharAt(i
- 1);
539 ch
= styler
.SafeGetCharAt(i
);
541 styler
.ColourTo(i
- 1, state
);
542 state
= SCE_SH_DEFAULT
;
546 chNext
= styler
.SafeGetCharAt(i
+ 1);
547 } else if (HereDoc
.Indent
) {
548 // indented HERE delimiter
549 unsigned int bk
= (i
> 0)? i
- 1: 0;
551 ch
= styler
.SafeGetCharAt(bk
--);
554 } else if (!isspacechar(ch
)) {
555 break; // got leading non-whitespace
560 } else if (state
== SCE_SH_SCALAR
) { // variable names
562 if ((state
== SCE_SH_SCALAR
)
563 && i
== (styler
.GetStartSegment() + 1)) {
564 // Special variable: $(, $_ etc.
565 styler
.ColourTo(i
, state
);
566 state
= SCE_SH_DEFAULT
;
568 styler
.ColourTo(i
- 1, state
);
569 state
= SCE_SH_DEFAULT
;
573 } else if (state
== SCE_SH_STRING
574 || state
== SCE_SH_CHARACTER
575 || state
== SCE_SH_BACKTICKS
576 || state
== SCE_SH_PARAM
578 if (!Quote
.Down
&& !isspacechar(ch
)) {
580 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
583 chNext
= styler
.SafeGetCharAt(i
+ 1);
584 } else if (ch
== Quote
.Down
) {
586 if (Quote
.Count
== 0) {
588 if (Quote
.Rep
<= 0) {
589 styler
.ColourTo(i
, state
);
590 state
= SCE_SH_DEFAULT
;
593 if (Quote
.Up
== Quote
.Down
) {
597 } else if (ch
== Quote
.Up
) {
602 if (state
== SCE_SH_ERROR
) {
607 styler
.ColourTo(lengthDoc
- 1, state
);
610 static bool IsCommentLine(int line
, Accessor
&styler
) {
611 int pos
= styler
.LineStart(line
);
612 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
613 for (int i
= pos
; i
< eol_pos
; i
++) {
617 else if (ch
!= ' ' && ch
!= '\t')
623 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
625 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
626 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
627 unsigned int endPos
= startPos
+ length
;
628 int visibleChars
= 0;
629 int lineCurrent
= styler
.GetLine(startPos
);
630 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
631 int levelCurrent
= levelPrev
;
632 char chNext
= styler
[startPos
];
633 int styleNext
= styler
.StyleAt(startPos
);
634 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
636 chNext
= styler
.SafeGetCharAt(i
+ 1);
637 int style
= styleNext
;
638 styleNext
= styler
.StyleAt(i
+ 1);
639 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
641 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
643 if (!IsCommentLine(lineCurrent
- 1, styler
)
644 && IsCommentLine(lineCurrent
+ 1, styler
))
646 else if (IsCommentLine(lineCurrent
- 1, styler
)
647 && !IsCommentLine(lineCurrent
+1, styler
))
650 if (style
== SCE_SH_OPERATOR
) {
653 } else if (ch
== '}') {
659 if (visibleChars
== 0 && foldCompact
)
660 lev
|= SC_FOLDLEVELWHITEFLAG
;
661 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
662 lev
|= SC_FOLDLEVELHEADERFLAG
;
663 if (lev
!= styler
.LevelAt(lineCurrent
)) {
664 styler
.SetLevel(lineCurrent
, lev
);
667 levelPrev
= levelCurrent
;
670 if (!isspacechar(ch
))
673 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
674 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
675 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
678 static const char * const bashWordListDesc
[] = {
683 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);