]>
git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexBash.cxx
1 // Scintilla source code edit control
5 // Copyright 2004 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
20 #include "Scintilla.h"
23 #define BASH_BASE_ERROR 65
24 #define BASH_BASE_DECIMAL 66
25 #define BASH_BASE_HEX 67
26 #define BASH_BASE_OCTAL 68
27 #define BASH_BASE_OCTAL_ERROR 69
29 #define HERE_DELIM_MAX 256
31 static inline int translateBashDigit(char ch
) {
32 if (ch
>= '0' && ch
<= '9') {
34 } else if (ch
>= 'a' && ch
<= 'z') {
36 } else if (ch
>= 'A' && ch
<= 'Z') {
38 } else if (ch
== '@') {
40 } else if (ch
== '_') {
43 return BASH_BASE_ERROR
;
46 static inline bool isEOLChar(char ch
) {
47 return (ch
== '\r') || (ch
== '\n');
50 static bool isSingleCharOp(char ch
) {
54 return (NULL
!= strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet
));
57 static inline bool isBashOperator(char ch
) {
58 if (ch
== '^' || ch
== '&' || ch
== '\\' || ch
== '%' ||
59 ch
== '(' || ch
== ')' || ch
== '-' || ch
== '+' ||
60 ch
== '=' || ch
== '|' || ch
== '{' || ch
== '}' ||
61 ch
== '[' || ch
== ']' || ch
== ':' || ch
== ';' ||
62 ch
== '>' || ch
== ',' || ch
== '/' || ch
== '<' ||
63 ch
== '?' || ch
== '!' || ch
== '.' || ch
== '~' ||
69 static int classifyWordBash(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
71 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
72 s
[i
] = styler
[start
+ i
];
75 char chAttr
= SCE_SH_IDENTIFIER
;
76 if (keywords
.InList(s
))
78 styler
.ColourTo(end
, chAttr
);
82 static inline int getBashNumberBase(unsigned int start
, unsigned int end
, Accessor
&styler
) {
84 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 10; i
++) {
85 base
= base
* 10 + (styler
[start
+ i
] - '0');
87 if (base
> 64 || (end
- start
) > 1) {
88 return BASH_BASE_ERROR
;
93 static inline bool isEndVar(char ch
) {
94 return !isalnum(ch
) && ch
!= '$' && ch
!= '_';
97 static inline bool isNonQuote(char ch
) {
98 return isalnum(ch
) || ch
== '_';
101 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
102 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
106 if (*val
!= styler
[pos
++]) {
114 static char opposite(char ch
) {
126 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
127 WordList
*keywordlists
[], Accessor
&styler
) {
129 // The bash lexer often has to backtrack to the start of the current style to
130 // determine which characters are being used as quotes, how deeply nested the
131 // start position is, and what the termination string is for here documents
133 WordList
&keywords
= *keywordlists
[0];
137 int State
; // 0: '<<' encountered
138 // 1: collect the delimiter
139 // 2: here doc text (lines after the delimiter)
140 char Quote
; // the char after '<<'
141 bool Quoted
; // true if Quote in ('\'','"','`')
142 bool Indent
; // indented delimiter (for <<-)
143 int DelimiterLength
; // strlen(Delimiter)
144 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
148 Delimiter
= new char[HERE_DELIM_MAX
];
180 int state
= initStyle
;
182 unsigned int lengthDoc
= startPos
+ length
;
184 // If in a long distance lexical state, seek to the beginning to find quote characters
185 // Bash strings can be multi-line with embedded newlines, so backtrack.
186 // Bash numbers have additional state during lexing, so backtrack too.
187 if (state
== SCE_SH_HERE_Q
) {
188 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
191 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
192 state
= styler
.StyleAt(startPos
- 1);
194 if (state
== SCE_SH_STRING
195 || state
== SCE_SH_BACKTICKS
196 || state
== SCE_SH_CHARACTER
197 || state
== SCE_SH_NUMBER
198 || state
== SCE_SH_IDENTIFIER
199 || state
== SCE_SH_COMMENTLINE
201 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
204 state
= SCE_SH_DEFAULT
;
207 styler
.StartAt(startPos
);
208 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
211 char chNext
= styler
[startPos
];
212 styler
.StartSegment(startPos
);
214 for (unsigned int i
= startPos
; i
< lengthDoc
; i
++) {
216 // if the current character is not consumed due to the completion of an
217 // earlier style, lexing can be restarted via a simple goto
219 chNext
= styler
.SafeGetCharAt(i
+ 1);
220 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
222 if (styler
.IsLeadByte(ch
)) {
223 chNext
= styler
.SafeGetCharAt(i
+ 2);
229 if ((chPrev
== '\r' && ch
== '\n')) { // skip on DOS/Windows
230 styler
.ColourTo(i
, state
);
235 if (HereDoc
.State
== 1 && isEOLChar(ch
)) {
236 // Begin of here-doc (the line after the here-doc delimiter):
237 // Lexically, the here-doc starts from the next line after the <<, but the
238 // first line of the here-doc seems to follow the style of the last EOL sequence
240 if (HereDoc
.Quoted
) {
241 if (state
== SCE_SH_HERE_DELIM
) {
242 // Missing quote at end of string! We are stricter than bash.
243 // Colour here-doc anyway while marking this bit as an error.
244 state
= SCE_SH_ERROR
;
246 styler
.ColourTo(i
- 1, state
);
247 // HereDoc.Quote always == '\''
248 state
= SCE_SH_HERE_Q
;
250 styler
.ColourTo(i
- 1, state
);
252 state
= SCE_SH_HERE_Q
;
256 if (state
== SCE_SH_DEFAULT
) {
257 if (ch
== '\\') { // escaped character
261 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
262 } else if (isdigit(ch
)) {
263 state
= SCE_SH_NUMBER
;
264 numBase
= BASH_BASE_DECIMAL
;
265 if (ch
== '0') { // hex,octal
266 if (chNext
== 'x' || chNext
== 'X') {
267 numBase
= BASH_BASE_HEX
;
271 } else if (isdigit(chNext
)) {
272 numBase
= BASH_BASE_OCTAL
;
275 } else if (iswordstart(ch
)) {
277 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
278 // We need that if length of word == 1!
279 // This test is copied from the SCE_SH_WORD handler.
280 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
281 state
= SCE_SH_DEFAULT
;
283 } else if (ch
== '#') {
284 state
= SCE_SH_COMMENTLINE
;
285 } else if (ch
== '\"') {
286 state
= SCE_SH_STRING
;
289 } else if (ch
== '\'') {
290 state
= SCE_SH_CHARACTER
;
293 } else if (ch
== '`') {
294 state
= SCE_SH_BACKTICKS
;
297 } else if (ch
== '$') {
299 state
= SCE_SH_PARAM
;
301 } else if (chNext
== '\'') {
302 state
= SCE_SH_CHARACTER
;
304 } else if (chNext
== '"') {
305 state
= SCE_SH_STRING
;
307 } else if (chNext
== '(' && chNext2
== '(') {
308 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
309 state
= SCE_SH_DEFAULT
;
311 } else if (chNext
== '(' || chNext
== '`') {
312 state
= SCE_SH_BACKTICKS
;
318 state
= SCE_SH_SCALAR
;
324 } else if (ch
== '*') {
325 if (chNext
== '*') { // exponentiation
330 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
331 } else if (ch
== '<' && chNext
== '<') {
332 state
= SCE_SH_HERE_DELIM
;
334 HereDoc
.Indent
= false;
335 } else if (ch
== '-' // file test operators
336 && isSingleCharOp(chNext
)
337 && !isalnum((chNext2
= styler
.SafeGetCharAt(i
+2)))) {
338 styler
.ColourTo(i
+ 1, SCE_SH_WORD
);
339 state
= SCE_SH_DEFAULT
;
343 } else if (isBashOperator(ch
)) {
344 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
346 // keep colouring defaults to make restart easier
347 styler
.ColourTo(i
, SCE_SH_DEFAULT
);
349 } else if (state
== SCE_SH_NUMBER
) {
350 int digit
= translateBashDigit(ch
);
351 if (numBase
== BASH_BASE_DECIMAL
) {
353 numBase
= getBashNumberBase(styler
.GetStartSegment(), i
- 1, styler
);
354 if (numBase
== BASH_BASE_ERROR
) // take the rest as comment
356 } else if (!isdigit(ch
))
358 } else if (numBase
== BASH_BASE_HEX
) {
359 if ((digit
< 16) || (digit
>= 36 && digit
<= 41)) {
360 // hex digit 0-9a-fA-F
363 } else if (numBase
== BASH_BASE_OCTAL
||
364 numBase
== BASH_BASE_OCTAL_ERROR
) {
367 numBase
= BASH_BASE_OCTAL_ERROR
;
371 } else if (numBase
== BASH_BASE_ERROR
) {
374 } else { // DD#DDDD number style handling
375 if (digit
!= BASH_BASE_ERROR
) {
377 // case-insensitive if base<=36
378 if (digit
>= 36) digit
-= 26;
380 if (digit
>= numBase
) {
382 numBase
= BASH_BASE_ERROR
;
388 if (numBase
== BASH_BASE_ERROR
||
389 numBase
== BASH_BASE_OCTAL_ERROR
)
390 state
= SCE_SH_ERROR
;
391 styler
.ColourTo(i
- 1, state
);
392 state
= SCE_SH_DEFAULT
;
396 } else if (state
== SCE_SH_WORD
) {
397 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
398 // "." never used in Bash variable names
399 // but used in file names
400 classifyWordBash(styler
.GetStartSegment(), i
, keywords
, styler
);
401 state
= SCE_SH_DEFAULT
;
404 } else if (state
== SCE_SH_IDENTIFIER
) {
405 if (!iswordchar(chNext
) && chNext
!= '+' && chNext
!= '-') {
406 styler
.ColourTo(i
, SCE_SH_IDENTIFIER
);
407 state
= SCE_SH_DEFAULT
;
411 if (state
== SCE_SH_COMMENTLINE
) {
412 if (ch
== '\\' && isEOLChar(chNext
)) {
413 // comment continuation
414 if (chNext
== '\r' && chNext2
== '\n') {
416 ch
= styler
.SafeGetCharAt(i
);
417 chNext
= styler
.SafeGetCharAt(i
+ 1);
423 } else if (isEOLChar(ch
)) {
424 styler
.ColourTo(i
- 1, state
);
425 state
= SCE_SH_DEFAULT
;
427 } else if (isEOLChar(chNext
)) {
428 styler
.ColourTo(i
, state
);
429 state
= SCE_SH_DEFAULT
;
431 } else if (state
== SCE_SH_HERE_DELIM
) {
435 // Specifier format is: <<[-]WORD
436 // Optional '-' is for removal of leading tabs from here-doc.
437 // Whitespace acceptable after <<[-] operator
439 if (HereDoc
.State
== 0) { // '<<' encountered
441 HereDoc
.Quote
= chNext
;
442 HereDoc
.Quoted
= false;
443 HereDoc
.DelimiterLength
= 0;
444 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
445 if (chNext
== '\'') { // a quoted here-doc delimiter (' only)
449 HereDoc
.Quoted
= true;
450 } else if (!HereDoc
.Indent
&& chNext
== '-') { // <<- indent case
451 HereDoc
.Indent
= true;
453 } else if (isalpha(chNext
) || chNext
== '_' || chNext
== '\\'
454 || chNext
== '-' || chNext
== '+') {
455 // an unquoted here-doc delimiter, no special handling
456 } else if (chNext
== '<') { // HERE string <<<
460 styler
.ColourTo(i
, SCE_SH_HERE_DELIM
);
461 state
= SCE_SH_DEFAULT
;
463 } else if (isspacechar(chNext
)) {
466 } else if (isdigit(chNext
) || chNext
== '=' || chNext
== '$') {
467 // left shift << or <<= operator cases
468 styler
.ColourTo(i
, SCE_SH_OPERATOR
);
469 state
= SCE_SH_DEFAULT
;
472 // a symbol terminates the delimiter; deprecated zero-length delimiter
474 } else if (HereDoc
.State
== 1) { // collect the delimiter
475 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
476 if (ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
477 styler
.ColourTo(i
, state
);
478 state
= SCE_SH_DEFAULT
;
480 if (ch
== '\\' && chNext
== HereDoc
.Quote
) { // escaped quote
485 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
486 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
488 } else { // an unquoted here-doc delimiter
489 if (isalnum(ch
) || ch
== '_' || ch
== '-' || ch
== '+') {
490 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
++] = ch
;
491 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
492 } else if (ch
== '\\') {
493 // skip escape prefix
495 styler
.ColourTo(i
- 1, state
);
496 state
= SCE_SH_DEFAULT
;
500 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
501 styler
.ColourTo(i
- 1, state
);
502 state
= SCE_SH_ERROR
;
506 } else if (HereDoc
.State
== 2) {
507 // state == SCE_SH_HERE_Q
508 if (isMatch(styler
, lengthDoc
, i
, HereDoc
.Delimiter
)) {
509 if (!HereDoc
.Indent
&& isEOLChar(chPrev
)) {
511 // standard HERE delimiter
512 i
+= HereDoc
.DelimiterLength
;
513 chPrev
= styler
.SafeGetCharAt(i
- 1);
514 ch
= styler
.SafeGetCharAt(i
);
516 styler
.ColourTo(i
- 1, state
);
517 state
= SCE_SH_DEFAULT
;
521 chNext
= styler
.SafeGetCharAt(i
+ 1);
522 } else if (HereDoc
.Indent
) {
523 // indented HERE delimiter
524 unsigned int bk
= (i
> 0)? i
- 1: 0;
526 ch
= styler
.SafeGetCharAt(bk
--);
529 } else if (!isspacechar(ch
)) {
530 break; // got leading non-whitespace
535 } else if (state
== SCE_SH_SCALAR
) { // variable names
537 if ((state
== SCE_SH_SCALAR
)
538 && i
== (styler
.GetStartSegment() + 1)) {
539 // Special variable: $(, $_ etc.
540 styler
.ColourTo(i
, state
);
541 state
= SCE_SH_DEFAULT
;
543 styler
.ColourTo(i
- 1, state
);
544 state
= SCE_SH_DEFAULT
;
548 } else if (state
== SCE_SH_STRING
549 || state
== SCE_SH_CHARACTER
550 || state
== SCE_SH_BACKTICKS
551 || state
== SCE_SH_PARAM
553 if (!Quote
.Down
&& !isspacechar(ch
)) {
555 } else if (ch
== '\\' && Quote
.Up
!= '\\') {
558 chNext
= styler
.SafeGetCharAt(i
+ 1);
559 } else if (ch
== Quote
.Down
) {
561 if (Quote
.Count
== 0) {
563 if (Quote
.Rep
<= 0) {
564 styler
.ColourTo(i
, state
);
565 state
= SCE_SH_DEFAULT
;
568 if (Quote
.Up
== Quote
.Down
) {
572 } else if (ch
== Quote
.Up
) {
577 if (state
== SCE_SH_ERROR
) {
582 styler
.ColourTo(lengthDoc
- 1, state
);
585 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
587 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
588 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
589 unsigned int endPos
= startPos
+ length
;
590 int visibleChars
= 0;
591 int lineCurrent
= styler
.GetLine(startPos
);
592 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
593 int levelCurrent
= levelPrev
;
594 char chNext
= styler
[startPos
];
595 int styleNext
= styler
.StyleAt(startPos
);
596 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
598 chNext
= styler
.SafeGetCharAt(i
+ 1);
599 int style
= styleNext
;
600 styleNext
= styler
.StyleAt(i
+ 1);
601 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
602 if (foldComment
&& (style
== SCE_SH_COMMENTLINE
)) {
603 if ((ch
== '/') && (chNext
== '/')) {
604 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
605 if (chNext2
== '{') {
607 } else if (chNext2
== '}') {
612 if (style
== SCE_C_OPERATOR
) {
615 } else if (ch
== '}') {
621 if (visibleChars
== 0 && foldCompact
)
622 lev
|= SC_FOLDLEVELWHITEFLAG
;
623 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
624 lev
|= SC_FOLDLEVELHEADERFLAG
;
625 if (lev
!= styler
.LevelAt(lineCurrent
)) {
626 styler
.SetLevel(lineCurrent
, lev
);
629 levelPrev
= levelCurrent
;
632 if (!isspacechar(ch
))
635 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
636 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
637 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
640 static const char * const bashWordListDesc
[] = {
645 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);