]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexBash.cxx
5801278becb9f7d5f7860f62fb230b368c3aab5c
1 // Scintilla source code edit control
5 // Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
19 #include "StyleContext.h"
21 #include "Scintilla.h"
23 #include "CharacterSet.h"
26 using namespace Scintilla
;
29 #define HERE_DELIM_MAX 256
31 // define this if you want 'invalid octals' to be marked as errors
32 // usually, this is not a good idea, permissive lexing is better
35 #define BASH_BASE_ERROR 65
36 #define BASH_BASE_DECIMAL 66
37 #define BASH_BASE_HEX 67
39 #define BASH_BASE_OCTAL 68
40 #define BASH_BASE_OCTAL_ERROR 69
43 static inline int translateBashDigit(int ch
) {
44 if (ch
>= '0' && ch
<= '9') {
46 } else if (ch
>= 'a' && ch
<= 'z') {
48 } else if (ch
>= 'A' && ch
<= 'Z') {
50 } else if (ch
== '@') {
52 } else if (ch
== '_') {
55 return BASH_BASE_ERROR
;
58 static inline int getBashNumberBase(char *s
) {
62 base
= base
* 10 + (*s
++ - '0');
65 if (base
> 64 || i
> 2) {
66 return BASH_BASE_ERROR
;
71 static int opposite(int ch
) {
72 if (ch
== '(') return ')';
73 if (ch
== '[') return ']';
74 if (ch
== '{') return '}';
75 if (ch
== '<') return '>';
79 static void ColouriseBashDoc(unsigned int startPos
, int length
, int initStyle
,
80 WordList
*keywordlists
[], Accessor
&styler
) {
82 WordList
&keywords
= *keywordlists
[0];
84 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_");
85 // note that [+-] are often parts of identifiers in shell scripts
86 CharacterSet
setWord(CharacterSet::setAlphaNum
, "._+-");
87 CharacterSet
setBashOperator(CharacterSet::setNone
, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
88 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
89 CharacterSet
setParam(CharacterSet::setAlphaNum
, "$_");
90 CharacterSet
setHereDoc(CharacterSet::setAlpha
, "_\\-+!");
91 CharacterSet
setHereDoc2(CharacterSet::setAlphaNum
, "_-+!");
92 CharacterSet
setLeftShift(CharacterSet::setDigits
, "=$");
94 class HereDocCls
{ // Class to manage HERE document elements
96 int State
; // 0: '<<' encountered
97 // 1: collect the delimiter
98 // 2: here doc text (lines after the delimiter)
99 int Quote
; // the char after '<<'
100 bool Quoted
; // true if Quote in ('\'','"','`')
101 bool Indent
; // indented delimiter (for <<-)
102 int DelimiterLength
; // strlen(Delimiter)
103 char *Delimiter
; // the Delimiter, 256: sizeof PL_tokenbuf
110 Delimiter
= new char[HERE_DELIM_MAX
];
113 void Append(int ch
) {
114 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
115 Delimiter
[DelimiterLength
] = '\0';
123 class QuoteCls
{ // Class to manage quote pairs (simplified vs LexPerl)
146 unsigned int endPos
= startPos
+ length
;
148 // Backtrack to beginning of style if required...
149 // If in a long distance lexical state, backtrack to find quote characters
150 if (initStyle
== SCE_SH_HERE_Q
) {
151 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != SCE_SH_HERE_DELIM
)) {
154 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
155 initStyle
= styler
.StyleAt(startPos
- 1);
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
158 // Bash numbers have additional state during lexing, so backtrack too.
159 if (initStyle
== SCE_SH_STRING
160 || initStyle
== SCE_SH_BACKTICKS
161 || initStyle
== SCE_SH_CHARACTER
162 || initStyle
== SCE_SH_NUMBER
163 || initStyle
== SCE_SH_IDENTIFIER
164 || initStyle
== SCE_SH_COMMENTLINE
) {
165 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
168 initStyle
= SCE_SH_DEFAULT
;
171 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
173 for (; sc
.More(); sc
.Forward()) {
175 // Determine if the current state should terminate.
177 case SCE_SH_OPERATOR
:
178 sc
.SetState(SCE_SH_DEFAULT
);
181 // "." never used in Bash variable names but used in file names
182 if (!setWord
.Contains(sc
.ch
)) {
184 sc
.GetCurrent(s
, sizeof(s
));
185 if (s
[0] != '-' && // for file operators
186 !keywords
.InList(s
)) {
187 sc
.ChangeState(SCE_SH_IDENTIFIER
);
189 sc
.SetState(SCE_SH_DEFAULT
);
192 case SCE_SH_IDENTIFIER
:
193 if (sc
.chPrev
== '\\') { // for escaped chars
194 sc
.ForwardSetState(SCE_SH_DEFAULT
);
195 } else if (!setWord
.Contains(sc
.ch
)) {
196 sc
.SetState(SCE_SH_DEFAULT
);
200 digit
= translateBashDigit(sc
.ch
);
201 if (numBase
== BASH_BASE_DECIMAL
) {
204 sc
.GetCurrent(s
, sizeof(s
));
205 numBase
= getBashNumberBase(s
);
206 if (numBase
!= BASH_BASE_ERROR
)
208 } else if (IsADigit(sc
.ch
))
210 } else if (numBase
== BASH_BASE_HEX
) {
211 if (IsADigit(sc
.ch
, 16))
213 #ifdef PEDANTIC_OCTAL
214 } else if (numBase
== BASH_BASE_OCTAL
||
215 numBase
== BASH_BASE_OCTAL_ERROR
) {
219 numBase
= BASH_BASE_OCTAL_ERROR
;
223 } else if (numBase
== BASH_BASE_ERROR
) {
226 } else { // DD#DDDD number style handling
227 if (digit
!= BASH_BASE_ERROR
) {
229 // case-insensitive if base<=36
230 if (digit
>= 36) digit
-= 26;
235 numBase
= BASH_BASE_ERROR
;
240 // fallthrough when number is at an end or error
241 if (numBase
== BASH_BASE_ERROR
242 #ifdef PEDANTIC_OCTAL
243 || numBase
== BASH_BASE_OCTAL_ERROR
246 sc
.ChangeState(SCE_SH_ERROR
);
248 sc
.SetState(SCE_SH_DEFAULT
);
250 case SCE_SH_COMMENTLINE
:
251 if (sc
.ch
== '\\' && (sc
.chNext
== '\r' || sc
.chNext
== '\n')) {
252 // comment continuation
254 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
257 } else if (sc
.atLineEnd
) {
258 sc
.ForwardSetState(SCE_SH_DEFAULT
);
261 case SCE_SH_HERE_DELIM
:
264 // Specifier format is: <<[-]WORD
265 // Optional '-' is for removal of leading tabs from here-doc.
266 // Whitespace acceptable after <<[-] operator
268 if (HereDoc
.State
== 0) { // '<<' encountered
269 HereDoc
.Quote
= sc
.chNext
;
270 HereDoc
.Quoted
= false;
271 HereDoc
.DelimiterLength
= 0;
272 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
273 if (sc
.chNext
== '\'' || sc
.chNext
== '\"') { // a quoted here-doc delimiter (' or ")
275 HereDoc
.Quoted
= true;
277 } else if (!HereDoc
.Indent
&& sc
.chNext
== '-') { // <<- indent case
278 HereDoc
.Indent
= true;
279 } else if (setHereDoc
.Contains(sc
.chNext
)) {
280 // an unquoted here-doc delimiter, no special handling
281 // TODO check what exactly bash considers part of the delim
283 } else if (sc
.chNext
== '<') { // HERE string <<<
285 sc
.ForwardSetState(SCE_SH_DEFAULT
);
286 } else if (IsASpace(sc
.chNext
)) {
288 } else if (setLeftShift
.Contains(sc
.chNext
)) {
289 // left shift << or <<= operator cases
290 sc
.ChangeState(SCE_SH_OPERATOR
);
291 sc
.ForwardSetState(SCE_SH_DEFAULT
);
293 // a symbol terminates; deprecated zero-length delimiter
296 } else if (HereDoc
.State
== 1) { // collect the delimiter
297 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
298 if (sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
299 sc
.ForwardSetState(SCE_SH_DEFAULT
);
301 if (sc
.ch
== '\\' && sc
.chNext
== HereDoc
.Quote
) { // escaped quote
304 HereDoc
.Append(sc
.ch
);
306 } else { // an unquoted here-doc delimiter
307 if (setHereDoc2
.Contains(sc
.ch
)) {
308 HereDoc
.Append(sc
.ch
);
309 } else if (sc
.ch
== '\\') {
310 // skip escape prefix
312 sc
.SetState(SCE_SH_DEFAULT
);
315 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) { // force blowup
316 sc
.SetState(SCE_SH_ERROR
);
322 // HereDoc.State == 2
323 if (sc
.atLineStart
) {
324 sc
.SetState(SCE_SH_HERE_Q
);
326 while (IsASpace(sc
.ch
) && !sc
.atLineEnd
) { // whitespace prefix
331 sc
.SetState(SCE_SH_HERE_Q
);
332 while (!sc
.atLineEnd
) {
335 char s
[HERE_DELIM_MAX
];
336 sc
.GetCurrent(s
, sizeof(s
));
337 if (sc
.LengthCurrent() == 0)
339 if (s
[strlen(s
) - 1] == '\r')
340 s
[strlen(s
) - 1] = '\0';
341 if (strcmp(HereDoc
.Delimiter
, s
) == 0) {
342 if ((prefixws
> 0 && HereDoc
.Indent
) || // indentation rule
343 (prefixws
== 0 && !HereDoc
.Indent
)) {
344 sc
.SetState(SCE_SH_DEFAULT
);
350 case SCE_SH_SCALAR
: // variable names
351 if (!setParam
.Contains(sc
.ch
)) {
352 if (sc
.LengthCurrent() == 1) {
353 // Special variable: $(, $_ etc.
354 sc
.ForwardSetState(SCE_SH_DEFAULT
);
356 sc
.SetState(SCE_SH_DEFAULT
);
360 case SCE_SH_STRING
: // delimited styles
361 case SCE_SH_CHARACTER
:
362 case SCE_SH_BACKTICKS
:
364 if (sc
.ch
== '\\' && Quote
.Up
!= '\\') {
366 } else if (sc
.ch
== Quote
.Down
) {
368 if (Quote
.Count
== 0) {
369 sc
.ForwardSetState(SCE_SH_DEFAULT
);
371 } else if (sc
.ch
== Quote
.Up
) {
377 // Must check end of HereDoc state 1 before default state is handled
378 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
379 // Begin of here-doc (the line after the here-doc delimiter):
380 // Lexically, the here-doc starts from the next line after the <<, but the
381 // first line of here-doc seems to follow the style of the last EOL sequence
383 if (HereDoc
.Quoted
) {
384 if (sc
.state
== SCE_SH_HERE_DELIM
) {
385 // Missing quote at end of string! We are stricter than bash.
386 // Colour here-doc anyway while marking this bit as an error.
387 sc
.ChangeState(SCE_SH_ERROR
);
389 // HereDoc.Quote always == '\''
391 sc
.SetState(SCE_SH_HERE_Q
);
394 // Determine if a new state should be entered.
395 if (sc
.state
== SCE_SH_DEFAULT
) {
396 if (sc
.ch
== '\\') { // escaped character
397 sc
.SetState(SCE_SH_IDENTIFIER
);
398 } else if (IsADigit(sc
.ch
)) {
399 sc
.SetState(SCE_SH_NUMBER
);
400 numBase
= BASH_BASE_DECIMAL
;
401 if (sc
.ch
== '0') { // hex,octal
402 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
403 numBase
= BASH_BASE_HEX
;
405 } else if (IsADigit(sc
.chNext
)) {
406 #ifdef PEDANTIC_OCTAL
407 numBase
= BASH_BASE_OCTAL
;
409 numBase
= BASH_BASE_HEX
;
413 } else if (setWordStart
.Contains(sc
.ch
)) {
414 sc
.SetState(SCE_SH_WORD
);
415 } else if (sc
.ch
== '#') {
416 sc
.SetState(SCE_SH_COMMENTLINE
);
417 } else if (sc
.ch
== '\"') {
418 sc
.SetState(SCE_SH_STRING
);
420 } else if (sc
.ch
== '\'') {
421 sc
.SetState(SCE_SH_CHARACTER
);
423 } else if (sc
.ch
== '`') {
424 sc
.SetState(SCE_SH_BACKTICKS
);
426 } else if (sc
.ch
== '$') {
427 sc
.SetState(SCE_SH_SCALAR
);
430 sc
.ChangeState(SCE_SH_PARAM
);
431 } else if (sc
.ch
== '\'') {
432 sc
.ChangeState(SCE_SH_CHARACTER
);
433 } else if (sc
.ch
== '"') {
434 sc
.ChangeState(SCE_SH_STRING
);
435 } else if (sc
.ch
== '(' || sc
.ch
== '`') {
436 sc
.ChangeState(SCE_SH_BACKTICKS
);
437 if (sc
.chNext
== '(') { // $(( is lexed as operator
438 sc
.ChangeState(SCE_SH_OPERATOR
);
441 continue; // scalar has no delimiter pair
443 // fallthrough, open delim for $[{'"(`]
445 } else if (sc
.Match('<', '<')) {
446 sc
.SetState(SCE_SH_HERE_DELIM
);
448 HereDoc
.Indent
= false;
449 } else if (sc
.ch
== '-' && // one-char file test operators
450 setSingleCharOp
.Contains(sc
.chNext
) &&
451 !setWord
.Contains(sc
.GetRelative(2)) &&
452 IsASpace(sc
.chPrev
)) {
453 sc
.SetState(SCE_SH_WORD
);
455 } else if (setBashOperator
.Contains(sc
.ch
)) {
456 sc
.SetState(SCE_SH_OPERATOR
);
463 static bool IsCommentLine(int line
, Accessor
&styler
) {
464 int pos
= styler
.LineStart(line
);
465 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
466 for (int i
= pos
; i
< eol_pos
; i
++) {
470 else if (ch
!= ' ' && ch
!= '\t')
476 static void FoldBashDoc(unsigned int startPos
, int length
, int, WordList
*[],
478 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
479 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
480 unsigned int endPos
= startPos
+ length
;
481 int visibleChars
= 0;
482 int lineCurrent
= styler
.GetLine(startPos
);
483 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
484 int levelCurrent
= levelPrev
;
485 char chNext
= styler
[startPos
];
486 int styleNext
= styler
.StyleAt(startPos
);
487 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
489 chNext
= styler
.SafeGetCharAt(i
+ 1);
490 int style
= styleNext
;
491 styleNext
= styler
.StyleAt(i
+ 1);
492 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
494 if (foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
))
496 if (!IsCommentLine(lineCurrent
- 1, styler
)
497 && IsCommentLine(lineCurrent
+ 1, styler
))
499 else if (IsCommentLine(lineCurrent
- 1, styler
)
500 && !IsCommentLine(lineCurrent
+ 1, styler
))
503 if (style
== SCE_SH_OPERATOR
) {
506 } else if (ch
== '}') {
512 if (visibleChars
== 0 && foldCompact
)
513 lev
|= SC_FOLDLEVELWHITEFLAG
;
514 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
515 lev
|= SC_FOLDLEVELHEADERFLAG
;
516 if (lev
!= styler
.LevelAt(lineCurrent
)) {
517 styler
.SetLevel(lineCurrent
, lev
);
520 levelPrev
= levelCurrent
;
523 if (!isspacechar(ch
))
526 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
527 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
528 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
531 static const char * const bashWordListDesc
[] = {
536 LexerModule
lmBash(SCLEX_BASH
, ColouriseBashDoc
, "bash", FoldBashDoc
, bashWordListDesc
);