1 // Scintilla source code edit control
2 /** @file LexPython.cxx
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
16 #include "Scintilla.h"
20 #include "LexAccessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
27 using namespace Scintilla
;
30 /* kwCDef, kwCTypeName only used for Cython */
// Kind of the keyword most recently seen by ColourisePyDoc (kept in its
// local kwLast). It decides how the identifier that follows is styled,
// e.g. kwClass -> SCE_P_CLASSNAME and kwDef -> SCE_P_DEFNAME; kwImport is
// checked together with the word "as"; kwCDef/kwCPDef trigger a look at the
// character after the identifier.
31 enum kwType
{ kwOther
, kwClass
, kwDef
, kwImport
, kwCDef
, kwCTypeName
, kwCPDef
};
// Indicator number passed to styler.IndicatorFill() by ColourisePyDoc:
// filled with value 1 over leading whitespace when the indentation check
// fails (indentGood is false), and with value 0 otherwise.
33 static const int indicatorWhitespace
= 1;
// Returns true when len is positive and the character at pos is '#'.
// Passed as the comment-test callback to styler.IndentAmount() in
// ColourisePyDoc.
//   styler - accessor the character is read from.
//   pos    - position of the character to test.
//   len    - length supplied by the caller; a value <= 0 yields false.
35 static bool IsPyComment(Accessor
&styler
, int pos
, int len
) {
36 return len
> 0 && styler
[pos
] == '#';
// Bit flags selecting which string prefix letters are recognised:
// litU (1) for u/U and litB (2) for b/B. The flags are combined with '|'
// in ColourisePyDoc and tested with '&' in IsPyStringTypeChar.
39 enum literalsAllowed
{ litNone
=0, litU
=1, litB
=2};
// Tests whether ch is a permitted string-type prefix letter.
// The expression below is true when litB is set in allowed and ch is
// 'b'/'B', or when litU is set in allowed and ch is 'u'/'U'.
// NOTE(review): the statement that consumes this expression is not visible
// here; presumably it is the function's return value - confirm.
41 static bool IsPyStringTypeChar(int ch
, literalsAllowed allowed
) {
43 ((allowed
& litB
) && (ch
== 'b' || ch
== 'B')) ||
44 ((allowed
& litU
) && (ch
== 'u' || ch
== 'U'));
// Examines up to three consecutive characters (ch, chNext, chNext2) for the
// shapes a string opening can take. The conditions tested are:
//   - ch is a quote (' or ");
//   - ch is an allowed type letter (see IsPyStringTypeChar) followed either
//     by a quote, or by r/R and then a quote;
//   - ch is r/R followed by a quote.
// NOTE(review): the statements executed for each condition are not visible
// here; presumably each match returns true and the fall-through returns
// false - confirm.
// Called from ColourisePyDoc with sc.ch, sc.chNext and sc.GetRelative(2).
47 static bool IsPyStringStart(int ch
, int chNext
, int chNext2
, literalsAllowed allowed
) {
48 if (ch
== '\'' || ch
== '"')
// Type letter (u/U or b/B when allowed) in front of the quote.
50 if (IsPyStringTypeChar(ch
, allowed
)) {
51 if (chNext
== '"' || chNext
== '\'')
53 if ((chNext
== 'r' || chNext
== 'R') && (chNext2
== '"' || chNext2
== '\''))
// Bare r/R prefix directly in front of the quote.
56 if ((ch
== 'r' || ch
== 'R') && (chNext
== '"' || chNext
== '\''))
62 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
// Parameters:
//   styler    - accessor the characters are read from (via SafeGetCharAt).
//   i         - index at which the string, including any prefix letters, starts.
//   nextIndex - out-parameter described in the comment above.
//   allowed   - which type letters IsPyStringTypeChar accepts as a prefix.
// SCE_P_TRIPLEDOUBLE and SCE_P_CHARACTER are among the states returned.
63 static int GetPyStringState(Accessor
&styler
, int i
, unsigned int *nextIndex
, literalsAllowed allowed
) {
64 char ch
= styler
.SafeGetCharAt(i
);
65 char chNext
= styler
.SafeGetCharAt(i
+ 1);
67 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
// NOTE(review): ch and chNext are re-read at i in each branch below, so i is
// presumably advanced past the prefix by statements not visible here - confirm.
68 if (ch
== 'r' || ch
== 'R') {
70 ch
= styler
.SafeGetCharAt(i
);
71 chNext
= styler
.SafeGetCharAt(i
+ 1);
72 } else if (IsPyStringTypeChar(ch
, allowed
)) {
73 if (chNext
== 'r' || chNext
== 'R')
77 ch
= styler
.SafeGetCharAt(i
);
78 chNext
= styler
.SafeGetCharAt(i
+ 1);
// After the prefix, ch is expected to be a quote character.
81 if (ch
!= '"' && ch
!= '\'') {
// Three identical quote characters in a row open a triple-quoted string.
86 if (ch
== chNext
&& ch
== styler
.SafeGetCharAt(i
+ 2)) {
90 return SCE_P_TRIPLEDOUBLE
;
99 return SCE_P_CHARACTER
;
// Returns true when ch is below 0x80 and is alphanumeric, '.' or '_'.
// Used by ColourisePyDoc to detect the end of numbers, identifiers and
// decorators.
103 static inline bool IsAWordChar(int ch
) {
104 return (ch
< 0x80) && (isalnum(ch
) || ch
== '.' || ch
== '_');
// Returns true when ch is below 0x80 and is alphanumeric or '_'.
// Unlike IsAWordChar, '.' is not accepted. ColourisePyDoc uses this to
// decide whether an identifier starts at the current character.
107 static inline bool IsAWordStart(int ch
) {
108 return (ch
< 0x80) && (isalnum(ch
) || ch
== '_');
// Styles Python source text through a StyleContext, assigning SCE_P_* states
// and maintaining the indicatorWhitespace indicator for indentation checks.
// Parameters:
//   startPos, length - range to style; endPos is startPos + length. startPos
//                      may be moved to the start of a line chosen by the
//                      backtracking below.
//   initStyle        - style in effect at startPos; re-read from the document
//                      after backtracking and then masked with 31.
//   keywordlists     - keywordlists[0] and keywordlists[1] are the two word
//                      lists consulted when an identifier ends.
//   styler           - accessor used to read characters, styles and
//                      properties and to write indicators.
// Properties read: tab.timmy.whinge.level, lexer.python.literals.binary,
// lexer.python.strings.u, lexer.python.strings.b,
// lexer.python.strings.over.newline, lexer.python.keywords2.no.sub.identifiers.
111 static void ColourisePyDoc(unsigned int startPos
, int length
, int initStyle
,
112 WordList
*keywordlists
[], Accessor
&styler
) {
114 int endPos
= startPos
+ length
;
116 // Backtrack to previous line in case need to fix its tab whinging
117 int lineCurrent
= styler
.GetLine(startPos
);
119 if (lineCurrent
> 0) {
121 // Look for backslash-continued lines
122 while (lineCurrent
> 0) {
// eolStyle is the style of the character just before lineCurrent starts,
// i.e. the last character of the preceding line.
123 int eolPos
= styler
.LineStart(lineCurrent
) - 1;
124 int eolStyle
= styler
.StyleAt(eolPos
);
125 if (eolStyle
== SCE_P_STRING
126 || eolStyle
== SCE_P_CHARACTER
127 || eolStyle
== SCE_P_STRINGEOL
) {
// Restart at the beginning of the line settled on above; the initial style
// is taken from the character before it, or is the default at position 0.
133 startPos
= styler
.LineStart(lineCurrent
);
135 initStyle
= startPos
== 0 ? SCE_P_DEFAULT
: styler
.StyleAt(startPos
- 1);
// The two word lists checked (in this order) when an identifier ends.
138 WordList
&keywords
= *keywordlists
[0];
139 WordList
&keywords2
= *keywordlists
[1];
141 // property tab.timmy.whinge.level
142 // For Python code, checks whether indenting is consistent.
143 // The default, 0 turns off indentation checking,
144 // 1 checks whether each line is potentially inconsistent with the previous line,
145 // 2 checks whether any space characters occur before a tab character in the indentation,
146 // 3 checks whether any spaces are in the indentation, and
147 // 4 checks for any tab characters in the indentation.
148 // 1 is a good level to use.
149 const int whingeLevel
= styler
.GetPropertyInt("tab.timmy.whinge.level");
151 // property lexer.python.literals.binary
152 // Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.
153 bool base2or8Literals
= styler
.GetPropertyInt("lexer.python.literals.binary", 1) != 0;
155 // property lexer.python.strings.u
156 // Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3.
157 literalsAllowed allowedLiterals
= (styler
.GetPropertyInt("lexer.python.strings.u", 1)) ? litU
: litNone
;
159 // property lexer.python.strings.b
160 // Set to 0 to not recognise Python 3 bytes literals b"x".
161 if (styler
.GetPropertyInt("lexer.python.strings.b", 1))
162 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litB
);
164 // property lexer.python.strings.over.newline
165 // Set to 1 to allow strings to span newline characters.
166 bool stringsOverNewline
= styler
.GetPropertyInt("lexer.python.strings.over.newline") != 0;
168 // property lexer.python.keywords2.no.sub.identifiers
169 // When enabled, it will not style keywords2 items that are used as a sub-identifier.
170 // Example: when set, will not highlight "foo.open" when "open" is a keywords2 item.
171 const bool keywords2NoSubIdentifiers
= styler
.GetPropertyInt("lexer.python.keywords2.no.sub.identifiers") != 0;
// Keep only the low five bits of the incoming style. An initial
// SCE_P_STRINGEOL is not carried forward: styling restarts in the default state.
173 initStyle
= initStyle
& 31;
174 if (initStyle
== SCE_P_STRINGEOL
) {
175 initStyle
= SCE_P_DEFAULT
;
// kwLast remembers the last keyword seen so the identifier after it can be
// styled as a class name or def name.
178 kwType kwLast
= kwOther
;
180 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
// base_n_number is set for numbers that start with a 0x/0o/0b style prefix;
// it disables the exponent-sign handling when the number ends.
181 bool base_n_number
= false;
183 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
// indentGood: outcome of the whingeLevel check for the current line.
// startIndicator: start of the span next handed to IndicatorFill.
185 bool indentGood
= true;
186 int startIndicator
= sc
.currentPos
;
187 bool inContinuedString
= false;
189 for (; sc
.More(); sc
.Forward()) {
191 if (sc
.atLineStart
) {
192 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
// Recompute indentGood from the flag that whingeLevel selects in spaceFlags.
194 if (whingeLevel
== 1) {
195 indentGood
= (spaceFlags
& wsInconsistent
) == 0;
196 } else if (whingeLevel
== 2) {
197 indentGood
= (spaceFlags
& wsSpaceTab
) == 0;
198 } else if (whingeLevel
== 3) {
199 indentGood
= (spaceFlags
& wsSpace
) == 0;
200 } else if (whingeLevel
== 4) {
201 indentGood
= (spaceFlags
& wsTab
) == 0;
// Fill the span since startIndicator with indicator value 0 and begin a
// new span at the current position.
204 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
205 startIndicator
= sc
.currentPos
;
210 if ((sc
.state
== SCE_P_DEFAULT
) ||
211 (sc
.state
== SCE_P_TRIPLE
) ||
212 (sc
.state
== SCE_P_TRIPLEDOUBLE
)) {
213 // Perform colourisation of white space and triple quoted strings at end of each line to allow
214 // tab marking to work inside white space and triple quoted strings
215 sc
.SetState(sc
.state
);
218 if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
219 if (inContinuedString
|| stringsOverNewline
) {
220 inContinuedString
= false;
222 sc
.ChangeState(SCE_P_STRINGEOL
);
223 sc
.ForwardSetState(SCE_P_DEFAULT
);
230 bool needEOLCheck
= false;
232 // Check for a state end
233 if (sc
.state
== SCE_P_OPERATOR
) {
235 sc
.SetState(SCE_P_DEFAULT
);
236 } else if (sc
.state
== SCE_P_NUMBER
) {
// A number ends at a non-word character, except that in a number without a
// base prefix a '+' or '-' directly after 'e'/'E' stays part of the number.
237 if (!IsAWordChar(sc
.ch
) &&
238 !(!base_n_number
&& ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
239 sc
.SetState(SCE_P_DEFAULT
);
241 } else if (sc
.state
== SCE_P_IDENTIFIER
) {
// The identifier ends at '.' or at any non-word character; its text is
// fetched into s and classified by the chain below.
242 if ((sc
.ch
== '.') || (!IsAWordChar(sc
.ch
))) {
244 sc
.GetCurrent(s
, sizeof(s
));
245 int style
= SCE_P_IDENTIFIER
;
246 if ((kwLast
== kwImport
) && (strcmp(s
, "as") == 0)) {
248 } else if (keywords
.InList(s
)) {
250 } else if (kwLast
== kwClass
) {
251 style
= SCE_P_CLASSNAME
;
252 } else if (kwLast
== kwDef
) {
253 style
= SCE_P_DEFNAME
;
254 } else if (kwLast
== kwCDef
|| kwLast
== kwCPDef
) {
// After cdef/cpdef the character following the identifier is inspected to
// choose between SCE_P_DEFNAME and SCE_P_CLASSNAME.
255 int pos
= sc
.currentPos
;
256 unsigned char ch
= styler
.SafeGetCharAt(pos
, '\0');
259 style
= SCE_P_DEFNAME
;
261 } else if (ch
== ':') {
262 style
= SCE_P_CLASSNAME
;
264 } else if (ch
== ' ' || ch
== '\t' || ch
== '\n' || ch
== '\r') {
266 ch
= styler
.SafeGetCharAt(pos
, '\0');
271 } else if (keywords2
.InList(s
)) {
272 if (keywords2NoSubIdentifiers
) {
273 // We don't want to highlight keywords2
274 // that are used as a sub-identifier,
275 // i.e. not open in "foo.open".
276 int pos
= styler
.GetStartSegment() - 1;
277 if (pos
< 0 || (styler
.SafeGetCharAt(pos
, '\0') != '.'))
283 sc
.ChangeState(style
);
284 sc
.SetState(SCE_P_DEFAULT
);
// For keywords, the specific word is compared so kwLast can be updated.
285 if (style
== SCE_P_WORD
) {
286 if (0 == strcmp(s
, "class"))
288 else if (0 == strcmp(s
, "def"))
290 else if (0 == strcmp(s
, "import"))
292 else if (0 == strcmp(s
, "cdef"))
294 else if (0 == strcmp(s
, "cpdef"))
296 else if (0 == strcmp(s
, "cimport"))
298 else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
)
300 } else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
) {
304 } else if ((sc
.state
== SCE_P_COMMENTLINE
) || (sc
.state
== SCE_P_COMMENTBLOCK
)) {
305 if (sc
.ch
== '\r' || sc
.ch
== '\n') {
306 sc
.SetState(SCE_P_DEFAULT
);
308 } else if (sc
.state
== SCE_P_DECORATOR
) {
309 if (!IsAWordChar(sc
.ch
)) {
310 sc
.SetState(SCE_P_DEFAULT
);
312 } else if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
314 if ((sc
.chNext
== '\r') && (sc
.GetRelative(2) == '\n')) {
317 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
318 inContinuedString
= true;
320 // Don't roll over the newline.
// A matching closing quote ends the string; the state after it is default.
323 } else if ((sc
.state
== SCE_P_STRING
) && (sc
.ch
== '\"')) {
324 sc
.ForwardSetState(SCE_P_DEFAULT
);
326 } else if ((sc
.state
== SCE_P_CHARACTER
) && (sc
.ch
== '\'')) {
327 sc
.ForwardSetState(SCE_P_DEFAULT
);
330 } else if (sc
.state
== SCE_P_TRIPLE
) {
333 } else if (sc
.Match("\'\'\'")) {
336 sc
.ForwardSetState(SCE_P_DEFAULT
);
339 } else if (sc
.state
== SCE_P_TRIPLEDOUBLE
) {
342 } else if (sc
.Match("\"\"\"")) {
345 sc
.ForwardSetState(SCE_P_DEFAULT
);
// When the indentation check failed, the run up to the first character that
// is not a space or tab is filled with indicator value 1.
350 if (!indentGood
&& !IsASpaceOrTab(sc
.ch
)) {
351 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 1);
352 startIndicator
= sc
.currentPos
;
356 // One cdef or cpdef line, clear kwLast only at end of line
357 if ((kwLast
== kwCDef
|| kwLast
== kwCPDef
) && sc
.atLineEnd
) {
361 // State exit code may have moved on to end of line
362 if (needEOLCheck
&& sc
.atLineEnd
) {
364 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
369 // Check for a new state starting character
370 if (sc
.state
== SCE_P_DEFAULT
) {
// A number starts at a digit, or at '.' followed by a digit. A 0x/0X prefix
// always sets base_n_number; 0o/0O/0b/0B does so when base2or8Literals is set.
371 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
372 if (sc
.ch
== '0' && (sc
.chNext
== 'x' || sc
.chNext
== 'X')) {
373 base_n_number
= true;
374 sc
.SetState(SCE_P_NUMBER
);
375 } else if (sc
.ch
== '0' &&
376 (sc
.chNext
== 'o' || sc
.chNext
== 'O' || sc
.chNext
== 'b' || sc
.chNext
== 'B')) {
377 if (base2or8Literals
) {
378 base_n_number
= true;
379 sc
.SetState(SCE_P_NUMBER
);
381 sc
.SetState(SCE_P_NUMBER
);
382 sc
.ForwardSetState(SCE_P_IDENTIFIER
);
385 base_n_number
= false;
386 sc
.SetState(SCE_P_NUMBER
);
388 } else if ((isascii(sc
.ch
) && isoperator(static_cast<char>(sc
.ch
))) || sc
.ch
== '`') {
389 sc
.SetState(SCE_P_OPERATOR
);
390 } else if (sc
.ch
== '#') {
// "##" starts a comment block; a single '#' starts a comment line.
391 sc
.SetState(sc
.chNext
== '#' ? SCE_P_COMMENTBLOCK
: SCE_P_COMMENTLINE
);
392 } else if (sc
.ch
== '@') {
393 sc
.SetState(SCE_P_DECORATOR
);
394 } else if (IsPyStringStart(sc
.ch
, sc
.chNext
, sc
.GetRelative(2), allowedLiterals
)) {
// GetPyStringState picks the string state and reports through nextIndex
// where the string body begins.
395 unsigned int nextIndex
= 0;
396 sc
.SetState(GetPyStringState(styler
, sc
.currentPos
, &nextIndex
, allowedLiterals
));
397 while (nextIndex
> (sc
.currentPos
+ 1) && sc
.More()) {
400 } else if (IsAWordStart(sc
.ch
)) {
401 sc
.SetState(SCE_P_IDENTIFIER
);
// Final fill of the pending indicator span with value 0.
405 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
// Scans the characters of the given line, from the line's start position up
// to (not including) eol_pos, which is one before the start of the next line.
// NOTE(review): only the space/tab test of the loop body is visible here;
// judging by the name and its use in FoldPyDoc, this presumably reports
// whether the first non-blank character starts a comment - confirm.
//   line   - line number to inspect.
//   styler - accessor providing LineStart().
409 static bool IsCommentLine(int line
, Accessor
&styler
) {
410 int pos
= styler
.LineStart(line
);
411 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
412 for (int i
= pos
; i
< eol_pos
; i
++) {
416 else if (ch
!= ' ' && ch
!= '\t')
// Returns true when the style at the first position of the given line,
// masked with 31, is SCE_P_TRIPLE or SCE_P_TRIPLEDOUBLE, i.e. the line
// begins inside a triple-quoted string.
//   line   - line number to inspect.
//   styler - accessor providing LineStart() and StyleAt().
422 static bool IsQuoteLine(int line
, Accessor
&styler
) {
423 int style
= styler
.StyleAt(styler
.LineStart(line
)) & 31;
424 return ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
// Assigns fold levels to document lines with styler.SetLevel(), based on the
// indentation reported by styler.IndentAmount(). Blank and comment lines are
// skipped when looking for the next indent level, and lines inside
// triple-quoted strings are handled specially when fold.quotes.python is set.
// Parameters:
//   startPos, length - requested range; maxPos is startPos + length.
//                      Processing begins at a line found by backtracking and
//                      may continue past the requested lines while prevQuote
//                      is set, but never beyond the last document line.
//   third parameter  - initStyle, unused.
//   fourth parameter - word lists, unnamed and unused.
//   styler           - accessor used to read styles and properties and to
//                      write fold levels.
// Properties read: fold.quotes.python, fold.compact.
428 static void FoldPyDoc(unsigned int startPos
, int length
, int /*initStyle - unused*/,
429 WordList
*[], Accessor
&styler
) {
430 const int maxPos
= startPos
+ length
;
431 const int maxLines
= (maxPos
== styler
.Length()) ? styler
.GetLine(maxPos
) : styler
.GetLine(maxPos
- 1); // Requested last line
432 const int docLines
= styler
.GetLine(styler
.Length()); // Available last line
434 // property fold.quotes.python
435 // This option enables folding multi-line quoted strings when using the Python lexer.
436 const bool foldQuotes
= styler
.GetPropertyInt("fold.quotes.python") != 0;
438 const bool foldCompact
= styler
.GetPropertyInt("fold.compact") != 0;
440 // Backtrack to previous non-blank line so we can determine indent level
441 // for any white space lines (needed esp. within triple quoted strings)
442 // and so we can fix any preceding fold level (which is why we go back
443 // at least one line in all cases)
445 int lineCurrent
= styler
.GetLine(startPos
);
446 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
447 while (lineCurrent
> 0) {
449 indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
// The condition below holds for a line that is not whitespace-only, not a
// comment line and does not start inside a triple-quoted string.
450 if (!(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) &&
451 (!IsCommentLine(lineCurrent
, styler
)) &&
452 (!IsQuoteLine(lineCurrent
, styler
)))
455 int indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
457 // Set up initial loop state
458 startPos
= styler
.LineStart(lineCurrent
);
459 int prev_state
= SCE_P_DEFAULT
& 31;
460 if (lineCurrent
>= 1)
461 prev_state
= styler
.StyleAt(startPos
- 1) & 31;
// prevQuote: quote folding is enabled and the character before startPos is
// styled as part of a triple-quoted string.
462 int prevQuote
= foldQuotes
&& ((prev_state
== SCE_P_TRIPLE
) || (prev_state
== SCE_P_TRIPLEDOUBLE
));
464 // Process all characters to end of requested range or end of any triple quote
465 //that hangs over the end of the range.  Cap processing in all cases
466 // to end of document (in case of unclosed quote at end).
467 while ((lineCurrent
<= docLines
) && ((lineCurrent
<= maxLines
) || prevQuote
)) {
470 int lev
= indentCurrent
;
471 int lineNext
= lineCurrent
+ 1;
472 int indentNext
= indentCurrent
;
474 if (lineNext
<= docLines
) {
475 // Information about next line is only available if not at end of document
476 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
// Look at the first character of the next line, or at the last character of
// the document when the next line starts at the document end.
477 int lookAtPos
= (styler
.LineStart(lineNext
) == styler
.Length()) ? styler
.Length() - 1 : styler
.LineStart(lineNext
);
478 int style
= styler
.StyleAt(lookAtPos
) & 31;
479 quote
= foldQuotes
&& ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
// quote_start: quote is set while prevQuote is not.
// quote_continue: both quote and prevQuote are set.
481 const int quote_start
= (quote
&& !prevQuote
);
482 const int quote_continue
= (quote
&& prevQuote
);
483 if (!quote
|| !prevQuote
)
484 indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
486 indentNext
= indentCurrentLevel
;
487 if (indentNext
& SC_FOLDLEVELWHITEFLAG
)
488 indentNext
= SC_FOLDLEVELWHITEFLAG
| indentCurrentLevel
;
491 // Place fold point at start of triple quoted string
492 lev
|= SC_FOLDLEVELHEADERFLAG
;
493 } else if (quote_continue
|| prevQuote
) {
494 // Add level to rest of lines in the string
498 // Skip past any blank lines for next indent level info; we skip also
499 // comments (all comments, not just those starting in column 0)
500 // which effectively folds them into surrounding code rather
501 // than screwing up folding.
504 (lineNext
< docLines
) &&
505 ((indentNext
& SC_FOLDLEVELWHITEFLAG
) ||
506 (lineNext
<= docLines
&& IsCommentLine(lineNext
, styler
)))) {
509 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
// levelAfterComments: level of the line reached after skipping.
// levelBeforeComments: the larger of the current level and that level.
512 const int levelAfterComments
= indentNext
& SC_FOLDLEVELNUMBERMASK
;
513 const int levelBeforeComments
= Maximum(indentCurrentLevel
,levelAfterComments
);
515 // Now set all the indent levels on the lines we skipped
516 // Do this from end to start.  Once we encounter one line
517 // which is indented more than the line after the end of
518 // the comment-block, use the level of the block before
520 int skipLine
= lineNext
;
521 int skipLevel
= levelAfterComments
;
// Walk backwards over the skipped lines, from just before lineNext down to
// the line after lineCurrent.
523 while (--skipLine
> lineCurrent
) {
524 int skipLineIndent
= styler
.IndentAmount(skipLine
, &spaceFlags
, NULL
);
527 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
)
528 skipLevel
= levelBeforeComments
;
530 int whiteFlag
= skipLineIndent
& SC_FOLDLEVELWHITEFLAG
;
532 styler
.SetLevel(skipLine
, skipLevel
| whiteFlag
);
534 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
&&
535 !(skipLineIndent
& SC_FOLDLEVELWHITEFLAG
) &&
536 !IsCommentLine(skipLine
, styler
))
537 skipLevel
= levelBeforeComments
;
539 styler
.SetLevel(skipLine
, skipLevel
);
543 // Set fold header on non-quote line
544 if (!quote
&& !(indentCurrent
& SC_FOLDLEVELWHITEFLAG
)) {
// A line becomes a fold header when the next relevant line is indented deeper.
545 if ((indentCurrent
& SC_FOLDLEVELNUMBERMASK
) < (indentNext
& SC_FOLDLEVELNUMBERMASK
))
546 lev
|= SC_FOLDLEVELHEADERFLAG
;
549 // Keep track of triple quote state of previous line
552 // Set fold level for this line and move to next line
// When fold.compact is off, the white flag is stripped from the stored level.
553 styler
.SetLevel(lineCurrent
, foldCompact
? lev
: lev
& ~SC_FOLDLEVELWHITEFLAG
);
554 indentCurrent
= indentNext
;
555 lineCurrent
= lineNext
;
558 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
559 // header flag set; the loop above is crafted to take care of this case!
560 //styler.SetLevel(lineCurrent, indentCurrent);
// Array of human-readable descriptions; "Highlighted identifiers" is one
// entry.
// NOTE(review): presumably these describe, in order, the word lists that
// ColourisePyDoc reads as keywordlists[0] and keywordlists[1] - confirm
// against the LexerModule registration.
563 static const char *const pythonWordListDesc
[] = {
565 "Highlighted identifiers",
569 LexerModule
lmPython(SCLEX_PYTHON
, ColourisePyDoc
, "python", FoldPyDoc
,