1 // Scintilla source code edit control
2 /** @file LexPython.cxx
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
18 #include "StyleContext.h"
20 #include "Scintilla.h"
24 using namespace Scintilla
;
// Keyword categories compared against kwLast in ColourisePyDoc.
// kwCDef and kwCTypeName are only used for Cython.
enum kwType {
	kwOther,
	kwClass,
	kwDef,
	kwImport,
	kwCDef,
	kwCTypeName
};
// Indicator number passed to styler.IndicatorFill() when the lexer
// marks (value 1) or clears (value 0) a run of indentation.
static const int indicatorWhitespace = 1;
32 static bool IsPyComment(Accessor
&styler
, int pos
, int len
) {
33 return len
> 0 && styler
[pos
] == '#';
// Bit flags selecting which string prefix letters are recognised;
// the values are combined with | and tested with &.
enum literalsAllowed {
	litNone = 0,
	litU = 1,	// u / U prefix
	litB = 2	// b / B prefix
};
38 static bool IsPyStringTypeChar(int ch
, literalsAllowed allowed
) {
40 ((allowed
& litB
) && (ch
== 'b' || ch
== 'B')) ||
41 ((allowed
& litU
) && (ch
== 'u' || ch
== 'U'));
44 static bool IsPyStringStart(int ch
, int chNext
, int chNext2
, literalsAllowed allowed
) {
45 if (ch
== '\'' || ch
== '"')
47 if (IsPyStringTypeChar(ch
, allowed
)) {
48 if (chNext
== '"' || chNext
== '\'')
50 if ((chNext
== 'r' || chNext
== 'R') && (chNext2
== '"' || chNext2
== '\''))
53 if ((ch
== 'r' || ch
== 'R') && (chNext
== '"' || chNext
== '\''))
59 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
60 static int GetPyStringState(Accessor
&styler
, int i
, unsigned int *nextIndex
, literalsAllowed allowed
) {
61 char ch
= styler
.SafeGetCharAt(i
);
62 char chNext
= styler
.SafeGetCharAt(i
+ 1);
64 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
65 if (ch
== 'r' || ch
== 'R') {
67 ch
= styler
.SafeGetCharAt(i
);
68 chNext
= styler
.SafeGetCharAt(i
+ 1);
69 } else if (IsPyStringTypeChar(ch
, allowed
)) {
70 if (chNext
== 'r' || chNext
== 'R')
74 ch
= styler
.SafeGetCharAt(i
);
75 chNext
= styler
.SafeGetCharAt(i
+ 1);
78 if (ch
!= '"' && ch
!= '\'') {
83 if (ch
== chNext
&& ch
== styler
.SafeGetCharAt(i
+ 2)) {
87 return SCE_P_TRIPLEDOUBLE
;
96 return SCE_P_CHARACTER
;
// Report whether ch may appear inside an identifier-like token.
// Returns true for ASCII letters and digits, '.' and '_'.
// Values outside 0..0x7F return false; negative values are rejected before
// reaching isalnum(), whose argument must be an unsigned char value or EOF.
static inline bool IsAWordChar(int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
// Report whether ch may begin an identifier-like token.
// Returns true for ASCII letters and digits and for '_'; digits are accepted
// here because the test is isalnum(), exactly as before.
// Values outside 0..0x7F return false; negative values are rejected before
// reaching isalnum(), whose argument must be an unsigned char value or EOF.
static inline bool IsAWordStart(int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
108 static void ColourisePyDoc(unsigned int startPos
, int length
, int initStyle
,
109 WordList
*keywordlists
[], Accessor
&styler
) {
111 int endPos
= startPos
+ length
;
113 // Backtrack to previous line in case need to fix its tab whinging
114 int lineCurrent
= styler
.GetLine(startPos
);
116 if (lineCurrent
> 0) {
118 // Look for backslash-continued lines
119 while (lineCurrent
> 0) {
120 int eolPos
= styler
.LineStart(lineCurrent
) - 1;
121 int eolStyle
= styler
.StyleAt(eolPos
);
122 if (eolStyle
== SCE_P_STRING
123 || eolStyle
== SCE_P_CHARACTER
124 || eolStyle
== SCE_P_STRINGEOL
) {
130 startPos
= styler
.LineStart(lineCurrent
);
132 initStyle
= startPos
== 0 ? SCE_P_DEFAULT
: styler
.StyleAt(startPos
- 1);
135 WordList
&keywords
= *keywordlists
[0];
136 WordList
&keywords2
= *keywordlists
[1];
138 // property tab.timmy.whinge.level
139 // For Python code, checks whether indenting is consistent.
140 // The default, 0 turns off indentation checking,
141 // 1 checks whether each line is potentially inconsistent with the previous line,
142 // 2 checks whether any space characters occur before a tab character in the indentation,
143 // 3 checks whether any spaces are in the indentation, and
144 // 4 checks for any tab characters in the indentation.
145 // 1 is a good level to use.
146 const int whingeLevel
= styler
.GetPropertyInt("tab.timmy.whinge.level");
148 // property lexer.python.literals.binary
149 // Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.
150 bool base2or8Literals
= styler
.GetPropertyInt("lexer.python.literals.binary", 1) != 0;
152 // property lexer.python.strings.u
153 // Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3.
154 literalsAllowed allowedLiterals
= (styler
.GetPropertyInt("lexer.python.strings.u", 1)) ? litU
: litNone
;
156 // property lexer.python.strings.b
157 // Set to 0 to not recognise Python 3 bytes literals b"x".
158 if (styler
.GetPropertyInt("lexer.python.strings.b", 1))
159 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litB
);
161 // property lexer.python.strings.over.newline
162 // Set to 1 to allow strings to span newline characters.
163 bool stringsOverNewline
= styler
.GetPropertyInt("lexer.python.strings.over.newline") != 0;
165 initStyle
= initStyle
& 31;
166 if (initStyle
== SCE_P_STRINGEOL
) {
167 initStyle
= SCE_P_DEFAULT
;
170 kwType kwLast
= kwOther
;
172 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
173 bool base_n_number
= false;
175 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
177 bool indentGood
= true;
178 int startIndicator
= sc
.currentPos
;
179 bool inContinuedString
= false;
181 for (; sc
.More(); sc
.Forward()) {
183 if (sc
.atLineStart
) {
184 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
186 if (whingeLevel
== 1) {
187 indentGood
= (spaceFlags
& wsInconsistent
) == 0;
188 } else if (whingeLevel
== 2) {
189 indentGood
= (spaceFlags
& wsSpaceTab
) == 0;
190 } else if (whingeLevel
== 3) {
191 indentGood
= (spaceFlags
& wsSpace
) == 0;
192 } else if (whingeLevel
== 4) {
193 indentGood
= (spaceFlags
& wsTab
) == 0;
196 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
197 startIndicator
= sc
.currentPos
;
202 if ((sc
.state
== SCE_P_DEFAULT
) ||
203 (sc
.state
== SCE_P_TRIPLE
) ||
204 (sc
.state
== SCE_P_TRIPLEDOUBLE
)) {
205 // Perform colourisation of white space and triple quoted strings at end of each line to allow
206 // tab marking to work inside white space and triple quoted strings
207 sc
.SetState(sc
.state
);
210 if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
211 if (inContinuedString
|| stringsOverNewline
) {
212 inContinuedString
= false;
214 sc
.ChangeState(SCE_P_STRINGEOL
);
215 sc
.ForwardSetState(SCE_P_DEFAULT
);
222 bool needEOLCheck
= false;
224 // Check for a state end
225 if (sc
.state
== SCE_P_OPERATOR
) {
227 sc
.SetState(SCE_P_DEFAULT
);
228 } else if (sc
.state
== SCE_P_NUMBER
) {
229 if (!IsAWordChar(sc
.ch
) &&
230 !(!base_n_number
&& ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
231 sc
.SetState(SCE_P_DEFAULT
);
233 } else if (sc
.state
== SCE_P_IDENTIFIER
) {
234 if ((sc
.ch
== '.') || (!IsAWordChar(sc
.ch
))) {
236 sc
.GetCurrent(s
, sizeof(s
));
237 int style
= SCE_P_IDENTIFIER
;
238 if ((kwLast
== kwImport
) && (strcmp(s
, "as") == 0)) {
240 } else if (keywords
.InList(s
)) {
242 } else if (kwLast
== kwClass
) {
243 style
= SCE_P_CLASSNAME
;
244 } else if (kwLast
== kwDef
) {
245 style
= SCE_P_DEFNAME
;
246 } else if (kwLast
== kwCDef
) {
247 int pos
= sc
.currentPos
;
248 unsigned char ch
= styler
.SafeGetCharAt(pos
, '\0');
251 style
= SCE_P_DEFNAME
;
253 } else if (ch
== ':') {
254 style
= SCE_P_CLASSNAME
;
256 } else if (ch
== ' ' || ch
== '\t' || ch
== '\n' || ch
== '\r') {
258 ch
= styler
.SafeGetCharAt(pos
, '\0');
263 } else if (keywords2
.InList(s
)) {
266 sc
.ChangeState(style
);
267 sc
.SetState(SCE_P_DEFAULT
);
268 if (style
== SCE_P_WORD
) {
269 if (0 == strcmp(s
, "class"))
271 else if (0 == strcmp(s
, "def"))
273 else if (0 == strcmp(s
, "import"))
275 else if (0 == strcmp(s
, "cdef"))
277 else if (0 == strcmp(s
, "cimport"))
279 else if (kwLast
!= kwCDef
)
281 } else if (kwLast
!= kwCDef
) {
285 } else if ((sc
.state
== SCE_P_COMMENTLINE
) || (sc
.state
== SCE_P_COMMENTBLOCK
)) {
286 if (sc
.ch
== '\r' || sc
.ch
== '\n') {
287 sc
.SetState(SCE_P_DEFAULT
);
289 } else if (sc
.state
== SCE_P_DECORATOR
) {
290 if (!IsAWordChar(sc
.ch
)) {
291 sc
.SetState(SCE_P_DEFAULT
);
293 } else if ((sc
.state
== SCE_P_STRING
) || (sc
.state
== SCE_P_CHARACTER
)) {
295 if ((sc
.chNext
== '\r') && (sc
.GetRelative(2) == '\n')) {
298 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
299 inContinuedString
= true;
301 // Don't roll over the newline.
304 } else if ((sc
.state
== SCE_P_STRING
) && (sc
.ch
== '\"')) {
305 sc
.ForwardSetState(SCE_P_DEFAULT
);
307 } else if ((sc
.state
== SCE_P_CHARACTER
) && (sc
.ch
== '\'')) {
308 sc
.ForwardSetState(SCE_P_DEFAULT
);
311 } else if (sc
.state
== SCE_P_TRIPLE
) {
314 } else if (sc
.Match("\'\'\'")) {
317 sc
.ForwardSetState(SCE_P_DEFAULT
);
320 } else if (sc
.state
== SCE_P_TRIPLEDOUBLE
) {
323 } else if (sc
.Match("\"\"\"")) {
326 sc
.ForwardSetState(SCE_P_DEFAULT
);
331 if (!indentGood
&& !IsASpaceOrTab(sc
.ch
)) {
332 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 1);
333 startIndicator
= sc
.currentPos
;
337 // One cdef line, clear kwLast only at end of line
338 if (kwLast
== kwCDef
&& sc
.atLineEnd
) {
342 // State exit code may have moved on to end of line
343 if (needEOLCheck
&& sc
.atLineEnd
) {
345 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
350 // Check for a new state starting character
351 if (sc
.state
== SCE_P_DEFAULT
) {
352 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
353 if (sc
.ch
== '0' && (sc
.chNext
== 'x' || sc
.chNext
== 'X')) {
354 base_n_number
= true;
355 sc
.SetState(SCE_P_NUMBER
);
356 } else if (sc
.ch
== '0' &&
357 (sc
.chNext
== 'o' || sc
.chNext
== 'O' || sc
.chNext
== 'b' || sc
.chNext
== 'B')) {
358 if (base2or8Literals
) {
359 base_n_number
= true;
360 sc
.SetState(SCE_P_NUMBER
);
362 sc
.SetState(SCE_P_NUMBER
);
363 sc
.ForwardSetState(SCE_P_IDENTIFIER
);
366 base_n_number
= false;
367 sc
.SetState(SCE_P_NUMBER
);
369 } else if ((isascii(sc
.ch
) && isoperator(static_cast<char>(sc
.ch
))) || sc
.ch
== '`') {
370 sc
.SetState(SCE_P_OPERATOR
);
371 } else if (sc
.ch
== '#') {
372 sc
.SetState(sc
.chNext
== '#' ? SCE_P_COMMENTBLOCK
: SCE_P_COMMENTLINE
);
373 } else if (sc
.ch
== '@') {
374 sc
.SetState(SCE_P_DECORATOR
);
375 } else if (IsPyStringStart(sc
.ch
, sc
.chNext
, sc
.GetRelative(2), allowedLiterals
)) {
376 unsigned int nextIndex
= 0;
377 sc
.SetState(GetPyStringState(styler
, sc
.currentPos
, &nextIndex
, allowedLiterals
));
378 while (nextIndex
> (sc
.currentPos
+ 1) && sc
.More()) {
381 } else if (IsAWordStart(sc
.ch
)) {
382 sc
.SetState(SCE_P_IDENTIFIER
);
386 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
390 static bool IsCommentLine(int line
, Accessor
&styler
) {
391 int pos
= styler
.LineStart(line
);
392 int eol_pos
= styler
.LineStart(line
+ 1) - 1;
393 for (int i
= pos
; i
< eol_pos
; i
++) {
397 else if (ch
!= ' ' && ch
!= '\t')
403 static bool IsQuoteLine(int line
, Accessor
&styler
) {
404 int style
= styler
.StyleAt(styler
.LineStart(line
)) & 31;
405 return ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
409 static void FoldPyDoc(unsigned int startPos
, int length
, int /*initStyle - unused*/,
410 WordList
*[], Accessor
&styler
) {
411 const int maxPos
= startPos
+ length
;
412 const int maxLines
= styler
.GetLine(maxPos
- 1); // Requested last line
413 const int docLines
= styler
.GetLine(styler
.Length() - 1); // Available last line
415 // property fold.comment.python
416 // This option enables folding multi-line comments when using the Python lexer.
417 const bool foldComment
= styler
.GetPropertyInt("fold.comment.python") != 0;
419 // property fold.quotes.python
420 // This option enables folding multi-line quoted strings when using the Python lexer.
421 const bool foldQuotes
= styler
.GetPropertyInt("fold.quotes.python") != 0;
423 const bool foldCompact
= styler
.GetPropertyInt("fold.compact") != 0;
425 // Backtrack to previous non-blank line so we can determine indent level
426 // for any white space lines (needed esp. within triple quoted strings)
427 // and so we can fix any preceding fold level (which is why we go back
428 // at least one line in all cases)
430 int lineCurrent
= styler
.GetLine(startPos
);
431 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
432 while (lineCurrent
> 0) {
434 indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, NULL
);
435 if (!(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) &&
436 (!IsCommentLine(lineCurrent
, styler
)) &&
437 (!IsQuoteLine(lineCurrent
, styler
)))
440 int indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
442 // Set up initial loop state
443 startPos
= styler
.LineStart(lineCurrent
);
444 int prev_state
= SCE_P_DEFAULT
& 31;
445 if (lineCurrent
>= 1)
446 prev_state
= styler
.StyleAt(startPos
- 1) & 31;
447 int prevQuote
= foldQuotes
&& ((prev_state
== SCE_P_TRIPLE
) || (prev_state
== SCE_P_TRIPLEDOUBLE
));
449 if (lineCurrent
>= 1)
450 prevComment
= foldComment
&& IsCommentLine(lineCurrent
- 1, styler
);
452 // Process all characters to end of requested range or end of any triple quote
453 // or comment that hangs over the end of the range. Cap processing in all cases
454 // to end of document (in case of unclosed quote or comment at end).
455 while ((lineCurrent
<= docLines
) && ((lineCurrent
<= maxLines
) || prevQuote
|| prevComment
)) {
458 int lev
= indentCurrent
;
459 int lineNext
= lineCurrent
+ 1;
460 int indentNext
= indentCurrent
;
462 if (lineNext
<= docLines
) {
463 // Information about next line is only available if not at end of document
464 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
465 int style
= styler
.StyleAt(styler
.LineStart(lineNext
)) & 31;
466 quote
= foldQuotes
&& ((style
== SCE_P_TRIPLE
) || (style
== SCE_P_TRIPLEDOUBLE
));
468 const int quote_start
= (quote
&& !prevQuote
);
469 const int quote_continue
= (quote
&& prevQuote
);
470 const int comment
= foldComment
&& IsCommentLine(lineCurrent
, styler
);
471 const int comment_start
= (comment
&& !prevComment
&& (lineNext
<= docLines
) &&
472 IsCommentLine(lineNext
, styler
) && (lev
> SC_FOLDLEVELBASE
));
473 const int comment_continue
= (comment
&& prevComment
);
474 if ((!quote
|| !prevQuote
) && !comment
)
475 indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
477 indentNext
= indentCurrentLevel
;
478 if (indentNext
& SC_FOLDLEVELWHITEFLAG
)
479 indentNext
= SC_FOLDLEVELWHITEFLAG
| indentCurrentLevel
;
482 // Place fold point at start of triple quoted string
483 lev
|= SC_FOLDLEVELHEADERFLAG
;
484 } else if (quote_continue
|| prevQuote
) {
485 // Add level to rest of lines in the string
487 } else if (comment_start
) {
488 // Place fold point at start of a block of comments
489 lev
|= SC_FOLDLEVELHEADERFLAG
;
490 } else if (comment_continue
) {
491 // Add level to rest of lines in the block
495 // Skip past any blank lines for next indent level info; we skip also
496 // comments (all comments, not just those starting in column 0)
497 // which effectively folds them into surrounding code rather
498 // than screwing up folding.
501 (lineNext
< docLines
) &&
502 ((indentNext
& SC_FOLDLEVELWHITEFLAG
) ||
503 (lineNext
<= docLines
&& IsCommentLine(lineNext
, styler
)))) {
506 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, NULL
);
509 const int levelAfterComments
= indentNext
& SC_FOLDLEVELNUMBERMASK
;
510 const int levelBeforeComments
= Platform::Maximum(indentCurrentLevel
,levelAfterComments
);
512 // Now set all the indent levels on the lines we skipped
513 // Do this from end to start. Once we encounter one line
514 // which is indented more than the line after the end of
515 // the comment-block, use the level of the block before
517 int skipLine
= lineNext
;
518 int skipLevel
= levelAfterComments
;
520 while (--skipLine
> lineCurrent
) {
521 int skipLineIndent
= styler
.IndentAmount(skipLine
, &spaceFlags
, NULL
);
524 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
)
525 skipLevel
= levelBeforeComments
;
527 int whiteFlag
= skipLineIndent
& SC_FOLDLEVELWHITEFLAG
;
529 styler
.SetLevel(skipLine
, skipLevel
| whiteFlag
);
531 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
&&
532 !(skipLineIndent
& SC_FOLDLEVELWHITEFLAG
) &&
533 !IsCommentLine(skipLine
, styler
))
534 skipLevel
= levelBeforeComments
;
536 styler
.SetLevel(skipLine
, skipLevel
);
540 // Set fold header on non-quote/non-comment line
541 if (!quote
&& !comment
&& !(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) ) {
542 if ((indentCurrent
& SC_FOLDLEVELNUMBERMASK
) < (indentNext
& SC_FOLDLEVELNUMBERMASK
))
543 lev
|= SC_FOLDLEVELHEADERFLAG
;
546 // Keep track of triple quote and block comment state of previous line
548 prevComment
= comment_start
|| comment_continue
;
550 // Set fold level for this line and move to next line
551 styler
.SetLevel(lineCurrent
, lev
);
552 indentCurrent
= indentNext
;
553 lineCurrent
= lineNext
;
556 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
557 // header flag set; the loop above is crafted to take care of this case!
558 //styler.SetLevel(lineCurrent, indentCurrent);
561 static const char * const pythonWordListDesc
[] = {
563 "Highlighted identifiers",
567 LexerModule
lmPython(SCLEX_PYTHON
, ColourisePyDoc
, "python", FoldPyDoc
,