]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexPython.cxx
6669694d1d6451d9c0c1d56ef6b636359ffb0b52
[wxWidgets.git] / src / stc / scintilla / src / LexPython.cxx
1 // Scintilla source code edit control
2 /** @file LexPython.cxx
3 ** Lexer for Python.
4 **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "KeyWords.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 /* Returns true if the "as" word that begins at start follows an import statement */
23 static bool IsImportAs(unsigned int start, Accessor &styler) {
24 unsigned int i;
25 unsigned int j;
26 char s[10];
27
28 /* Find any import before start but after any statement terminator or quote */
29 i = start;
30 while (i > 0) {
31 char ch = styler[i - 1];
32
33 if (ch == '\n' || ch == '\r' || ch == ';' || ch == '\'' || ch == '"' || ch == '`')
34 break;
35 if (ch == 't' && i > 5) {
36 for (j = 0; j < 6; j++)
37 s[j] = styler[(i - 6) + j];
38 s[j] = '\0';
39 if (strcmp(s, "import") == 0)
40 return true;
41 }
42 i--;
43 }
44
45 return false;
46 }
47
48 static void ClassifyWordPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
49 char s[100];
50 bool wordIsNumber = isdigit(styler[start]);
51 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
52 s[i] = styler[start + i];
53 s[i + 1] = '\0';
54 }
55 char chAttr = SCE_P_IDENTIFIER;
56 if (0 == strcmp(prevWord, "class"))
57 chAttr = SCE_P_CLASSNAME;
58 else if (0 == strcmp(prevWord, "def"))
59 chAttr = SCE_P_DEFNAME;
60 else if (wordIsNumber)
61 chAttr = SCE_P_NUMBER;
62 else if (keywords.InList(s))
63 chAttr = SCE_P_WORD;
64 else if (strcmp(s, "as") == 0 && IsImportAs(start, styler))
65 chAttr = SCE_P_WORD;
66 // make sure that dot-qualifiers inside the word are lexed correct
67 else for (unsigned int i = 0; i < end - start + 1; i++) {
68 if (styler[start + i] == '.') {
69 styler.ColourTo(start + i - 1, chAttr);
70 styler.ColourTo(start + i, SCE_P_OPERATOR);
71 }
72 }
73 styler.ColourTo(end, chAttr);
74 strcpy(prevWord, s);
75 }
76
77 static bool IsPyComment(Accessor &styler, int pos, int len) {
78 return len>0 && styler[pos]=='#';
79 }
80
/* Decides from three consecutive characters whether a string literal starts at ch:
 * either a bare quote, or a quote preceded by one of the prefixes r, u or ur
 * (in either letter case). */
static bool IsPyStringStart(char ch, char chNext, char chNext2) {
	const bool quoteFollows = (chNext == '"' || chNext == '\'');

	switch (ch) {
	case '\'':
	case '"':
		// The quote itself
		return true;

	case 'u':
	case 'U': {
		// u"..." or ur"..."
		if (quoteFollows)
			return true;
		const bool rawFollows = (chNext == 'r' || chNext == 'R');
		const bool quoteAfterRaw = (chNext2 == '"' || chNext2 == '\'');
		return rawFollows && quoteAfterRaw;
	}

	case 'r':
	case 'R':
		// r"..."
		return quoteFollows;

	default:
		return false;
	}
}
95
96 static bool IsPyWordStart(char ch, char chNext, char chNext2) {
97 return (iswordchar(ch) && !IsPyStringStart(ch, chNext, chNext2));
98 }
99
100 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
101 static int GetPyStringState(Accessor &styler, int i, int *nextIndex) {
102 char ch = styler.SafeGetCharAt(i);
103 char chNext = styler.SafeGetCharAt(i + 1);
104
105 // Advance beyond r, u, or ur prefix, but bail if there are any unexpected chars
106 if (ch == 'r' || ch == 'R') {
107 i++;
108 ch = styler.SafeGetCharAt(i);
109 chNext = styler.SafeGetCharAt(i + 1);
110 }
111 else if (ch == 'u' || ch == 'U') {
112 if (chNext == 'r' || chNext == 'R')
113 i += 2;
114 else
115 i += 1;
116 ch = styler.SafeGetCharAt(i);
117 chNext = styler.SafeGetCharAt(i + 1);
118 }
119
120 if (ch != '"' && ch != '\'') {
121 *nextIndex = i + 1;
122 return SCE_P_DEFAULT;
123 }
124
125 if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
126 *nextIndex = i + 3;
127
128 if (ch == '"')
129 return SCE_P_TRIPLEDOUBLE;
130 else
131 return SCE_P_TRIPLE;
132 } else {
133 *nextIndex = i + 1;
134
135 if (ch == '"')
136 return SCE_P_STRING;
137 else
138 return SCE_P_CHARACTER;
139 }
140 }
141
142 static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,
143 WordList *keywordlists[], Accessor &styler) {
144
145 int lengthDoc = startPos + length;
146
147 // Backtrack to previous line in case need to fix its tab whinging
148 int lineCurrent = styler.GetLine(startPos);
149 if (startPos > 0) {
150 if (lineCurrent > 0) {
151 startPos = styler.LineStart(lineCurrent-1);
152 if (startPos == 0)
153 initStyle = SCE_P_DEFAULT;
154 else
155 initStyle = styler.StyleAt(startPos-1);
156 }
157 }
158
159 // Python uses a different mask because bad indentation is marked by oring with 32
160 styler.StartAt(startPos, 127);
161
162 WordList &keywords = *keywordlists[0];
163
164 int whingeLevel = styler.GetPropertyInt("tab.timmy.whinge.level");
165 char prevWord[200];
166 prevWord[0] = '\0';
167 if (length == 0)
168 return ;
169
170 int state = initStyle & 31;
171
172 int nextIndex = 0;
173 char chPrev = ' ';
174 char chPrev2 = ' ';
175 char chNext = styler[startPos];
176 styler.StartSegment(startPos);
177 bool atStartLine = true;
178 int spaceFlags = 0;
179 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
180 for (int i = startPos; i < lengthDoc; i++) {
181
182 if (atStartLine) {
183 char chBad = static_cast<char>(64);
184 char chGood = static_cast<char>(0);
185 char chFlags = chGood;
186 if (whingeLevel == 1) {
187 chFlags = (spaceFlags & wsInconsistent) ? chBad : chGood;
188 } else if (whingeLevel == 2) {
189 chFlags = (spaceFlags & wsSpaceTab) ? chBad : chGood;
190 } else if (whingeLevel == 3) {
191 chFlags = (spaceFlags & wsSpace) ? chBad : chGood;
192 } else if (whingeLevel == 4) {
193 chFlags = (spaceFlags & wsTab) ? chBad : chGood;
194 }
195 styler.SetFlags(chFlags, static_cast<char>(state));
196 atStartLine = false;
197 }
198
199 char ch = chNext;
200 chNext = styler.SafeGetCharAt(i + 1);
201 char chNext2 = styler.SafeGetCharAt(i + 2);
202
203 if ((ch == '\r' && chNext != '\n') || (ch == '\n') || (i == lengthDoc)) {
204 if ((state == SCE_P_DEFAULT) || (state == SCE_P_TRIPLE) || (state == SCE_P_TRIPLEDOUBLE)) {
205 // Perform colourisation of white space and triple quoted strings at end of each line to allow
206 // tab marking to work inside white space and triple quoted strings
207 styler.ColourTo(i, state);
208 }
209 lineCurrent++;
210 styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
211 atStartLine = true;
212 }
213
214 if (styler.IsLeadByte(ch)) {
215 chNext = styler.SafeGetCharAt(i + 2);
216 chPrev = ' ';
217 chPrev2 = ' ';
218 i += 1;
219 continue;
220 }
221
222 if (state == SCE_P_STRINGEOL) {
223 if (ch != '\r' && ch != '\n') {
224 styler.ColourTo(i - 1, state);
225 state = SCE_P_DEFAULT;
226 }
227 }
228 if (state == SCE_P_DEFAULT) {
229 if (IsPyWordStart(ch, chNext, chNext2)) {
230 styler.ColourTo(i - 1, state);
231 state = SCE_P_WORD;
232 } else if (ch == '#') {
233 styler.ColourTo(i - 1, state);
234 state = chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE;
235 } else if (IsPyStringStart(ch, chNext, chNext2)) {
236 styler.ColourTo(i - 1, state);
237 state = GetPyStringState(styler, i, &nextIndex);
238 if (nextIndex != i + 1) {
239 i = nextIndex - 1;
240 ch = ' ';
241 chPrev = ' ';
242 chNext = styler.SafeGetCharAt(i + 1);
243 }
244 } else if (isoperator(ch)) {
245 styler.ColourTo(i - 1, state);
246 styler.ColourTo(i, SCE_P_OPERATOR);
247 }
248 } else if (state == SCE_P_WORD) {
249 if (!iswordchar(ch)) {
250 ClassifyWordPy(styler.GetStartSegment(), i - 1, keywords, styler, prevWord);
251 state = SCE_P_DEFAULT;
252 if (ch == '#') {
253 state = chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE;
254 } else if (IsPyStringStart(ch, chNext, chNext2)) {
255 styler.ColourTo(i - 1, state);
256 state = GetPyStringState(styler, i, &nextIndex);
257 if (nextIndex != i + 1) {
258 i = nextIndex - 1;
259 ch = ' ';
260 chPrev = ' ';
261 chNext = styler.SafeGetCharAt(i + 1);
262 }
263 } else if (isoperator(ch)) {
264 styler.ColourTo(i, SCE_P_OPERATOR);
265 }
266 }
267 } else {
268 if (state == SCE_P_COMMENTLINE || state == SCE_P_COMMENTBLOCK) {
269 if (ch == '\r' || ch == '\n') {
270 styler.ColourTo(i - 1, state);
271 state = SCE_P_DEFAULT;
272 }
273 } else if (state == SCE_P_STRING) {
274 if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) {
275 styler.ColourTo(i - 1, state);
276 state = SCE_P_STRINGEOL;
277 } else if (ch == '\\') {
278 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
279 i++;
280 ch = chNext;
281 chNext = styler.SafeGetCharAt(i + 1);
282 }
283 } else if (ch == '\"') {
284 styler.ColourTo(i, state);
285 state = SCE_P_DEFAULT;
286 }
287 } else if (state == SCE_P_CHARACTER) {
288 if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) {
289 styler.ColourTo(i - 1, state);
290 state = SCE_P_STRINGEOL;
291 } else if (ch == '\\') {
292 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
293 i++;
294 ch = chNext;
295 chNext = styler.SafeGetCharAt(i + 1);
296 }
297 } else if (ch == '\'') {
298 styler.ColourTo(i, state);
299 state = SCE_P_DEFAULT;
300 }
301 } else if (state == SCE_P_TRIPLE) {
302 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
303 styler.ColourTo(i, state);
304 state = SCE_P_DEFAULT;
305 }
306 } else if (state == SCE_P_TRIPLEDOUBLE) {
307 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
308 styler.ColourTo(i, state);
309 state = SCE_P_DEFAULT;
310 }
311 }
312 }
313 chPrev2 = chPrev;
314 chPrev = ch;
315 }
316 if (state == SCE_P_WORD) {
317 ClassifyWordPy(styler.GetStartSegment(), lengthDoc, keywords, styler, prevWord);
318 } else {
319 styler.ColourTo(lengthDoc, state);
320 }
321 }
322
323 static bool IsCommentLine(int line, Accessor &styler) {
324 int pos = styler.LineStart(line);
325 int eol_pos = styler.LineStart(line+1) - 1;
326 for (int i = pos; i < eol_pos; i++) {
327 char ch = styler[i];
328 if (ch == '#')
329 return true;
330 else if (ch != ' ' && ch != '\t')
331 return false;
332 }
333 return false;
334 }
335
336 static bool IsQuoteLine(int line, Accessor &styler) {
337 int style = styler.StyleAt(styler.LineStart(line)) & 31;
338 return ((style == SCE_P_TRIPLE) || (style== SCE_P_TRIPLEDOUBLE));
339 }
340
341 static void FoldPyDoc(unsigned int startPos, int length, int /*initStyle - unused*/,
342 WordList *[], Accessor &styler) {
343 int maxPos = startPos + length;
344 int maxLines = styler.GetLine(maxPos-1);
345
346 bool foldComment = styler.GetPropertyInt("fold.comment.python");
347 bool foldQuotes = styler.GetPropertyInt("fold.quotes.python");
348
349 // Backtrack to previous non-blank line so we can determine indent level
350 // for any white space lines (needed esp. within triple quoted strings)
351 // and so we can fix any preceding fold level (which is why we go back
352 // at least one line in all cases)
353 int spaceFlags = 0;
354 int lineCurrent = styler.GetLine(startPos);
355 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
356 while (lineCurrent > 0) {
357 lineCurrent--;
358 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
359 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
360 (!IsCommentLine(lineCurrent, styler)) &&
361 (!IsQuoteLine(lineCurrent, styler)))
362 break;
363 }
364 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
365
366 startPos = styler.LineStart(lineCurrent);
367 // Set up initial state
368 int prev_state = SCE_P_DEFAULT & 31;
369 if (lineCurrent >= 1)
370 prev_state = styler.StyleAt(startPos-1) & 31;
371 int prevQuote = foldQuotes && ((prev_state == SCE_P_TRIPLE) || (prev_state == SCE_P_TRIPLEDOUBLE));
372 int prevComment = 0;
373 if (lineCurrent >= 1)
374 prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
375
376 // Process all characters to end of requested range or end of any triple quote
377 // or comment that hangs over the end of the range
378 while ((lineCurrent <= maxLines) || prevQuote || prevComment) {
379
380 // Gather info
381 int lev = indentCurrent;
382 int lineNext = lineCurrent + 1;
383 int style = styler.StyleAt(styler.LineStart(lineNext)) & 31;
384 int indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
385 int quote = foldQuotes && ((style == SCE_P_TRIPLE) || (style== SCE_P_TRIPLEDOUBLE));
386 int quote_start = (quote && !prevQuote);
387 int quote_continue = (quote && prevQuote);
388 int comment = foldComment && IsCommentLine(lineCurrent, styler);
389 int comment_start = (comment && !prevComment &&
390 IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE));
391 int comment_continue = (comment && prevComment);
392 if ((!quote || !prevQuote) && !comment)
393 indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
394 if (quote)
395 indentNext = indentCurrentLevel;
396 if (indentNext & SC_FOLDLEVELWHITEFLAG)
397 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
398
399 if (quote_start) {
400 // Place fold point at start of triple quoted string
401 lev |= SC_FOLDLEVELHEADERFLAG;
402 } else if (quote_continue || prevQuote) {
403 // Add level to rest of lines in the string
404 lev = lev + 1;
405 } else if (comment_start) {
406 // Place fold point at start of a block of comments
407 lev |= SC_FOLDLEVELHEADERFLAG;
408 } else if (comment_continue) {
409 // Add level to rest of lines in the block
410 lev = lev + 1;
411 }
412
413 // Skip past any blank lines for next indent level info; we skip also comments
414 // starting in column 0 which effectively folds them into surrounding code
415 // rather than screwing up folding. Then set indent level on the lines
416 // we skipped to be same as maximum of current and next indent. This approach
417 // does a reasonable job of collapsing white space into surrounding code
418 // without getting confused by white space at the start of an indented level.
419 while (!quote &&
420 ((indentNext & SC_FOLDLEVELWHITEFLAG) || styler[styler.LineStart(lineNext)] == '#') &&
421 (lineNext < maxLines)) {
422 int level = Platform::Maximum(indentCurrent, indentNext);
423 if (indentNext & SC_FOLDLEVELWHITEFLAG)
424 level = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
425 styler.SetLevel(lineNext, level);
426 lineNext++;
427 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
428 }
429
430 // Set fold header on non-quote/non-comment line
431 if (!quote && !comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG) ) {
432 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
433 lev |= SC_FOLDLEVELHEADERFLAG;
434 }
435
436 // Keep track of triple quote and block comment state of previous line
437 prevQuote = quote;
438 prevComment = comment_start || comment_continue;
439
440 // Set fold level for this line and move to next line
441 styler.SetLevel(lineCurrent, lev);
442 indentCurrent = indentNext;
443 lineCurrent = lineNext;
444 }
445
446 // Make sure last line indent level is set too
447 styler.SetLevel(lineCurrent, indentCurrent);
448 }
449
450 LexerModule lmPython(SCLEX_PYTHON, ColourisePyDoc, "python", FoldPyDoc);