]>
Commit | Line | Data |
---|---|---|
1e9bafca RD |
1 | // Scintilla source code edit control |
2 | /** @file LexBasic.cxx | |
3 | ** Lexer for BlitzBasic and PureBasic. | |
1dcf666d | 4 | ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net> |
1e9bafca RD |
5 | **/ |
6 | // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org> | |
7 | // The License.txt file describes the conditions under which this software may be distributed. | |
8 | ||
9 | // This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics | |
10 | // and derivatives. Once they diverge enough, might want to split it into multiple | |
11 | // lexers for more code clarity. | |
12 | // | |
13 | // Mail me (elias <at> users <dot> sf <dot> net) for any bugs. | |
14 | ||
15 | // Folding only works for simple things like functions or types. | |
16 | ||
17 | // You may want to have a look at my ctags lexer as well if, in addition to coloring | |
18 | // and folding, you need to extract things like label tags in your editor. | |
19 | ||
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include <stdio.h> | |
1e9bafca | 23 | #include <stdarg.h> |
1dcf666d RD |
24 | #include <assert.h> |
25 | #include <ctype.h> | |
1e9bafca | 26 | |
1dcf666d RD |
27 | #include <string> |
28 | #include <map> | |
1e9bafca | 29 | |
1dcf666d | 30 | #include "ILexer.h" |
1e9bafca RD |
31 | #include "Scintilla.h" |
32 | #include "SciLexer.h" | |
33 | ||
1dcf666d RD |
34 | #include "WordList.h" |
35 | #include "LexAccessor.h" | |
36 | #include "StyleContext.h" | |
37 | #include "CharacterSet.h" | |
38 | #include "LexerModule.h" | |
39 | #include "OptionSet.h" | |
40 | ||
7e0c58e9 RD |
41 | #ifdef SCI_NAMESPACE |
42 | using namespace Scintilla; | |
43 | #endif | |
44 | ||
1e9bafca RD |
/* Character classification table for the 7-bit ASCII range (indices 0..127).
 * Each entry is a bit mask built from:
 *  1 - whitespace
 *  2 - operator
 *  4 - identifier
 *  8 - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 */
static const int character_classification[128] =
{
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
	4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  4,
	2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
	4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  0
};

// Returns true when c lies inside the table (0..127) and its entry has at
// least one bit of mask set. Anything outside that range, including negative
// values, is reported as "not in the class" instead of being used as an
// index: the folder feeds these predicates plain char values, which may be
// negative for bytes >= 0x80 on platforms where char is signed.
static bool HasCharacterClass(int c, int mask) {
	return c >= 0 && c < 128 && (character_classification[c] & mask) != 0;
}

// True for characters flagged as whitespace (bit 1) in the table.
static bool IsSpace(int c) {
	return HasCharacterClass(c, 1);
}

// True for characters flagged as operators (bit 2) in the table.
static bool IsOperator(int c) {
	return HasCharacterClass(c, 2);
}

// True for characters flagged as identifier characters (bit 4) in the table.
static bool IsIdentifier(int c) {
	return HasCharacterClass(c, 4);
}

// True for characters flagged as decimal-number characters (bit 8); note that
// the table sets this bit for '.' as well as for '0'..'9'.
static bool IsDigit(int c) {
	return HasCharacterClass(c, 8);
}

// True for characters flagged as hexadecimal digits (bit 16) in the table.
static bool IsHexDigit(int c) {
	return HasCharacterClass(c, 16);
}

// True for characters flagged as binary digits (bit 32), i.e. '0' and '1'.
static bool IsBinDigit(int c) {
	return HasCharacterClass(c, 32);
}
88 | ||
// Maps the ASCII capitals 'A'..'Z' onto 'a'..'z'; any other value is
// returned unchanged.
static int LowerCase(int c)
{
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? (c - 'A' + 'a') : c;
}
95 | ||
1dcf666d RD |
96 | static int CheckBlitzFoldPoint(char const *token, int &level) { |
97 | if (!strcmp(token, "function") || | |
98 | !strcmp(token, "type")) { | |
99 | level |= SC_FOLDLEVELHEADERFLAG; | |
100 | return 1; | |
101 | } | |
102 | if (!strcmp(token, "end function") || | |
103 | !strcmp(token, "end type")) { | |
104 | return -1; | |
105 | } | |
106 | return 0; | |
107 | } | |
108 | ||
109 | static int CheckPureFoldPoint(char const *token, int &level) { | |
110 | if (!strcmp(token, "procedure") || | |
111 | !strcmp(token, "enumeration") || | |
112 | !strcmp(token, "interface") || | |
113 | !strcmp(token, "structure")) { | |
114 | level |= SC_FOLDLEVELHEADERFLAG; | |
115 | return 1; | |
116 | } | |
117 | if (!strcmp(token, "endprocedure") || | |
118 | !strcmp(token, "endenumeration") || | |
119 | !strcmp(token, "endinterface") || | |
120 | !strcmp(token, "endstructure")) { | |
121 | return -1; | |
122 | } | |
123 | return 0; | |
124 | } | |
125 | ||
126 | static int CheckFreeFoldPoint(char const *token, int &level) { | |
127 | if (!strcmp(token, "function") || | |
128 | !strcmp(token, "sub") || | |
129 | !strcmp(token, "type")) { | |
130 | level |= SC_FOLDLEVELHEADERFLAG; | |
131 | return 1; | |
132 | } | |
133 | if (!strcmp(token, "end function") || | |
134 | !strcmp(token, "end sub") || | |
135 | !strcmp(token, "end type")) { | |
136 | return -1; | |
137 | } | |
138 | return 0; | |
139 | } | |
140 | ||
141 | // An individual named option for use in an OptionSet | |
142 | ||
// Option values consumed by LexerBasic; the constructor installs the defaults.
struct OptionsBasic {
	bool fold;                      // master switch checked at the top of Fold()
	bool foldSyntaxBased;           // fold on keywords reported by the fold-point checker
	bool foldCommentExplicit;       // fold on explicit marker comments
	std::string foldExplicitStart;  // custom start marker (empty = use the built-in one)
	std::string foldExplicitEnd;    // custom end marker (empty = use the built-in one)
	bool foldExplicitAnywhere;      // accept explicit markers outside line comments
	bool foldCompact;               // flag whitespace-only lines while folding

	OptionsBasic() :
		fold(false),
		foldSyntaxBased(true),
		foldCommentExplicit(false),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
162 | ||
// Descriptions of the four BlitzBasic keyword lists; the array is
// terminated by a null pointer.
static const char * const blitzbasicWordListDesc[] = {
	"BlitzBasic Keywords",  // list 0
	"user1",                // list 1
	"user2",                // list 2
	"user3",                // list 3
	0                       // terminator
};
170 | ||
// Descriptions of the four PureBasic keyword lists; the array is
// terminated by a null pointer.
static const char * const purebasicWordListDesc[] = {
	"PureBasic Keywords",               // list 0
	"PureBasic PreProcessor Keywords",  // list 1
	"user defined 1",                   // list 2
	"user defined 2",                   // list 3
	0                                   // terminator
};
178 | ||
// Descriptions of the four FreeBasic keyword lists; the array is
// terminated by a null pointer.
static const char * const freebasicWordListDesc[] = {
	"FreeBasic Keywords",               // list 0
	"FreeBasic PreProcessor Keywords",  // list 1
	"user defined 1",                   // list 2
	"user defined 2",                   // list 3
	0                                   // terminator
};
186 | ||
187 | struct OptionSetBasic : public OptionSet<OptionsBasic> { | |
188 | OptionSetBasic(const char * const wordListDescriptions[]) { | |
189 | DefineProperty("fold", &OptionsBasic::fold); | |
190 | ||
191 | DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased, | |
192 | "Set this property to 0 to disable syntax based folding."); | |
193 | ||
194 | DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit, | |
195 | "This option enables folding explicit fold points when using the Basic lexer. " | |
196 | "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start " | |
197 | "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded."); | |
198 | ||
199 | DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart, | |
200 | "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB)."); | |
201 | ||
202 | DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd, | |
203 | "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB)."); | |
204 | ||
205 | DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere, | |
206 | "Set this property to 1 to enable explicit fold points anywhere, not just in line comments."); | |
207 | ||
208 | DefineProperty("fold.compact", &OptionsBasic::foldCompact); | |
209 | ||
210 | DefineWordListSets(wordListDescriptions); | |
211 | } | |
212 | }; | |
213 | ||
214 | class LexerBasic : public ILexer { | |
215 | char comment_char; | |
216 | int (*CheckFoldPoint)(char const *, int &); | |
217 | WordList keywordlists[4]; | |
218 | OptionsBasic options; | |
219 | OptionSetBasic osBasic; | |
220 | public: | |
221 | LexerBasic(char comment_char_, int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) : | |
222 | comment_char(comment_char_), | |
223 | CheckFoldPoint(CheckFoldPoint_), | |
224 | osBasic(wordListDescriptions) { | |
225 | } | |
226 | virtual ~LexerBasic() { | |
227 | } | |
228 | void SCI_METHOD Release() { | |
229 | delete this; | |
230 | } | |
231 | int SCI_METHOD Version() const { | |
232 | return lvOriginal; | |
233 | } | |
234 | const char * SCI_METHOD PropertyNames() { | |
235 | return osBasic.PropertyNames(); | |
236 | } | |
237 | int SCI_METHOD PropertyType(const char *name) { | |
238 | return osBasic.PropertyType(name); | |
239 | } | |
240 | const char * SCI_METHOD DescribeProperty(const char *name) { | |
241 | return osBasic.DescribeProperty(name); | |
242 | } | |
243 | int SCI_METHOD PropertySet(const char *key, const char *val); | |
244 | const char * SCI_METHOD DescribeWordListSets() { | |
245 | return osBasic.DescribeWordListSets(); | |
246 | } | |
247 | int SCI_METHOD WordListSet(int n, const char *wl); | |
248 | void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess); | |
249 | void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess); | |
250 | ||
251 | void * SCI_METHOD PrivateCall(int, void *) { | |
252 | return 0; | |
253 | } | |
254 | static ILexer *LexerFactoryBlitzBasic() { | |
255 | return new LexerBasic(';', CheckBlitzFoldPoint, blitzbasicWordListDesc); | |
256 | } | |
257 | static ILexer *LexerFactoryPureBasic() { | |
258 | return new LexerBasic(';', CheckPureFoldPoint, purebasicWordListDesc); | |
259 | } | |
260 | static ILexer *LexerFactoryFreeBasic() { | |
261 | return new LexerBasic('\'', CheckFreeFoldPoint, freebasicWordListDesc ); | |
262 | } | |
263 | }; | |
264 | ||
265 | int SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) { | |
266 | if (osBasic.PropertySet(&options, key, val)) { | |
267 | return 0; | |
268 | } | |
269 | return -1; | |
270 | } | |
271 | ||
272 | int SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) { | |
273 | WordList *wordListN = 0; | |
274 | switch (n) { | |
275 | case 0: | |
276 | wordListN = &keywordlists[0]; | |
277 | break; | |
278 | case 1: | |
279 | wordListN = &keywordlists[1]; | |
280 | break; | |
281 | case 2: | |
282 | wordListN = &keywordlists[2]; | |
283 | break; | |
284 | case 3: | |
285 | wordListN = &keywordlists[3]; | |
286 | break; | |
287 | } | |
288 | int firstModification = -1; | |
289 | if (wordListN) { | |
290 | WordList wlNew; | |
291 | wlNew.Set(wl); | |
292 | if (*wordListN != wlNew) { | |
293 | wordListN->Set(wl); | |
294 | firstModification = 0; | |
295 | } | |
296 | } | |
297 | return firstModification; | |
298 | } | |
299 | ||
300 | void SCI_METHOD LexerBasic::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) { | |
301 | LexAccessor styler(pAccess); | |
302 | ||
1e9bafca RD |
303 | bool wasfirst = true, isfirst = true; // true if first token in a line |
304 | styler.StartAt(startPos); | |
305 | ||
306 | StyleContext sc(startPos, length, initStyle, styler); | |
307 | ||
308 | // Can't use sc.More() here else we miss the last character | |
309 | for (; ; sc.Forward()) { | |
310 | if (sc.state == SCE_B_IDENTIFIER) { | |
311 | if (!IsIdentifier(sc.ch)) { | |
312 | // Labels | |
313 | if (wasfirst && sc.Match(':')) { | |
314 | sc.ChangeState(SCE_B_LABEL); | |
315 | sc.ForwardSetState(SCE_B_DEFAULT); | |
316 | } else { | |
317 | char s[100]; | |
318 | int kstates[4] = { | |
319 | SCE_B_KEYWORD, | |
320 | SCE_B_KEYWORD2, | |
321 | SCE_B_KEYWORD3, | |
322 | SCE_B_KEYWORD4, | |
323 | }; | |
324 | sc.GetCurrentLowered(s, sizeof(s)); | |
325 | for (int i = 0; i < 4; i++) { | |
1dcf666d | 326 | if (keywordlists[i].InList(s)) { |
1e9bafca RD |
327 | sc.ChangeState(kstates[i]); |
328 | } | |
329 | } | |
330 | // Types, must set them as operator else they will be | |
331 | // matched as number/constant | |
332 | if (sc.Match('.') || sc.Match('$') || sc.Match('%') || | |
333 | sc.Match('#')) { | |
334 | sc.SetState(SCE_B_OPERATOR); | |
335 | } else { | |
336 | sc.SetState(SCE_B_DEFAULT); | |
337 | } | |
338 | } | |
339 | } | |
340 | } else if (sc.state == SCE_B_OPERATOR) { | |
341 | if (!IsOperator(sc.ch) || sc.Match('#')) | |
342 | sc.SetState(SCE_B_DEFAULT); | |
343 | } else if (sc.state == SCE_B_LABEL) { | |
344 | if (!IsIdentifier(sc.ch)) | |
345 | sc.SetState(SCE_B_DEFAULT); | |
346 | } else if (sc.state == SCE_B_CONSTANT) { | |
347 | if (!IsIdentifier(sc.ch)) | |
348 | sc.SetState(SCE_B_DEFAULT); | |
349 | } else if (sc.state == SCE_B_NUMBER) { | |
350 | if (!IsDigit(sc.ch)) | |
351 | sc.SetState(SCE_B_DEFAULT); | |
352 | } else if (sc.state == SCE_B_HEXNUMBER) { | |
353 | if (!IsHexDigit(sc.ch)) | |
354 | sc.SetState(SCE_B_DEFAULT); | |
355 | } else if (sc.state == SCE_B_BINNUMBER) { | |
356 | if (!IsBinDigit(sc.ch)) | |
357 | sc.SetState(SCE_B_DEFAULT); | |
358 | } else if (sc.state == SCE_B_STRING) { | |
359 | if (sc.ch == '"') { | |
360 | sc.ForwardSetState(SCE_B_DEFAULT); | |
361 | } | |
362 | if (sc.atLineEnd) { | |
363 | sc.ChangeState(SCE_B_ERROR); | |
364 | sc.SetState(SCE_B_DEFAULT); | |
365 | } | |
b8193d80 | 366 | } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) { |
1e9bafca RD |
367 | if (sc.atLineEnd) { |
368 | sc.SetState(SCE_B_DEFAULT); | |
369 | } | |
370 | } | |
371 | ||
372 | if (sc.atLineStart) | |
373 | isfirst = true; | |
374 | ||
375 | if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) { | |
376 | if (isfirst && sc.Match('.')) { | |
377 | sc.SetState(SCE_B_LABEL); | |
378 | } else if (isfirst && sc.Match('#')) { | |
379 | wasfirst = isfirst; | |
380 | sc.SetState(SCE_B_IDENTIFIER); | |
381 | } else if (sc.Match(comment_char)) { | |
b8193d80 RD |
382 | // Hack to make deprecated QBASIC '$Include show |
383 | // up in freebasic with SCE_B_PREPROCESSOR. | |
384 | if (comment_char == '\'' && sc.Match(comment_char, '$')) | |
385 | sc.SetState(SCE_B_PREPROCESSOR); | |
386 | else | |
387 | sc.SetState(SCE_B_COMMENT); | |
1e9bafca RD |
388 | } else if (sc.Match('"')) { |
389 | sc.SetState(SCE_B_STRING); | |
390 | } else if (IsDigit(sc.ch)) { | |
391 | sc.SetState(SCE_B_NUMBER); | |
392 | } else if (sc.Match('$')) { | |
393 | sc.SetState(SCE_B_HEXNUMBER); | |
394 | } else if (sc.Match('%')) { | |
395 | sc.SetState(SCE_B_BINNUMBER); | |
396 | } else if (sc.Match('#')) { | |
397 | sc.SetState(SCE_B_CONSTANT); | |
398 | } else if (IsOperator(sc.ch)) { | |
399 | sc.SetState(SCE_B_OPERATOR); | |
400 | } else if (IsIdentifier(sc.ch)) { | |
401 | wasfirst = isfirst; | |
402 | sc.SetState(SCE_B_IDENTIFIER); | |
403 | } else if (!IsSpace(sc.ch)) { | |
404 | sc.SetState(SCE_B_ERROR); | |
405 | } | |
406 | } | |
407 | ||
408 | if (!IsSpace(sc.ch)) | |
409 | isfirst = false; | |
410 | ||
411 | if (!sc.More()) | |
412 | break; | |
413 | } | |
414 | sc.Complete(); | |
415 | } | |
416 | ||
1e9bafca | 417 | |
1dcf666d | 418 | void SCI_METHOD LexerBasic::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) { |
1e9bafca | 419 | |
1dcf666d RD |
420 | if (!options.fold) |
421 | return; | |
422 | ||
423 | LexAccessor styler(pAccess); | |
1e9bafca | 424 | |
1e9bafca RD |
425 | int line = styler.GetLine(startPos); |
426 | int level = styler.LevelAt(line); | |
427 | int go = 0, done = 0; | |
428 | int endPos = startPos + length; | |
429 | char word[256]; | |
430 | int wordlen = 0; | |
1dcf666d RD |
431 | const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty(); |
432 | int cNext = styler[startPos]; | |
433 | ||
1e9bafca RD |
434 | // Scan for tokens at the start of the line (they may include |
435 | // whitespace, for tokens like "End Function" | |
1dcf666d RD |
436 | for (int i = startPos; i < endPos; i++) { |
437 | int c = cNext; | |
438 | cNext = styler.SafeGetCharAt(i + 1); | |
439 | bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n'); | |
440 | if (options.foldSyntaxBased && !done && !go) { | |
1e9bafca RD |
441 | if (wordlen) { // are we scanning a token already? |
442 | word[wordlen] = static_cast<char>(LowerCase(c)); | |
443 | if (!IsIdentifier(c)) { // done with token | |
444 | word[wordlen] = '\0'; | |
445 | go = CheckFoldPoint(word, level); | |
446 | if (!go) { | |
447 | // Treat any whitespace as single blank, for | |
448 | // things like "End Function". | |
449 | if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) { | |
450 | word[wordlen] = ' '; | |
451 | if (wordlen < 255) | |
452 | wordlen++; | |
453 | } | |
454 | else // done with this line | |
455 | done = 1; | |
456 | } | |
457 | } else if (wordlen < 255) { | |
458 | wordlen++; | |
459 | } | |
460 | } else { // start scanning at first non-whitespace character | |
461 | if (!IsSpace(c)) { | |
462 | if (IsIdentifier(c)) { | |
463 | word[0] = static_cast<char>(LowerCase(c)); | |
464 | wordlen = 1; | |
465 | } else // done with this line | |
466 | done = 1; | |
467 | } | |
468 | } | |
469 | } | |
1dcf666d RD |
470 | if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) { |
471 | if (userDefinedFoldMarkers) { | |
472 | if (styler.Match(i, options.foldExplicitStart.c_str())) { | |
473 | level |= SC_FOLDLEVELHEADERFLAG; | |
474 | go = 1; | |
475 | } else if (styler.Match(i, options.foldExplicitEnd.c_str())) { | |
476 | go = -1; | |
477 | } | |
478 | } else { | |
479 | if (c == comment_char) { | |
480 | if (cNext == '{') { | |
481 | level |= SC_FOLDLEVELHEADERFLAG; | |
482 | go = 1; | |
483 | } else if (cNext == '}') { | |
484 | go = -1; | |
485 | } | |
486 | } | |
487 | } | |
488 | } | |
489 | if (atEOL) { // line end | |
490 | if (!done && wordlen == 0 && options.foldCompact) // line was only space | |
1e9bafca RD |
491 | level |= SC_FOLDLEVELWHITEFLAG; |
492 | if (level != styler.LevelAt(line)) | |
493 | styler.SetLevel(line, level); | |
494 | level += go; | |
495 | line++; | |
496 | // reset state | |
497 | wordlen = 0; | |
498 | level &= ~SC_FOLDLEVELHEADERFLAG; | |
499 | level &= ~SC_FOLDLEVELWHITEFLAG; | |
500 | go = 0; | |
501 | done = 0; | |
502 | } | |
503 | } | |
504 | } | |
505 | ||
1dcf666d | 506 | LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc); |
1e9bafca | 507 | |
1dcf666d | 508 | LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc); |
1e9bafca | 509 | |
1dcf666d | 510 | LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc); |