]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexOScript.cxx
Update of OpenVMS compile support
[wxWidgets.git] / src / stc / scintilla / lexers / LexOScript.cxx
1 // Scintilla source code edit control
2 /** @file LexOScript.cxx
3 ** Lexer for OScript sources; ocx files and/or OSpace dumps.
4 ** OScript is a programming language used to develop applications for the
5 ** Livelink server platform.
6 **/
7 // Written by Ferdinand Prantl <prantlf@gmail.com>, inspired by the code from
8 // LexVB.cxx and LexPascal.cxx. The License.txt file describes the conditions
9 // under which this software may be distributed.
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32
33 // -----------------------------------------
34 // Functions classifying a single character.
35
// Tells whether ch is an ASCII letter (a-z or A-Z); anything else, including
// non-ASCII letters, is rejected. The helper is generic and would probably
// fit better next to IsAlphaNumeric and the other character classifiers.
inline bool IsAlpha(int ch) {
    const bool isLowerCase = ch >= 'a' && ch <= 'z';
    const bool isUpperCase = ch >= 'A' && ch <= 'Z';
    return isLowerCase || isUpperCase;
}
41
42 static inline bool IsIdentifierChar(int ch) {
43 // Identifiers cannot contain non-ASCII letters; a word with non-English
44 // language-specific characters cannot be an identifier.
45 return IsAlphaNumeric(ch) || ch == '_';
46 }
47
48 static inline bool IsIdentifierStart(int ch) {
49 // Identifiers cannot contain non-ASCII letters; a word with non-English
50 // language-specific characters cannot be an identifier.
51 return IsAlpha(ch) || ch == '_';
52 }
53
54 static inline bool IsNumberChar(int ch, int chNext) {
55 // Numeric constructs are not checked for lexical correctness. They are
56 // expected to look like +1.23-E9 but actually any bunch of the following
57 // characters will be styled as number.
58 // KNOWN PROBLEM: if you put + or - operators immediately after a number
59 // and the next operand starts with the letter E, the operator will not be
60 // recognized and it will be styled together with the preceding number.
61 // This should not occur; at least not often. The coding style recommends
62 // putting spaces around operators.
63 return IsADigit(ch) || toupper(ch) == 'E' || ch == '.' ||
64 ((ch == '-' || ch == '+') && toupper(chNext) == 'E');
65 }
66
67 // This function checks for the start or a natural number without any symbols
68 // or operators as a prefix; the IsPrefixedNumberStart should be called
69 // immediately after this one to cover all possible numeric constructs.
70 static inline bool IsNaturalNumberStart(int ch) {
71 return IsADigit(ch) != 0;
72 }
73
74 static inline bool IsPrefixedNumberStart(int ch, int chNext) {
75 // KNOWN PROBLEM: if you put + or - operators immediately before a number
76 // the operator will not be recognized and it will be styled together with
77 // the succeeding number. This should not occur; at least not often. The
78 // coding style recommends putting spaces around operators.
79 return (ch == '.' || ch == '-' || ch == '+') && IsADigit(chNext);
80 }
81
// Tells whether ch is one of the single-character operator symbols.
// Multi-character operators are styled one character at a time.
static inline bool IsOperator(int ch) {
    // strchr() converts its argument to char and also matches the string
    // terminator, so NUL and values outside the ASCII range must be rejected
    // up front; otherwise '\0' and wide code points whose low byte happens
    // to equal an operator would be reported as operators.
    if (ch <= 0 || ch >= 0x80) {
        return false;
    }
    return strchr("%^&*()-+={}[]:;<>,/?!.~|\\", ch) != NULL;
}
85
86 // ---------------------------------------------------------------
87 // Functions classifying a token currently processed in the lexer.
88
89 // Checks if the current line starts with the preprocessor directive used
90 // usually to introduce documentation comments: #ifdef DOC. This method is
91 // supposed to be called if the line has been recognized as a preprocessor
92 // directive already.
93 static bool IsDocCommentStart(StyleContext &sc) {
94 // Check the line back to its start only if the end looks promising.
95 if (sc.LengthCurrent() == 10 && !IsAlphaNumeric(sc.ch)) {
96 char s[11];
97 sc.GetCurrentLowered(s, sizeof(s));
98 return strcmp(s, "#ifdef doc") == 0;
99 }
100 return false;
101 }
102
103 // Checks if the current line starts with the preprocessor directive that
104 // is complementary to the #ifdef DOC start: #endif. This method is supposed
105 // to be called if the current state point to the documentation comment.
106 // QUESTIONAL ASSUMPTION: The complete #endif directive is not checked; just
107 // the starting #e. However, there is no other preprocessor directive with
108 // the same starting letter and thus this optimization should always work.
109 static bool IsDocCommentEnd(StyleContext &sc) {
110 return sc.ch == '#' && sc.chNext == 'e';
111 }
112
113 class IdentifierClassifier {
114 WordList &keywords; // Passed from keywords property.
115 WordList &constants; // Passed from keywords2 property.
116 WordList &operators; // Passed from keywords3 property.
117 WordList &types; // Passed from keywords4 property.
118 WordList &functions; // Passed from keywords5 property.
119 WordList &objects; // Passed from keywords6 property.
120
121 IdentifierClassifier(IdentifierClassifier const&);
122 IdentifierClassifier& operator=(IdentifierClassifier const&);
123
124 public:
125 IdentifierClassifier(WordList *keywordlists[]) :
126 keywords(*keywordlists[0]), constants(*keywordlists[1]),
127 operators(*keywordlists[2]), types(*keywordlists[3]),
128 functions(*keywordlists[4]), objects(*keywordlists[5])
129 {}
130
131 void ClassifyIdentifier(StyleContext &sc) {
132 // Opening parenthesis following an identifier makes it a possible
133 // function call.
134 // KNOWN PROBLEM: If some whitespace is inserted between the
135 // identifier and the parenthesis they will not be able to be
136 // recognized as a function call. This should not occur; at
137 // least not often. Such coding style would be weird.
138 if (sc.Match('(')) {
139 char s[100];
140 sc.GetCurrentLowered(s, sizeof(s));
141 // Before an opening brace can be control statements and
142 // operators too; function call is the last option.
143 if (keywords.InList(s)) {
144 sc.ChangeState(SCE_OSCRIPT_KEYWORD);
145 } else if (operators.InList(s)) {
146 sc.ChangeState(SCE_OSCRIPT_OPERATOR);
147 } else if (functions.InList(s)) {
148 sc.ChangeState(SCE_OSCRIPT_FUNCTION);
149 } else {
150 sc.ChangeState(SCE_OSCRIPT_METHOD);
151 }
152 sc.SetState(SCE_OSCRIPT_OPERATOR);
153 } else {
154 char s[100];
155 sc.GetCurrentLowered(s, sizeof(s));
156 // A dot following an identifier means an access to an object
157 // member. The related object identifier can be special.
158 // KNOWN PROBLEM: If there is whitespace between the identifier
159 // and the following dot, the identifier will not be recognized
160 // as an object in an object member access. If it is one of the
161 // listed static objects it will not be styled.
162 if (sc.Match('.') && objects.InList(s)) {
163 sc.ChangeState(SCE_OSCRIPT_OBJECT);
164 sc.SetState(SCE_OSCRIPT_OPERATOR);
165 } else {
166 if (keywords.InList(s)) {
167 sc.ChangeState(SCE_OSCRIPT_KEYWORD);
168 } else if (constants.InList(s)) {
169 sc.ChangeState(SCE_OSCRIPT_CONSTANT);
170 } else if (operators.InList(s)) {
171 sc.ChangeState(SCE_OSCRIPT_OPERATOR);
172 } else if (types.InList(s)) {
173 sc.ChangeState(SCE_OSCRIPT_TYPE);
174 } else if (functions.InList(s)) {
175 sc.ChangeState(SCE_OSCRIPT_FUNCTION);
176 }
177 sc.SetState(SCE_OSCRIPT_DEFAULT);
178 }
179 }
180 }
181 };
182
183 // ------------------------------------------------
184 // Function colourising an excerpt of OScript code.
185
186 static void ColouriseOScriptDoc(unsigned int startPos, int length,
187 int initStyle, WordList *keywordlists[],
188 Accessor &styler) {
189 // I wonder how whole-line styles ended by EOLN can escape the resetting
190 // code in the loop below and overflow to the next line. Let us make sure
191 // that a new line does not start with them carried from the previous one.
192 // NOTE: An overflowing string is intentionally not checked; it reminds
193 // the developer that the string must be ended on the same line.
194 if (initStyle == SCE_OSCRIPT_LINE_COMMENT ||
195 initStyle == SCE_OSCRIPT_PREPROCESSOR) {
196 initStyle = SCE_OSCRIPT_DEFAULT;
197 }
198
199 styler.StartAt(startPos);
200 StyleContext sc(startPos, length, initStyle, styler);
201 IdentifierClassifier identifierClassifier(keywordlists);
202
203 // It starts with true at the beginning of a line and changes to false as
204 // soon as the first non-whitespace character has been processed.
205 bool isFirstToken = true;
206 // It starts with true at the beginning of a line and changes to false as
207 // soon as the first identifier on the line is passed by.
208 bool isFirstIdentifier = true;
209 // It becomes false when #ifdef DOC (the preprocessor directive often
210 // used to start a documentation comment) is encountered and remain false
211 // until the end of the documentation block is not detected. This is done
212 // by checking for the complementary #endif preprocessor directive.
213 bool endDocComment = false;
214
215 for (; sc.More(); sc.Forward()) {
216
217 if (sc.atLineStart) {
218 isFirstToken = true;
219 isFirstIdentifier = true;
220 // Detect the current state is neither whitespace nor identifier. It
221 // means that no next identifier can be the first token on the line.
222 } else if (isFirstIdentifier && sc.state != SCE_OSCRIPT_DEFAULT &&
223 sc.state != SCE_OSCRIPT_IDENTIFIER) {
224 isFirstIdentifier = false;
225 }
226
227 // Check if the current state should be changed.
228 if (sc.state == SCE_OSCRIPT_OPERATOR) {
229 // Multiple-symbol operators are marked by single characters.
230 sc.SetState(SCE_OSCRIPT_DEFAULT);
231 } else if (sc.state == SCE_OSCRIPT_IDENTIFIER) {
232 if (!IsIdentifierChar(sc.ch)) {
233 // Colon after an identifier makes it a label if it is the
234 // first token on the line.
235 // KNOWN PROBLEM: If some whitespace is inserted between the
236 // identifier and the colon they will not be recognized as a
237 // label. This should not occur; at least not often. It would
238 // make the code structure less legible and examples in the
239 // Livelink documentation do not show it.
240 if (sc.Match(':') && isFirstIdentifier) {
241 sc.ChangeState(SCE_OSCRIPT_LABEL);
242 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
243 } else {
244 identifierClassifier.ClassifyIdentifier(sc);
245 }
246 // Avoid a sequence of two words be mistaken for a label. A
247 // switch case would be an example.
248 isFirstIdentifier = false;
249 }
250 } else if (sc.state == SCE_OSCRIPT_GLOBAL) {
251 if (!IsIdentifierChar(sc.ch)) {
252 sc.SetState(SCE_OSCRIPT_DEFAULT);
253 }
254 } else if (sc.state == SCE_OSCRIPT_PROPERTY) {
255 if (!IsIdentifierChar(sc.ch)) {
256 // Any member access introduced by the dot operator is
257 // initially marked as a property access. If an opening
258 // parenthesis is detected later it is changed to method call.
259 // KNOWN PROBLEM: The same as at the function call recognition
260 // for SCE_OSCRIPT_IDENTIFIER above.
261 if (sc.Match('(')) {
262 sc.ChangeState(SCE_OSCRIPT_METHOD);
263 }
264 sc.SetState(SCE_OSCRIPT_DEFAULT);
265 }
266 } else if (sc.state == SCE_OSCRIPT_NUMBER) {
267 if (!IsNumberChar(sc.ch, sc.chNext)) {
268 sc.SetState(SCE_OSCRIPT_DEFAULT);
269 }
270 } else if (sc.state == SCE_OSCRIPT_SINGLEQUOTE_STRING) {
271 if (sc.ch == '\'') {
272 // Two consequential apostrophes convert to a single one.
273 if (sc.chNext == '\'') {
274 sc.Forward();
275 } else {
276 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
277 }
278 } else if (sc.atLineEnd) {
279 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
280 }
281 } else if (sc.state == SCE_OSCRIPT_DOUBLEQUOTE_STRING) {
282 if (sc.ch == '\"') {
283 // Two consequential quotation marks convert to a single one.
284 if (sc.chNext == '\"') {
285 sc.Forward();
286 } else {
287 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
288 }
289 } else if (sc.atLineEnd) {
290 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
291 }
292 } else if (sc.state == SCE_OSCRIPT_BLOCK_COMMENT) {
293 if (sc.Match('*', '/')) {
294 sc.Forward();
295 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
296 }
297 } else if (sc.state == SCE_OSCRIPT_LINE_COMMENT) {
298 if (sc.atLineEnd) {
299 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
300 }
301 } else if (sc.state == SCE_OSCRIPT_PREPROCESSOR) {
302 if (IsDocCommentStart(sc)) {
303 sc.ChangeState(SCE_OSCRIPT_DOC_COMMENT);
304 endDocComment = false;
305 } else if (sc.atLineEnd) {
306 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
307 }
308 } else if (sc.state == SCE_OSCRIPT_DOC_COMMENT) {
309 // KNOWN PROBLEM: The first line detected that would close a
310 // conditional preprocessor block (#endif) the documentation
311 // comment block will end. (Nested #if-#endif blocks are not
312 // supported. Hopefully it will not occur often that a line
313 // within the text block would stat with #endif.
314 if (isFirstToken && IsDocCommentEnd(sc)) {
315 endDocComment = true;
316 } else if (sc.atLineEnd && endDocComment) {
317 sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
318 }
319 }
320
321 // Check what state starts with the current character.
322 if (sc.state == SCE_OSCRIPT_DEFAULT) {
323 if (sc.Match('\'')) {
324 sc.SetState(SCE_OSCRIPT_SINGLEQUOTE_STRING);
325 } else if (sc.Match('\"')) {
326 sc.SetState(SCE_OSCRIPT_DOUBLEQUOTE_STRING);
327 } else if (sc.Match('/', '/')) {
328 sc.SetState(SCE_OSCRIPT_LINE_COMMENT);
329 sc.Forward();
330 } else if (sc.Match('/', '*')) {
331 sc.SetState(SCE_OSCRIPT_BLOCK_COMMENT);
332 sc.Forward();
333 } else if (isFirstToken && sc.Match('#')) {
334 sc.SetState(SCE_OSCRIPT_PREPROCESSOR);
335 } else if (sc.Match('$')) {
336 // Both process-global ($xxx) and thread-global ($$xxx)
337 // variables are handled as one global.
338 sc.SetState(SCE_OSCRIPT_GLOBAL);
339 } else if (IsNaturalNumberStart(sc.ch)) {
340 sc.SetState(SCE_OSCRIPT_NUMBER);
341 } else if (IsPrefixedNumberStart(sc.ch, sc.chNext)) {
342 sc.SetState(SCE_OSCRIPT_NUMBER);
343 sc.Forward();
344 } else if (sc.Match('.') && IsIdentifierStart(sc.chNext)) {
345 // Every object member access is marked as a property access
346 // initially. The decision between property and method is made
347 // after parsing the identifier and looking what comes then.
348 // KNOWN PROBLEM: If there is whitespace between the following
349 // identifier and the dot, the dot will not be recognized
350 // as a member accessing operator. In turn, the identifier
351 // will not be recognizable as a property or a method too.
352 sc.SetState(SCE_OSCRIPT_OPERATOR);
353 sc.Forward();
354 sc.SetState(SCE_OSCRIPT_PROPERTY);
355 } else if (IsIdentifierStart(sc.ch)) {
356 sc.SetState(SCE_OSCRIPT_IDENTIFIER);
357 } else if (IsOperator(sc.ch)) {
358 sc.SetState(SCE_OSCRIPT_OPERATOR);
359 }
360 }
361
362 if (isFirstToken && !IsASpaceOrTab(sc.ch)) {
363 isFirstToken = false;
364 }
365 }
366
367 sc.Complete();
368 }
369
370 // ------------------------------------------
371 // Functions supporting OScript code folding.
372
373 static inline bool IsBlockComment(int style) {
374 return style == SCE_OSCRIPT_BLOCK_COMMENT;
375 }
376
377 static bool IsLineComment(int line, Accessor &styler) {
378 int pos = styler.LineStart(line);
379 int eolPos = styler.LineStart(line + 1) - 1;
380 for (int i = pos; i < eolPos; i++) {
381 char ch = styler[i];
382 char chNext = styler.SafeGetCharAt(i + 1);
383 int style = styler.StyleAt(i);
384 if (ch == '/' && chNext == '/' && style == SCE_OSCRIPT_LINE_COMMENT) {
385 return true;
386 } else if (!IsASpaceOrTab(ch)) {
387 return false;
388 }
389 }
390 return false;
391 }
392
393 static inline bool IsPreprocessor(int style) {
394 return style == SCE_OSCRIPT_PREPROCESSOR ||
395 style == SCE_OSCRIPT_DOC_COMMENT;
396 }
397
398 static void GetRangeLowered(unsigned int start, unsigned int end,
399 Accessor &styler, char *s, unsigned int len) {
400 unsigned int i = 0;
401 while (i < end - start + 1 && i < len - 1) {
402 s[i] = static_cast<char>(tolower(styler[start + i]));
403 i++;
404 }
405 s[i] = '\0';
406 }
407
408 static void GetForwardWordLowered(unsigned int start, Accessor &styler,
409 char *s, unsigned int len) {
410 unsigned int i = 0;
411 while (i < len - 1 && IsAlpha(styler.SafeGetCharAt(start + i))) {
412 s[i] = static_cast<char>(tolower(styler.SafeGetCharAt(start + i)));
413 i++;
414 }
415 s[i] = '\0';
416 }
417
418 static void UpdatePreprocessorFoldLevel(int &levelCurrent,
419 unsigned int startPos, Accessor &styler) {
420 char s[7]; // Size of the longest possible keyword + null.
421 GetForwardWordLowered(startPos, styler, s, sizeof(s));
422
423 if (strcmp(s, "ifdef") == 0 ||
424 strcmp(s, "ifndef") == 0) {
425 levelCurrent++;
426 } else if (strcmp(s, "endif") == 0) {
427 levelCurrent--;
428 if (levelCurrent < SC_FOLDLEVELBASE) {
429 levelCurrent = SC_FOLDLEVELBASE;
430 }
431 }
432 }
433
434 static void UpdateKeywordFoldLevel(int &levelCurrent, unsigned int lastStart,
435 unsigned int currentPos, Accessor &styler) {
436 char s[9];
437 GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s));
438
439 if (strcmp(s, "if") == 0 || strcmp(s, "for") == 0 ||
440 strcmp(s, "switch") == 0 || strcmp(s, "function") == 0 ||
441 strcmp(s, "while") == 0 || strcmp(s, "repeat") == 0) {
442 levelCurrent++;
443 } else if (strcmp(s, "end") == 0 || strcmp(s, "until") == 0) {
444 levelCurrent--;
445 if (levelCurrent < SC_FOLDLEVELBASE) {
446 levelCurrent = SC_FOLDLEVELBASE;
447 }
448 }
449 }
450
451 // ------------------------------
452 // Function folding OScript code.
453
454 static void FoldOScriptDoc(unsigned int startPos, int length, int initStyle,
455 WordList *[], Accessor &styler) {
456 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
457 bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0;
458 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
459 int endPos = startPos + length;
460 int visibleChars = 0;
461 int lineCurrent = styler.GetLine(startPos);
462 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
463 int levelCurrent = levelPrev;
464 char chNext = styler[startPos];
465 int styleNext = styler.StyleAt(startPos);
466 int style = initStyle;
467 int lastStart = 0;
468
469 for (int i = startPos; i < endPos; i++) {
470 char ch = chNext;
471 chNext = styler.SafeGetCharAt(i + 1);
472 int stylePrev = style;
473 style = styleNext;
474 styleNext = styler.StyleAt(i + 1);
475 bool atLineEnd = (ch == '\r' && chNext != '\n') || (ch == '\n');
476
477 if (foldComment && IsBlockComment(style)) {
478 if (!IsBlockComment(stylePrev)) {
479 levelCurrent++;
480 } else if (!IsBlockComment(styleNext) && !atLineEnd) {
481 // Comments do not end at end of line and the next character
482 // may not be styled.
483 levelCurrent--;
484 }
485 }
486 if (foldComment && atLineEnd && IsLineComment(lineCurrent, styler)) {
487 if (!IsLineComment(lineCurrent - 1, styler) &&
488 IsLineComment(lineCurrent + 1, styler))
489 levelCurrent++;
490 else if (IsLineComment(lineCurrent - 1, styler) &&
491 !IsLineComment(lineCurrent+1, styler))
492 levelCurrent--;
493 }
494 if (foldPreprocessor) {
495 if (ch == '#' && IsPreprocessor(style)) {
496 UpdatePreprocessorFoldLevel(levelCurrent, i + 1, styler);
497 }
498 }
499
500 if (stylePrev != SCE_OSCRIPT_KEYWORD && style == SCE_OSCRIPT_KEYWORD) {
501 lastStart = i;
502 }
503 if (stylePrev == SCE_OSCRIPT_KEYWORD) {
504 if(IsIdentifierChar(ch) && !IsIdentifierChar(chNext)) {
505 UpdateKeywordFoldLevel(levelCurrent, lastStart, i, styler);
506 }
507 }
508
509 if (!IsASpace(ch))
510 visibleChars++;
511
512 if (atLineEnd) {
513 int level = levelPrev;
514 if (visibleChars == 0 && foldCompact)
515 level |= SC_FOLDLEVELWHITEFLAG;
516 if ((levelCurrent > levelPrev) && (visibleChars > 0))
517 level |= SC_FOLDLEVELHEADERFLAG;
518 if (level != styler.LevelAt(lineCurrent)) {
519 styler.SetLevel(lineCurrent, level);
520 }
521 lineCurrent++;
522 levelPrev = levelCurrent;
523 visibleChars = 0;
524 }
525 }
526
527 // If we did not reach EOLN in the previous loop, store the line level and
528 // whitespace information. The rest will be filled in later.
529 int lev = levelPrev;
530 if (visibleChars == 0 && foldCompact)
531 lev |= SC_FOLDLEVELWHITEFLAG;
532 styler.SetLevel(lineCurrent, lev);
533 }
534
535 // --------------------------------------------
536 // Declaration of the OScript lexer descriptor.
537
// Descriptions of the word lists accepted by the lexer, in the order in
// which the lists are handed over to it; the array is NULL-terminated.
static const char * const oscriptWordListDesc[] = {
    "Keywords and reserved words",          // word list 0
    "Literal constants",                    // word list 1
    "Literal operators",                    // word list 2
    "Built-in value and reference types",   // word list 3
    "Built-in global functions",            // word list 4
    "Built-in static objects",              // word list 5
    NULL
};
547
548 LexerModule lmOScript(SCLEX_OSCRIPT, ColouriseOScriptDoc, "oscript", FoldOScriptDoc, oscriptWordListDesc);