1 // Scintilla source code edit control
2 /** @file LexOScript.cxx
3 ** Lexer for OScript sources; ocx files and/or OSpace dumps.
4 ** OScript is a programming language used to develop applications for the
5 ** Livelink server platform.
7 // Written by Ferdinand Prantl <prantlf@gmail.com>, inspired by the code from
8 // LexVB.cxx and LexPascal.cxx. The License.txt file describes the conditions
9 // under which this software may be distributed.
19 #include "Scintilla.h"
23 #include "LexAccessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
30 using namespace Scintilla
;
33 // -----------------------------------------
34 // Functions classifying a single character.
// This function is generic and should probably be moved to CharacterSet.h,
// where IsAlphaNumeric and the others reside.
// Tells whether ch is an ASCII letter (a-z or A-Z). Every other value,
// including non-ASCII letters and negative values, yields false.
inline bool IsAlpha(int ch) {
	const bool isLower = ch >= 'a' && ch <= 'z';
	const bool isUpper = ch >= 'A' && ch <= 'Z';
	return isLower || isUpper;
}
42 static inline bool IsIdentifierChar(int ch
) {
43 // Identifiers cannot contain non-ASCII letters; a word with non-English
44 // language-specific characters cannot be an identifier.
45 return IsAlphaNumeric(ch
) || ch
== '_';
48 static inline bool IsIdentifierStart(int ch
) {
49 // Identifiers cannot contain non-ASCII letters; a word with non-English
50 // language-specific characters cannot be an identifier.
51 return IsAlpha(ch
) || ch
== '_';
54 static inline bool IsNumberChar(int ch
, int chNext
) {
55 // Numeric constructs are not checked for lexical correctness. They are
56 // expected to look like +1.23-E9 but actually any bunch of the following
57 // characters will be styled as number.
58 // KNOWN PROBLEM: if you put + or - operators immediately after a number
59 // and the next operand starts with the letter E, the operator will not be
60 // recognized and it will be styled together with the preceding number.
61 // This should not occur; at least not often. The coding style recommends
62 // putting spaces around operators.
63 return IsADigit(ch
) || toupper(ch
) == 'E' || ch
== '.' ||
64 ((ch
== '-' || ch
== '+') && toupper(chNext
) == 'E');
// This function checks for the start of a natural number without any symbols
// or operators as a prefix; IsPrefixedNumberStart should be called
// immediately after this one to cover all possible numeric constructs.
70 static inline bool IsNaturalNumberStart(int ch
) {
71 return IsADigit(ch
) != 0;
74 static inline bool IsPrefixedNumberStart(int ch
, int chNext
) {
75 // KNOWN PROBLEM: if you put + or - operators immediately before a number
76 // the operator will not be recognized and it will be styled together with
77 // the succeeding number. This should not occur; at least not often. The
78 // coding style recommends putting spaces around operators.
79 return (ch
== '.' || ch
== '-' || ch
== '+') && IsADigit(chNext
);
// Tells whether ch is one of the single-character operator symbols.
static inline bool IsOperator(int ch) {
	// strchr() converts its second argument to char and also matches the
	// terminating NUL of the searched string. Without this guard, 0 and any
	// larger value whose low byte equals an operator symbol would be
	// reported as operators.
	if (ch <= 0 || ch >= 0x80) {
		return false;
	}
	return strchr("%^&*()-+={}[]:;<>,/?!.~|\\", ch) != NULL;
}
86 // ---------------------------------------------------------------
87 // Functions classifying a token currently processed in the lexer.
// Checks if the current line starts with the preprocessor directive usually
// used to introduce documentation comments: #ifdef DOC. This method is
// supposed to be called if the line has been recognized as a preprocessor
// directive already.
93 static bool IsDocCommentStart(StyleContext
&sc
) {
94 // Check the line back to its start only if the end looks promising.
95 if (sc
.LengthCurrent() == 10 && !IsAlphaNumeric(sc
.ch
)) {
97 sc
.GetCurrentLowered(s
, sizeof(s
));
98 return strcmp(s
, "#ifdef doc") == 0;
// Checks if the current line starts with the preprocessor directive that
// is complementary to the #ifdef DOC start: #endif. This method is supposed
// to be called if the current state points to the documentation comment.
// QUESTIONABLE ASSUMPTION: The complete #endif directive is not checked; just
// the starting #e. However, there is no other preprocessor directive with
// the same starting letter and thus this optimization should always work.
109 static bool IsDocCommentEnd(StyleContext
&sc
) {
110 return sc
.ch
== '#' && sc
.chNext
== 'e';
113 class IdentifierClassifier
{
114 WordList
&keywords
; // Passed from keywords property.
115 WordList
&constants
; // Passed from keywords2 property.
116 WordList
&operators
; // Passed from keywords3 property.
117 WordList
&types
; // Passed from keywords4 property.
118 WordList
&functions
; // Passed from keywords5 property.
119 WordList
&objects
; // Passed from keywords6 property.
121 IdentifierClassifier(IdentifierClassifier
const&);
122 IdentifierClassifier
& operator=(IdentifierClassifier
const&);
125 IdentifierClassifier(WordList
*keywordlists
[]) :
126 keywords(*keywordlists
[0]), constants(*keywordlists
[1]),
127 operators(*keywordlists
[2]), types(*keywordlists
[3]),
128 functions(*keywordlists
[4]), objects(*keywordlists
[5])
131 void ClassifyIdentifier(StyleContext
&sc
) {
132 // Opening parenthesis following an identifier makes it a possible
134 // KNOWN PROBLEM: If some whitespace is inserted between the
135 // identifier and the parenthesis they will not be able to be
136 // recognized as a function call. This should not occur; at
137 // least not often. Such coding style would be weird.
140 sc
.GetCurrentLowered(s
, sizeof(s
));
141 // Before an opening brace can be control statements and
142 // operators too; function call is the last option.
143 if (keywords
.InList(s
)) {
144 sc
.ChangeState(SCE_OSCRIPT_KEYWORD
);
145 } else if (operators
.InList(s
)) {
146 sc
.ChangeState(SCE_OSCRIPT_OPERATOR
);
147 } else if (functions
.InList(s
)) {
148 sc
.ChangeState(SCE_OSCRIPT_FUNCTION
);
150 sc
.ChangeState(SCE_OSCRIPT_METHOD
);
152 sc
.SetState(SCE_OSCRIPT_OPERATOR
);
155 sc
.GetCurrentLowered(s
, sizeof(s
));
156 // A dot following an identifier means an access to an object
157 // member. The related object identifier can be special.
158 // KNOWN PROBLEM: If there is whitespace between the identifier
159 // and the following dot, the identifier will not be recognized
160 // as an object in an object member access. If it is one of the
161 // listed static objects it will not be styled.
162 if (sc
.Match('.') && objects
.InList(s
)) {
163 sc
.ChangeState(SCE_OSCRIPT_OBJECT
);
164 sc
.SetState(SCE_OSCRIPT_OPERATOR
);
166 if (keywords
.InList(s
)) {
167 sc
.ChangeState(SCE_OSCRIPT_KEYWORD
);
168 } else if (constants
.InList(s
)) {
169 sc
.ChangeState(SCE_OSCRIPT_CONSTANT
);
170 } else if (operators
.InList(s
)) {
171 sc
.ChangeState(SCE_OSCRIPT_OPERATOR
);
172 } else if (types
.InList(s
)) {
173 sc
.ChangeState(SCE_OSCRIPT_TYPE
);
174 } else if (functions
.InList(s
)) {
175 sc
.ChangeState(SCE_OSCRIPT_FUNCTION
);
177 sc
.SetState(SCE_OSCRIPT_DEFAULT
);
183 // ------------------------------------------------
184 // Function colourising an excerpt of OScript code.
186 static void ColouriseOScriptDoc(unsigned int startPos
, int length
,
187 int initStyle
, WordList
*keywordlists
[],
189 // I wonder how whole-line styles ended by EOLN can escape the resetting
190 // code in the loop below and overflow to the next line. Let us make sure
191 // that a new line does not start with them carried from the previous one.
192 // NOTE: An overflowing string is intentionally not checked; it reminds
193 // the developer that the string must be ended on the same line.
194 if (initStyle
== SCE_OSCRIPT_LINE_COMMENT
||
195 initStyle
== SCE_OSCRIPT_PREPROCESSOR
) {
196 initStyle
= SCE_OSCRIPT_DEFAULT
;
199 styler
.StartAt(startPos
);
200 StyleContext
sc(startPos
, length
, initStyle
, styler
);
201 IdentifierClassifier
identifierClassifier(keywordlists
);
203 // It starts with true at the beginning of a line and changes to false as
204 // soon as the first non-whitespace character has been processed.
205 bool isFirstToken
= true;
206 // It starts with true at the beginning of a line and changes to false as
207 // soon as the first identifier on the line is passed by.
208 bool isFirstIdentifier
= true;
209 // It becomes false when #ifdef DOC (the preprocessor directive often
210 // used to start a documentation comment) is encountered and remain false
211 // until the end of the documentation block is not detected. This is done
212 // by checking for the complementary #endif preprocessor directive.
213 bool endDocComment
= false;
215 for (; sc
.More(); sc
.Forward()) {
217 if (sc
.atLineStart
) {
219 isFirstIdentifier
= true;
220 // Detect the current state is neither whitespace nor identifier. It
221 // means that no next identifier can be the first token on the line.
222 } else if (isFirstIdentifier
&& sc
.state
!= SCE_OSCRIPT_DEFAULT
&&
223 sc
.state
!= SCE_OSCRIPT_IDENTIFIER
) {
224 isFirstIdentifier
= false;
227 // Check if the current state should be changed.
228 if (sc
.state
== SCE_OSCRIPT_OPERATOR
) {
229 // Multiple-symbol operators are marked by single characters.
230 sc
.SetState(SCE_OSCRIPT_DEFAULT
);
231 } else if (sc
.state
== SCE_OSCRIPT_IDENTIFIER
) {
232 if (!IsIdentifierChar(sc
.ch
)) {
233 // Colon after an identifier makes it a label if it is the
234 // first token on the line.
235 // KNOWN PROBLEM: If some whitespace is inserted between the
236 // identifier and the colon they will not be recognized as a
237 // label. This should not occur; at least not often. It would
238 // make the code structure less legible and examples in the
239 // Livelink documentation do not show it.
240 if (sc
.Match(':') && isFirstIdentifier
) {
241 sc
.ChangeState(SCE_OSCRIPT_LABEL
);
242 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
244 identifierClassifier
.ClassifyIdentifier(sc
);
246 // Avoid a sequence of two words be mistaken for a label. A
247 // switch case would be an example.
248 isFirstIdentifier
= false;
250 } else if (sc
.state
== SCE_OSCRIPT_GLOBAL
) {
251 if (!IsIdentifierChar(sc
.ch
)) {
252 sc
.SetState(SCE_OSCRIPT_DEFAULT
);
254 } else if (sc
.state
== SCE_OSCRIPT_PROPERTY
) {
255 if (!IsIdentifierChar(sc
.ch
)) {
256 // Any member access introduced by the dot operator is
257 // initially marked as a property access. If an opening
258 // parenthesis is detected later it is changed to method call.
259 // KNOWN PROBLEM: The same as at the function call recognition
260 // for SCE_OSCRIPT_IDENTIFIER above.
262 sc
.ChangeState(SCE_OSCRIPT_METHOD
);
264 sc
.SetState(SCE_OSCRIPT_DEFAULT
);
266 } else if (sc
.state
== SCE_OSCRIPT_NUMBER
) {
267 if (!IsNumberChar(sc
.ch
, sc
.chNext
)) {
268 sc
.SetState(SCE_OSCRIPT_DEFAULT
);
270 } else if (sc
.state
== SCE_OSCRIPT_SINGLEQUOTE_STRING
) {
272 // Two consequential apostrophes convert to a single one.
273 if (sc
.chNext
== '\'') {
276 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
278 } else if (sc
.atLineEnd
) {
279 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
281 } else if (sc
.state
== SCE_OSCRIPT_DOUBLEQUOTE_STRING
) {
283 // Two consequential quotation marks convert to a single one.
284 if (sc
.chNext
== '\"') {
287 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
289 } else if (sc
.atLineEnd
) {
290 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
292 } else if (sc
.state
== SCE_OSCRIPT_BLOCK_COMMENT
) {
293 if (sc
.Match('*', '/')) {
295 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
297 } else if (sc
.state
== SCE_OSCRIPT_LINE_COMMENT
) {
299 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
301 } else if (sc
.state
== SCE_OSCRIPT_PREPROCESSOR
) {
302 if (IsDocCommentStart(sc
)) {
303 sc
.ChangeState(SCE_OSCRIPT_DOC_COMMENT
);
304 endDocComment
= false;
305 } else if (sc
.atLineEnd
) {
306 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
308 } else if (sc
.state
== SCE_OSCRIPT_DOC_COMMENT
) {
309 // KNOWN PROBLEM: The first line detected that would close a
310 // conditional preprocessor block (#endif) the documentation
311 // comment block will end. (Nested #if-#endif blocks are not
312 // supported. Hopefully it will not occur often that a line
313 // within the text block would stat with #endif.
314 if (isFirstToken
&& IsDocCommentEnd(sc
)) {
315 endDocComment
= true;
316 } else if (sc
.atLineEnd
&& endDocComment
) {
317 sc
.ForwardSetState(SCE_OSCRIPT_DEFAULT
);
321 // Check what state starts with the current character.
322 if (sc
.state
== SCE_OSCRIPT_DEFAULT
) {
323 if (sc
.Match('\'')) {
324 sc
.SetState(SCE_OSCRIPT_SINGLEQUOTE_STRING
);
325 } else if (sc
.Match('\"')) {
326 sc
.SetState(SCE_OSCRIPT_DOUBLEQUOTE_STRING
);
327 } else if (sc
.Match('/', '/')) {
328 sc
.SetState(SCE_OSCRIPT_LINE_COMMENT
);
330 } else if (sc
.Match('/', '*')) {
331 sc
.SetState(SCE_OSCRIPT_BLOCK_COMMENT
);
333 } else if (isFirstToken
&& sc
.Match('#')) {
334 sc
.SetState(SCE_OSCRIPT_PREPROCESSOR
);
335 } else if (sc
.Match('$')) {
336 // Both process-global ($xxx) and thread-global ($$xxx)
337 // variables are handled as one global.
338 sc
.SetState(SCE_OSCRIPT_GLOBAL
);
339 } else if (IsNaturalNumberStart(sc
.ch
)) {
340 sc
.SetState(SCE_OSCRIPT_NUMBER
);
341 } else if (IsPrefixedNumberStart(sc
.ch
, sc
.chNext
)) {
342 sc
.SetState(SCE_OSCRIPT_NUMBER
);
344 } else if (sc
.Match('.') && IsIdentifierStart(sc
.chNext
)) {
345 // Every object member access is marked as a property access
346 // initially. The decision between property and method is made
347 // after parsing the identifier and looking what comes then.
348 // KNOWN PROBLEM: If there is whitespace between the following
349 // identifier and the dot, the dot will not be recognized
350 // as a member accessing operator. In turn, the identifier
351 // will not be recognizable as a property or a method too.
352 sc
.SetState(SCE_OSCRIPT_OPERATOR
);
354 sc
.SetState(SCE_OSCRIPT_PROPERTY
);
355 } else if (IsIdentifierStart(sc
.ch
)) {
356 sc
.SetState(SCE_OSCRIPT_IDENTIFIER
);
357 } else if (IsOperator(sc
.ch
)) {
358 sc
.SetState(SCE_OSCRIPT_OPERATOR
);
362 if (isFirstToken
&& !IsASpaceOrTab(sc
.ch
)) {
363 isFirstToken
= false;
370 // ------------------------------------------
371 // Functions supporting OScript code folding.
373 static inline bool IsBlockComment(int style
) {
374 return style
== SCE_OSCRIPT_BLOCK_COMMENT
;
377 static bool IsLineComment(int line
, Accessor
&styler
) {
378 int pos
= styler
.LineStart(line
);
379 int eolPos
= styler
.LineStart(line
+ 1) - 1;
380 for (int i
= pos
; i
< eolPos
; i
++) {
382 char chNext
= styler
.SafeGetCharAt(i
+ 1);
383 int style
= styler
.StyleAt(i
);
384 if (ch
== '/' && chNext
== '/' && style
== SCE_OSCRIPT_LINE_COMMENT
) {
386 } else if (!IsASpaceOrTab(ch
)) {
393 static inline bool IsPreprocessor(int style
) {
394 return style
== SCE_OSCRIPT_PREPROCESSOR
||
395 style
== SCE_OSCRIPT_DOC_COMMENT
;
398 static void GetRangeLowered(unsigned int start
, unsigned int end
,
399 Accessor
&styler
, char *s
, unsigned int len
) {
401 while (i
< end
- start
+ 1 && i
< len
- 1) {
402 s
[i
] = static_cast<char>(tolower(styler
[start
+ i
]));
408 static void GetForwardWordLowered(unsigned int start
, Accessor
&styler
,
409 char *s
, unsigned int len
) {
411 while (i
< len
- 1 && IsAlpha(styler
.SafeGetCharAt(start
+ i
))) {
412 s
[i
] = static_cast<char>(tolower(styler
.SafeGetCharAt(start
+ i
)));
418 static void UpdatePreprocessorFoldLevel(int &levelCurrent
,
419 unsigned int startPos
, Accessor
&styler
) {
420 char s
[7]; // Size of the longest possible keyword + null.
421 GetForwardWordLowered(startPos
, styler
, s
, sizeof(s
));
423 if (strcmp(s
, "ifdef") == 0 ||
424 strcmp(s
, "ifndef") == 0) {
426 } else if (strcmp(s
, "endif") == 0) {
428 if (levelCurrent
< SC_FOLDLEVELBASE
) {
429 levelCurrent
= SC_FOLDLEVELBASE
;
434 static void UpdateKeywordFoldLevel(int &levelCurrent
, unsigned int lastStart
,
435 unsigned int currentPos
, Accessor
&styler
) {
437 GetRangeLowered(lastStart
, currentPos
, styler
, s
, sizeof(s
));
439 if (strcmp(s
, "if") == 0 || strcmp(s
, "for") == 0 ||
440 strcmp(s
, "switch") == 0 || strcmp(s
, "function") == 0 ||
441 strcmp(s
, "while") == 0 || strcmp(s
, "repeat") == 0) {
443 } else if (strcmp(s
, "end") == 0 || strcmp(s
, "until") == 0) {
445 if (levelCurrent
< SC_FOLDLEVELBASE
) {
446 levelCurrent
= SC_FOLDLEVELBASE
;
451 // ------------------------------
452 // Function folding OScript code.
454 static void FoldOScriptDoc(unsigned int startPos
, int length
, int initStyle
,
455 WordList
*[], Accessor
&styler
) {
456 bool foldComment
= styler
.GetPropertyInt("fold.comment") != 0;
457 bool foldPreprocessor
= styler
.GetPropertyInt("fold.preprocessor") != 0;
458 bool foldCompact
= styler
.GetPropertyInt("fold.compact", 1) != 0;
459 int endPos
= startPos
+ length
;
460 int visibleChars
= 0;
461 int lineCurrent
= styler
.GetLine(startPos
);
462 int levelPrev
= styler
.LevelAt(lineCurrent
) & SC_FOLDLEVELNUMBERMASK
;
463 int levelCurrent
= levelPrev
;
464 char chNext
= styler
[startPos
];
465 int styleNext
= styler
.StyleAt(startPos
);
466 int style
= initStyle
;
469 for (int i
= startPos
; i
< endPos
; i
++) {
471 chNext
= styler
.SafeGetCharAt(i
+ 1);
472 int stylePrev
= style
;
474 styleNext
= styler
.StyleAt(i
+ 1);
475 bool atLineEnd
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
477 if (foldComment
&& IsBlockComment(style
)) {
478 if (!IsBlockComment(stylePrev
)) {
480 } else if (!IsBlockComment(styleNext
) && !atLineEnd
) {
481 // Comments do not end at end of line and the next character
482 // may not be styled.
486 if (foldComment
&& atLineEnd
&& IsLineComment(lineCurrent
, styler
)) {
487 if (!IsLineComment(lineCurrent
- 1, styler
) &&
488 IsLineComment(lineCurrent
+ 1, styler
))
490 else if (IsLineComment(lineCurrent
- 1, styler
) &&
491 !IsLineComment(lineCurrent
+1, styler
))
494 if (foldPreprocessor
) {
495 if (ch
== '#' && IsPreprocessor(style
)) {
496 UpdatePreprocessorFoldLevel(levelCurrent
, i
+ 1, styler
);
500 if (stylePrev
!= SCE_OSCRIPT_KEYWORD
&& style
== SCE_OSCRIPT_KEYWORD
) {
503 if (stylePrev
== SCE_OSCRIPT_KEYWORD
) {
504 if(IsIdentifierChar(ch
) && !IsIdentifierChar(chNext
)) {
505 UpdateKeywordFoldLevel(levelCurrent
, lastStart
, i
, styler
);
513 int level
= levelPrev
;
514 if (visibleChars
== 0 && foldCompact
)
515 level
|= SC_FOLDLEVELWHITEFLAG
;
516 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
517 level
|= SC_FOLDLEVELHEADERFLAG
;
518 if (level
!= styler
.LevelAt(lineCurrent
)) {
519 styler
.SetLevel(lineCurrent
, level
);
522 levelPrev
= levelCurrent
;
527 // If we did not reach EOLN in the previous loop, store the line level and
528 // whitespace information. The rest will be filled in later.
530 if (visibleChars
== 0 && foldCompact
)
531 lev
|= SC_FOLDLEVELWHITEFLAG
;
532 styler
.SetLevel(lineCurrent
, lev
);
535 // --------------------------------------------
536 // Declaration of the OScript lexer descriptor.
// Descriptions of the word lists, in the order in which the lists are
// indexed by IdentifierClassifier (keywords, constants, operators, types,
// functions, objects). The array is terminated by a null pointer.
// NOTE(review): the second and third entries and the terminator were
// missing from the damaged text; confirm their wording against the
// upstream file.
static const char * const oscriptWordListDesc[] = {
	"Keywords and reserved words",
	"Literal constants",
	"Literal operators",
	"Built-in value and reference types",
	"Built-in global functions",
	"Built-in static objects",
	0
};
548 LexerModule
lmOScript(SCLEX_OSCRIPT
, ColouriseOScriptDoc
, "oscript", FoldOScriptDoc
, oscriptWordListDesc
);