]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexCaml.cxx
5f4fad5fb867ddc63e8f3e477263f9b1dce7e922
1 // Scintilla source code edit control
3 ** Lexer for Objective Caml.
5 // Copyright 2005 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Switch to using StyleContext infrastructure.
30 #include "StyleContext.h"
32 #include "Scintilla.h"
35 // Since the Microsoft __iscsym[f] funcs are not ANSI...
// Tests whether c may appear inside a Caml identifier: an alphanumeric
// character or an underscore. Returns 1 on a match and 0 otherwise.
//
// The <ctype.h> classifiers are only defined for EOF and for values that fit
// in an unsigned char, so anything outside 0..0xFF is rejected up front
// instead of being handed to isalnum().
inline int iscaml(int c) {
	const bool inCtypeRange = c >= 0 && c <= 0xFF;
	return (inCtypeRange && isalnum(c)) || c == '_';
}
// Tests whether c may start a Caml identifier: an alphabetic character or an
// underscore. Returns 1 on a match and 0 otherwise.
//
// The <ctype.h> classifiers are only defined for EOF and for values that fit
// in an unsigned char, so anything outside 0..0xFF is rejected up front
// instead of being handed to isalpha().
inline int iscamlf(int c) {
	const bool inCtypeRange = c >= 0 && c <= 0xFF;
	return (inCtypeRange && isalpha(c)) || c == '_';
}
// Tests whether c may appear inside a Caml numeric literal body: a decimal
// digit or an underscore. Returns 1 on a match and 0 otherwise.
//
// The <ctype.h> classifiers are only defined for EOF and for values that fit
// in an unsigned char, so anything outside 0..0xFF is rejected up front
// instead of being handed to isdigit().
inline int iscamld(int c) {
	const bool inCtypeRange = c >= 0 && c <= 0xFF;
	return (inCtypeRange && isdigit(c)) || c == '_';
}
// Radix lookup table indexed by letter ('a' == 0): 'b' -> 2, 'o' -> 8,
// 'x' -> 16, every other slot is 0. The numeric-literal scanner reads it as
// baseT[tolower(ch) - 'a'] right after matching a '0' followed by one of
// "bBoOxX", so only those three slots are ever consulted.
40 static const int baseT
[24] = {
41 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
42 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
45 #ifdef BUILD_AS_EXTERNAL_LEXER
47 (actually seems to work!)
49 #include "WindowAccessor.h"
50 #include "ExternalLexer.h"
56 static void ColouriseCamlDoc(
57 unsigned int startPos
, int length
,
59 WordList
*keywordlists
[],
62 static void FoldCamlDoc(
63 unsigned int startPos
, int length
,
65 WordList
*keywordlists
[],
// Forward declaration of the shared worker behind the exported Lex() and
// Fold() entry points. The first argument is the selector: Lex() passes 0 and
// Fold() passes 1. The remaining arguments are the ones the host hands to the
// exported entry points (range, initial style, word-list strings, window and
// property string).
// NOTE(review): the selector is named lexOrFold here but foldOrLex in the
// definition; the names differ only cosmetically.
68 static void InternalLexOrFold(int lexOrFold
, unsigned int startPos
, int length
,
69 int initStyle
, char *words
[], WindowID window
, char *props
);
// Name this external lexer reports to the host; GetLexerName() copies it into
// the caller's buffer. Both the characters and the pointer are const so the
// name cannot be reseated by accident.
static const char* const LexerName = "caml";
// Formats a printf-style message into `buffer` with vsprintf and passes the
// result to Platform::DebugDisplay.
// NOTE(review): the declarations of `buffer` and `pArguments` are not visible
// in this view. vsprintf does not bound its output, so confirm the buffer size
// or consider vsnprintf.
74 void Platform::DebugPrintf(const char *format
, ...) {
77 va_start(pArguments
, format
);
78 vsprintf(buffer
,format
,pArguments
);
80 Platform::DebugDisplay(buffer
);
// Second definition of DebugPrintf with unnamed parameters; its body is not
// visible in this view. Presumably the do-nothing variant selected by a
// preprocessor branch when tracing is disabled - confirm against the full file.
83 void Platform::DebugPrintf(const char *, ...) {
// Returns true when ::IsDBCSLeadByteEx reports a non-zero result for `ch`
// in the given `codePage`; the "!= 0" turns that result into a bool.
87 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) {
88 return ::IsDBCSLeadByteEx(codePage
, ch
) != 0;
// Sends `msg` with `wParam` / `lParam` to the window `w` through ::SendMessage
// and returns whatever that call returns. The WindowID is reinterpret_cast to
// HWND for the call.
91 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) {
92 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
);
// Pointer-carrying counterpart of SendScintilla: sends `msg` to the window `w`
// through ::SendMessage, passing `lParam` reinterpret_cast from void* to
// LPARAM, and returns the call's result.
95 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) {
96 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
,
97 reinterpret_cast<LPARAM
>(lParam
));
// Exported external-lexer entry point for folding. Forwards the range, the
// initial style, the word-list strings, the window and the property string to
// InternalLexOrFold with selector 1. The `lexer` argument is not forwarded.
100 void EXT_LEXER_DECL
Fold(unsigned int lexer
, unsigned int startPos
, int length
,
101 int initStyle
, char *words
[], WindowID window
, char *props
)
103 // below useless evaluation(s) to suppress "not used" warnings
105 // build expected data structures and do the Fold
106 InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
);
// Exported external-lexer entry point reporting how many lexers this module
// provides; always 1.
110 int EXT_LEXER_DECL
GetLexerCount()
112 return 1; // just us [Objective] Caml lexers here!
// Exported external-lexer entry point that copies LexerName into the caller's
// buffer `name` and terminates it with '\0' at name[n].
// NOTE(review): the lines that limit n against `buflength` are not visible in
// this view. Because name[n] is written after the copy, confirm that n is
// capped at buflength - 1 rather than buflength.
115 void EXT_LEXER_DECL
GetLexerName(unsigned int Index
, char *name
, int buflength
)
117 // below useless evaluation(s) to suppress "not used" warnings
119 // return as much of our lexer name as will fit (what's up with Index?)
122 int n
= strlen(LexerName
);
125 memcpy(name
, LexerName
, n
), name
[n
] = '\0';
// Exported external-lexer entry point for colouring. Forwards the range, the
// initial style, the word-list strings, the window and the property string to
// InternalLexOrFold with selector 0. The `lexer` argument is not forwarded.
129 void EXT_LEXER_DECL
Lex(unsigned int lexer
, unsigned int startPos
, int length
,
130 int initStyle
, char *words
[], WindowID window
, char *props
)
132 // below useless evaluation(s) to suppress "not used" warnings
134 // build expected data structures and do the Lex
135 InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
);
// Shared worker for the exported Lex() and Fold() entry points. It applies the
// property string to `ps`, wraps the window in a WindowAccessor, turns each
// entry of the null-terminated `words` array into a heap-allocated WordList,
// and then calls FoldCamlDoc and/or ColouriseCamlDoc on the requested range.
// NOTE(review): the lines that test `foldOrLex` are not visible in this view.
// Presumably non-zero selects FoldCamlDoc (Fold() passes 1) and zero selects
// ColouriseCamlDoc (Lex() passes 0) - confirm against the full file.
138 static void InternalLexOrFold(int foldOrLex
, unsigned int startPos
, int length
,
139 int initStyle
, char *words
[], WindowID window
, char *props
)
141 // create and initialize a WindowAccessor (including contained PropSet)
143 ps
.SetMultiple(props
);
144 WindowAccessor
wa(window
, ps
);
145 // create and initialize WordList(s)
147 for (; words
[nWL
]; nWL
++) ; // count # of WordList PTRs needed
// One slot more than the number of lists is allocated; presumably the extra
// slot holds a terminating null pointer (the store is not visible here).
148 WordList
** wl
= new WordList
* [nWL
+ 1];// alloc WordList PTRs
// NOTE(review): no guard is visible around these raw allocations. If a later
// `new` throws, the WordLists already created and the pointer array would
// leak - confirm.
150 for (; i
< nWL
; i
++) {
151 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
152 wl
[i
]->Set(words
[i
]);
155 // call our "internal" folder/lexer (... then do Flush!)
157 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
159 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
161 // clean up before leaving
// Walks the WordList slots from last to first; the loop body is not visible
// in this view (presumably it deletes each WordList).
162 for (i
= nWL
- 1; i
>= 0; i
--)
168 #endif /* BUILD_AS_EXTERNAL_LEXER */
170 void ColouriseCamlDoc(
171 unsigned int startPos
, int length
,
173 WordList
*keywordlists
[],
177 StyleContext
sc(startPos
, length
, initStyle
, styler
);
178 // set up [initial] state info (terminating states that shouldn't "bleed")
180 if (sc
.state
< SCE_CAML_STRING
)
181 sc
.state
= SCE_CAML_DEFAULT
;
182 if (sc
.state
>= SCE_CAML_COMMENT
)
183 nesting
= (sc
.state
& 0x0f) - SCE_CAML_COMMENT
;
185 int chBase
= 0, chToken
= 0, chLit
= 0;
186 WordList
& keywords
= *keywordlists
[0];
187 WordList
& keywords2
= *keywordlists
[1];
188 WordList
& keywords3
= *keywordlists
[2];
189 const int useMagic
= styler
.GetPropertyInt("lexer.caml.magic", 0);
191 // foreach char in range...
193 // set up [per-char] state info
194 int state2
= -1; // (ASSUME no state change)
195 int chColor
= sc
.currentPos
- 1;// (ASSUME standard coloring range)
196 bool advance
= true; // (ASSUME scanner "eats" 1 char)
198 // step state machine
199 switch (sc
.state
& 0x0f) {
200 case SCE_CAML_DEFAULT
:
201 chToken
= sc
.currentPos
; // save [possible] token start (JIC)
202 // it's wide open; what do we have?
204 state2
= SCE_CAML_IDENTIFIER
;
205 else if (sc
.Match('`') && iscamlf(sc
.chNext
))
206 state2
= SCE_CAML_TAGNAME
;
207 else if (sc
.Match('#') && isdigit(sc
.chNext
))
208 state2
= SCE_CAML_LINENUM
;
209 else if (isdigit(sc
.ch
)) {
210 state2
= SCE_CAML_NUMBER
, chBase
= 10;
211 if (sc
.Match('0') && strchr("bBoOxX", sc
.chNext
))
212 chBase
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward();
213 } else if (sc
.Match('\'')) /* (char literal?) */
214 state2
= SCE_CAML_CHAR
, chLit
= 0;
215 else if (sc
.Match('\"'))
216 state2
= SCE_CAML_STRING
;
217 else if (sc
.Match('(', '*'))
218 state2
= SCE_CAML_COMMENT
,
219 sc
.ch
= ' ', // (make SURE "(*)" isn't seen as a closed comment)
221 else if (strchr("!?~" /* Caml "prefix-symbol" */
222 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
223 "()[]{};,:.#", sc
.ch
)) /* Caml "bracket" or ;,:.# */
224 state2
= SCE_CAML_OPERATOR
;
227 case SCE_CAML_IDENTIFIER
:
228 // [try to] interpret as [additional] identifier char
229 if (!(iscaml(sc
.ch
) || sc
.Match('\''))) {
230 const int n
= sc
.currentPos
- chToken
;
232 // length is believable as keyword, [re-]construct token
234 for (int i
= -n
; i
< 0; i
++)
235 t
[n
+ i
] = static_cast<char>(sc
.GetRelative(i
));
237 // special-case "_" token as KEYWORD
238 if ((n
== 1 && sc
.chPrev
== '_') || keywords
.InList(t
))
239 sc
.ChangeState(SCE_CAML_KEYWORD
);
240 else if (keywords2
.InList(t
))
241 sc
.ChangeState(SCE_CAML_KEYWORD2
);
242 else if (keywords3
.InList(t
))
243 sc
.ChangeState(SCE_CAML_KEYWORD3
);
245 state2
= SCE_CAML_DEFAULT
, advance
= false;
249 case SCE_CAML_TAGNAME
:
250 // [try to] interpret as [additional] tagname char
251 if (!(iscaml(sc
.ch
) || sc
.Match('\'')))
252 state2
= SCE_CAML_DEFAULT
, advance
= false;
255 /*case SCE_CAML_KEYWORD:
256 case SCE_CAML_KEYWORD2:
257 case SCE_CAML_KEYWORD3:
258 // [try to] interpret as [additional] keyword char
260 state2 = SCE_CAML_DEFAULT, advance = false;
263 case SCE_CAML_LINENUM
:
264 // [try to] interpret as [additional] linenum directive char
266 state2
= SCE_CAML_DEFAULT
, advance
= false;
269 case SCE_CAML_OPERATOR
: {
270 // [try to] interpret as [additional] operator char
272 if (iscaml(sc
.ch
) || isspace(sc
.ch
) /* ident or whitespace */
273 || (o
= strchr(")]};,\'\"`#", sc
.ch
),o
)/* "termination" chars */
274 || !strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)/* "operator" chars */) {
275 // check for INCLUSIVE termination
276 if (o
&& strchr(")]};,", sc
.ch
)) {
277 if ((sc
.Match(')') && sc
.chPrev
== '(')
278 || (sc
.Match(']') && sc
.chPrev
== '['))
279 // special-case "()" and "[]" tokens as KEYWORDS
280 sc
.ChangeState(SCE_CAML_KEYWORD
);
284 state2
= SCE_CAML_DEFAULT
;
289 case SCE_CAML_NUMBER
:
290 // [try to] interpret as [additional] numeric literal char
291 // N.B. - improperly accepts "extra" digits in base 2 or 8 literals
292 if (iscamld(sc
.ch
) || IsADigit(sc
.ch
, chBase
))
294 // how about an integer suffix?
295 if ((sc
.Match('l') || sc
.Match('L') || sc
.Match('n'))
296 && (iscamld(sc
.chPrev
) || IsADigit(sc
.chPrev
, chBase
)))
298 // or a floating-point literal?
300 // with a decimal point?
301 if (sc
.Match('.') && iscamld(sc
.chPrev
))
303 // with an exponent? (I)
304 if ((sc
.Match('e') || sc
.Match('E'))
305 && (iscamld(sc
.chPrev
) || sc
.chPrev
== '.'))
307 // with an exponent? (II)
308 if ((sc
.Match('+') || sc
.Match('-'))
309 && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E'))
312 // it looks like we have run out of number
313 state2
= SCE_CAML_DEFAULT
, advance
= false;
317 // [try to] interpret as [additional] char literal char
318 if (sc
.Match('\\')) {
319 chLit
= 1; // (definitely IS a char literal)
320 if (sc
.chPrev
== '\\')
321 sc
.ch
= ' '; // (so termination test isn't fooled)
322 // should we be terminating - one way or another?
323 } else if ((sc
.Match('\'') && sc
.chPrev
!= '\\') || sc
.atLineEnd
) {
324 state2
= SCE_CAML_DEFAULT
;
328 sc
.ChangeState(SCE_CAML_IDENTIFIER
);
329 // ... maybe a char literal, maybe not
330 } else if (chLit
< 1 && sc
.currentPos
- chToken
>= 2)
331 sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance
= false;
334 case SCE_CAML_STRING
:
335 // [try to] interpret as [additional] string literal char
336 if (sc
.Match('\\') && sc
.chPrev
== '\\')
337 sc
.ch
= ' '; // (so '\\' doesn't cause us trouble)
338 else if (sc
.Match('\"') && sc
.chPrev
!= '\\')
339 state2
= SCE_CAML_DEFAULT
, chColor
++;
342 case SCE_CAML_COMMENT
:
343 case SCE_CAML_COMMENT1
:
344 case SCE_CAML_COMMENT2
:
345 case SCE_CAML_COMMENT3
:
346 // we're IN a comment - does this start a NESTED comment?
347 if (sc
.Match('(', '*'))
348 state2
= sc
.state
+ 1, chToken
= sc
.currentPos
,
349 sc
.ch
= ' ', // (make SURE "(*)" isn't seen as a closed comment)
350 sc
.Forward(), nesting
++;
351 // [try to] interpret as [additional] comment char
352 else if (sc
.Match(')') && sc
.chPrev
== '*') {
354 state2
= (sc
.state
& 0x0f) - 1, chToken
= 0, nesting
--;
356 state2
= SCE_CAML_DEFAULT
;
358 // enable "magic" (read-only) comment AS REQUIRED
359 } else if (useMagic
&& sc
.currentPos
- chToken
== 4
360 && sc
.Match('c') && sc
.chPrev
== 'r' && sc
.GetRelative(-2) == '@')
361 sc
.state
|= 0x10; // (switch to read-only comment style)
365 // handle state change and char coloring as required
367 styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
);
368 // move to next char UNLESS re-scanning current char
373 // do any required terminal char coloring (JIC)
377 #ifdef BUILD_AS_EXTERNAL_LEXER
379 #endif /* BUILD_AS_EXTERNAL_LEXER */
381 unsigned int startPos
, int length
,
383 WordList
*keywordlists
[],
386 // below useless evaluation(s) to suppress "not used" warnings
387 startPos
|| length
|| initStyle
|| keywordlists
[0] || styler
.Length();
// Descriptive names for the three keyword lists, in the same order in which
// the colouriser binds them (keywordlists[0], [1] and [2]). The array is
// passed to the LexerModule definition that follows.
390 static const char * const camlWordListDesc
[] = {
391 "Keywords", // primary Objective Caml keywords
392 "Keywords2", // "optional" keywords (typically from Pervasives)
393 "Keywords3", // "optional" keywords (typically typenames)
// Only when the lexer is NOT built as an external lexer: defines the
// LexerModule object that ties the SCLEX_CAML id and the name "caml" to
// ColouriseCamlDoc, FoldCamlDoc and the keyword-list descriptions.
// NOTE(review): presumably constructing this object is what makes the lexer
// known to the host - confirm against LexerModule.
397 #ifndef BUILD_AS_EXTERNAL_LEXER
398 LexerModule
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
);
399 #endif /* BUILD_AS_EXTERNAL_LEXER */