]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexCaml.cxx
1 // Scintilla source code edit control
3 ** Lexer for Objective Caml.
5 // Copyright 2005-2009 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Switch to using StyleContext infrastructure.
18 20090629 Add full Standard ML '97 support.
29 #include "Scintilla.h"
32 #include "PropSetSimple.h"
34 #include "LexAccessor.h"
36 #include "StyleContext.h"
37 #include "CharacterSet.h"
38 #include "LexerModule.h"
40 // Since the Microsoft __iscsym[f] funcs are not ANSI...
// Identifier-character test: returns 1 when c is alphanumeric (per isalnum())
// or an underscore, and 0 otherwise.
// The <ctype.h> classifiers are only defined for arguments representable as
// unsigned char (or EOF); any other value is undefined behaviour.  Callers
// pass plain int character values, so out-of-range input (negative values,
// or values wider than one byte) is screened out first and classified as
// "not an identifier character" instead of being handed to isalnum().
inline int iscaml(int c) {
	const bool fitsUnsignedChar =
		(c == static_cast<int>(static_cast<unsigned char>(c)));
	return (fitsUnsignedChar && isalnum(c)) || c == '_';
}
// Identifier-FIRST-character test: returns 1 when c is alphabetic (per
// isalpha()) or an underscore, and 0 otherwise (digits are rejected).
// The <ctype.h> classifiers are only defined for arguments representable as
// unsigned char (or EOF); any other value is undefined behaviour.  Callers
// pass plain int character values, so out-of-range input (negative values,
// or values wider than one byte) is screened out first and classified as
// "not an identifier start" instead of being handed to isalpha().
inline int iscamlf(int c) {
	const bool fitsUnsignedChar =
		(c == static_cast<int>(static_cast<unsigned char>(c)));
	return (fitsUnsignedChar && isalpha(c)) || c == '_';
}
44 static const int baseT
[24] = {
45 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
46 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
50 using namespace Scintilla
;
53 #ifdef BUILD_AS_EXTERNAL_LEXER
55 (actually seems to work!)
58 #include "WindowAccessor.h"
59 #include "ExternalLexer.h"
62 #define EXT_LEXER_DECL __declspec( dllexport ) __stdcall
68 static void ColouriseCamlDoc(
69 unsigned int startPos
, int length
,
71 WordList
*keywordlists
[],
74 static void FoldCamlDoc(
75 unsigned int startPos
, int length
,
77 WordList
*keywordlists
[],
80 static void InternalLexOrFold(int lexOrFold
, unsigned int startPos
, int length
,
81 int initStyle
, char *words
[], WindowID window
, char *props
);
83 static const char* LexerName
= "caml";
86 void Platform::DebugPrintf(const char *format
, ...) {
89 va_start(pArguments
, format
);
90 vsprintf(buffer
,format
,pArguments
);
92 Platform::DebugDisplay(buffer
);
95 void Platform::DebugPrintf(const char *, ...) {
99 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) {
100 return ::IsDBCSLeadByteEx(codePage
, ch
) != 0;
103 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) {
104 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
);
107 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) {
108 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
,
109 reinterpret_cast<LPARAM
>(lParam
));
112 void EXT_LEXER_DECL
Fold(unsigned int lexer
, unsigned int startPos
, int length
,
113 int initStyle
, char *words
[], WindowID window
, char *props
)
115 // below useless evaluation(s) to suppress "not used" warnings
117 // build expected data structures and do the Fold
118 InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
);
122 int EXT_LEXER_DECL
GetLexerCount()
124 return 1; // just us [Objective] Caml lexers here!
127 void EXT_LEXER_DECL
GetLexerName(unsigned int Index
, char *name
, int buflength
)
129 // below useless evaluation(s) to suppress "not used" warnings
131 // return as much of our lexer name as will fit (what's up with Index?)
134 int n
= strlen(LexerName
);
137 memcpy(name
, LexerName
, n
), name
[n
] = '\0';
141 void EXT_LEXER_DECL
Lex(unsigned int lexer
, unsigned int startPos
, int length
,
142 int initStyle
, char *words
[], WindowID window
, char *props
)
144 // below useless evaluation(s) to suppress "not used" warnings
146 // build expected data structures and do the Lex
147 InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
);
150 static void InternalLexOrFold(int foldOrLex
, unsigned int startPos
, int length
,
151 int initStyle
, char *words
[], WindowID window
, char *props
)
153 // create and initialize a WindowAccessor (including contained PropSet)
155 ps
.SetMultiple(props
);
156 WindowAccessor
wa(window
, ps
);
157 // create and initialize WordList(s)
159 for (; words
[nWL
]; nWL
++) ; // count # of WordList PTRs needed
160 WordList
** wl
= new WordList
* [nWL
+ 1];// alloc WordList PTRs
162 for (; i
< nWL
; i
++) {
163 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
164 wl
[i
]->Set(words
[i
]);
167 // call our "internal" folder/lexer (... then do Flush!)
169 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
171 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
173 // clean up before leaving
174 for (i
= nWL
- 1; i
>= 0; i
--)
180 #endif /* BUILD_AS_EXTERNAL_LEXER */
182 void ColouriseCamlDoc(
183 unsigned int startPos
, int length
,
185 WordList
*keywordlists
[],
189 StyleContext
sc(startPos
, length
, initStyle
, styler
);
191 int chBase
= 0, chToken
= 0, chLit
= 0;
192 WordList
& keywords
= *keywordlists
[0];
193 WordList
& keywords2
= *keywordlists
[1];
194 WordList
& keywords3
= *keywordlists
[2];
195 const bool isSML
= keywords
.InList("andalso");
196 const int useMagic
= styler
.GetPropertyInt("lexer.caml.magic", 0);
198 // set up [initial] state info (terminating states that shouldn't "bleed")
199 const int state_
= sc
.state
& 0x0f;
200 if (state_
<= SCE_CAML_CHAR
201 || (isSML
&& state_
== SCE_CAML_STRING
))
202 sc
.state
= SCE_CAML_DEFAULT
;
203 int nesting
= (state_
>= SCE_CAML_COMMENT
)? (state_
- SCE_CAML_COMMENT
): 0;
205 // foreach char in range...
207 // set up [per-char] state info
208 int state2
= -1; // (ASSUME no state change)
209 int chColor
= sc
.currentPos
- 1;// (ASSUME standard coloring range)
210 bool advance
= true; // (ASSUME scanner "eats" 1 char)
212 // step state machine
213 switch (sc
.state
& 0x0f) {
214 case SCE_CAML_DEFAULT
:
215 chToken
= sc
.currentPos
; // save [possible] token start (JIC)
216 // it's wide open; what do we have?
218 state2
= SCE_CAML_IDENTIFIER
;
219 else if (!isSML
&& sc
.Match('`') && iscamlf(sc
.chNext
))
220 state2
= SCE_CAML_TAGNAME
;
221 else if (!isSML
&& sc
.Match('#') && isdigit(sc
.chNext
))
222 state2
= SCE_CAML_LINENUM
;
223 else if (isdigit(sc
.ch
)) {
224 // it's a number, assume base 10
225 state2
= SCE_CAML_NUMBER
, chBase
= 10;
227 // there MAY be a base specified...
228 const char* baseC
= "bBoOxX";
230 if (sc
.chNext
== 'w')
231 sc
.Forward(); // (consume SML "word" indicator)
234 // ... change to specified base AS REQUIRED
235 if (strchr(baseC
, sc
.chNext
))
236 chBase
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward();
238 } else if (!isSML
&& sc
.Match('\'')) // (Caml char literal?)
239 state2
= SCE_CAML_CHAR
, chLit
= 0;
240 else if (isSML
&& sc
.Match('#', '"')) // (SML char literal?)
241 state2
= SCE_CAML_CHAR
, sc
.Forward();
242 else if (sc
.Match('"'))
243 state2
= SCE_CAML_STRING
;
244 else if (sc
.Match('(', '*'))
245 state2
= SCE_CAML_COMMENT
, sc
.Forward(), sc
.ch
= ' '; // (*)...
246 else if (strchr("!?~" /* Caml "prefix-symbol" */
247 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
248 "()[]{};,:.#", sc
.ch
) // Caml "bracket" or ;,:.#
249 // SML "extra" ident chars
250 || (isSML
&& (sc
.Match('\\') || sc
.Match('`'))))
251 state2
= SCE_CAML_OPERATOR
;
254 case SCE_CAML_IDENTIFIER
:
255 // [try to] interpret as [additional] identifier char
256 if (!(iscaml(sc
.ch
) || sc
.Match('\''))) {
257 const int n
= sc
.currentPos
- chToken
;
259 // length is believable as keyword, [re-]construct token
261 for (int i
= -n
; i
< 0; i
++)
262 t
[n
+ i
] = static_cast<char>(sc
.GetRelative(i
));
264 // special-case "_" token as KEYWORD
265 if ((n
== 1 && sc
.chPrev
== '_') || keywords
.InList(t
))
266 sc
.ChangeState(SCE_CAML_KEYWORD
);
267 else if (keywords2
.InList(t
))
268 sc
.ChangeState(SCE_CAML_KEYWORD2
);
269 else if (keywords3
.InList(t
))
270 sc
.ChangeState(SCE_CAML_KEYWORD3
);
272 state2
= SCE_CAML_DEFAULT
, advance
= false;
276 case SCE_CAML_TAGNAME
:
277 // [try to] interpret as [additional] tagname char
278 if (!(iscaml(sc
.ch
) || sc
.Match('\'')))
279 state2
= SCE_CAML_DEFAULT
, advance
= false;
282 /*case SCE_CAML_KEYWORD:
283 case SCE_CAML_KEYWORD2:
284 case SCE_CAML_KEYWORD3:
285 // [try to] interpret as [additional] keyword char
287 state2 = SCE_CAML_DEFAULT, advance = false;
290 case SCE_CAML_LINENUM
:
291 // [try to] interpret as [additional] linenum directive char
293 state2
= SCE_CAML_DEFAULT
, advance
= false;
296 case SCE_CAML_OPERATOR
: {
297 // [try to] interpret as [additional] operator char
299 if (iscaml(sc
.ch
) || isspace(sc
.ch
) // ident or whitespace
300 || (o
= strchr(")]};,\'\"#", sc
.ch
),o
) // "termination" chars
301 || (!isSML
&& sc
.Match('`')) // Caml extra term char
302 || (!strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)// "operator" chars
303 // SML extra ident chars
304 && !(isSML
&& (sc
.Match('\\') || sc
.Match('`'))))) {
305 // check for INCLUSIVE termination
306 if (o
&& strchr(")]};,", sc
.ch
)) {
307 if ((sc
.Match(')') && sc
.chPrev
== '(')
308 || (sc
.Match(']') && sc
.chPrev
== '['))
309 // special-case "()" and "[]" tokens as KEYWORDS
310 sc
.ChangeState(SCE_CAML_KEYWORD
);
314 state2
= SCE_CAML_DEFAULT
;
319 case SCE_CAML_NUMBER
:
320 // [try to] interpret as [additional] numeric literal char
321 if ((!isSML
&& sc
.Match('_')) || IsADigit(sc
.ch
, chBase
))
323 // how about an integer suffix?
324 if (!isSML
&& (sc
.Match('l') || sc
.Match('L') || sc
.Match('n'))
325 && (sc
.chPrev
== '_' || IsADigit(sc
.chPrev
, chBase
)))
327 // or a floating-point literal?
329 // with a decimal point?
331 && ((!isSML
&& sc
.chPrev
== '_')
332 || IsADigit(sc
.chPrev
, chBase
)))
334 // with an exponent? (I)
335 if ((sc
.Match('e') || sc
.Match('E'))
336 && ((!isSML
&& (sc
.chPrev
== '.' || sc
.chPrev
== '_'))
337 || IsADigit(sc
.chPrev
, chBase
)))
339 // with an exponent? (II)
340 if (((!isSML
&& (sc
.Match('+') || sc
.Match('-')))
341 || (isSML
&& sc
.Match('~')))
342 && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E'))
345 // it looks like we have run out of number
346 state2
= SCE_CAML_DEFAULT
, advance
= false;
351 // [try to] interpret as [additional] char literal char
352 if (sc
.Match('\\')) {
353 chLit
= 1; // (definitely IS a char literal)
354 if (sc
.chPrev
== '\\')
355 sc
.ch
= ' '; // (...\\')
356 // should we be terminating - one way or another?
357 } else if ((sc
.Match('\'') && sc
.chPrev
!= '\\')
359 state2
= SCE_CAML_DEFAULT
;
363 sc
.ChangeState(SCE_CAML_IDENTIFIER
);
364 // ... maybe a char literal, maybe not
365 } else if (chLit
< 1 && sc
.currentPos
- chToken
>= 2)
366 sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance
= false;
369 // fall through for SML char literal (handle like string) */
371 case SCE_CAML_STRING
:
372 // [try to] interpret as [additional] [SML char/] string literal char
373 if (isSML
&& sc
.Match('\\') && sc
.chPrev
!= '\\' && isspace(sc
.chNext
))
374 state2
= SCE_CAML_WHITE
;
375 else if (sc
.Match('\\') && sc
.chPrev
== '\\')
376 sc
.ch
= ' '; // (...\\")
377 // should we be terminating - one way or another?
378 else if ((sc
.Match('"') && sc
.chPrev
!= '\\')
379 || (isSML
&& sc
.atLineEnd
)) {
380 state2
= SCE_CAML_DEFAULT
;
387 // [try to] interpret as [additional] SML embedded whitespace char
388 if (sc
.Match('\\')) {
389 // style this puppy NOW...
390 state2
= SCE_CAML_STRING
, sc
.ch
= ' ' /* (...\") */, chColor
++,
391 styler
.ColourTo(chColor
, SCE_CAML_WHITE
), styler
.Flush();
392 // ... then backtrack to determine original SML literal type
394 for (; p
>= 0 && styler
.StyleAt(p
) == SCE_CAML_WHITE
; p
--) ;
396 state2
= static_cast<int>(styler
.StyleAt(p
));
397 // take care of state change NOW
398 sc
.ChangeState(state2
), state2
= -1;
402 case SCE_CAML_COMMENT
:
403 case SCE_CAML_COMMENT1
:
404 case SCE_CAML_COMMENT2
:
405 case SCE_CAML_COMMENT3
:
406 // we're IN a comment - does this start a NESTED comment?
407 if (sc
.Match('(', '*'))
408 state2
= sc
.state
+ 1, chToken
= sc
.currentPos
,
409 sc
.Forward(), sc
.ch
= ' ' /* (*)... */, nesting
++;
410 // [try to] interpret as [additional] comment char
411 else if (sc
.Match(')') && sc
.chPrev
== '*') {
413 state2
= (sc
.state
& 0x0f) - 1, chToken
= 0, nesting
--;
415 state2
= SCE_CAML_DEFAULT
;
417 // enable "magic" (read-only) comment AS REQUIRED
418 } else if (useMagic
&& sc
.currentPos
- chToken
== 4
419 && sc
.Match('c') && sc
.chPrev
== 'r' && sc
.GetRelative(-2) == '@')
420 sc
.state
|= 0x10; // (switch to read-only comment style)
424 // handle state change and char coloring AS REQUIRED
426 styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
);
427 // move to next char UNLESS re-scanning current char
432 // do any required terminal char coloring (JIC)
436 #ifdef BUILD_AS_EXTERNAL_LEXER
438 #endif /* BUILD_AS_EXTERNAL_LEXER */
447 static const char * const camlWordListDesc
[] = {
448 "Keywords", // primary Objective Caml keywords
449 "Keywords2", // "optional" keywords (typically from Pervasives)
450 "Keywords3", // "optional" keywords (typically typenames)
454 #ifndef BUILD_AS_EXTERNAL_LEXER
455 LexerModule
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
);
456 #endif /* BUILD_AS_EXTERNAL_LEXER */