]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexCaml.cxx
1 // Scintilla source code edit control
3 ** Lexer for Objective Caml.
5 // Copyright 2005 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Switch to using StyleContext infrastructure.
30 #include "StyleContext.h"
32 #include "Scintilla.h"
35 // Since the Microsoft __iscsym[f] funcs are not ANSI...
// Returns nonzero when c may appear inside a Caml identifier: an
// alphanumeric character (as classified by isalnum) or an underscore.
// The range guard keeps values that do not fit in an unsigned char
// (e.g. negative or multi-byte character codes) away from isalnum(),
// whose behaviour is undefined for such arguments.
inline int iscaml(int c) {
	return c == '_' || (c >= 0 && c <= 0xFF && isalnum(c));
}
// Returns nonzero when c may start a Caml identifier: an alphabetic
// character (as classified by isalpha) or an underscore.
// The range guard keeps values that do not fit in an unsigned char
// (e.g. negative or multi-byte character codes) away from isalpha(),
// whose behaviour is undefined for such arguments.
inline int iscamlf(int c) {
	return c == '_' || (c >= 0 && c <= 0xFF && isalpha(c));
}
// Returns nonzero when c may appear inside a Caml numeric literal as a
// plain digit: a decimal digit (as classified by isdigit) or the
// underscore that Caml allows as a digit separator.
// The range guard keeps values that do not fit in an unsigned char
// (e.g. negative or multi-byte character codes) away from isdigit(),
// whose behaviour is undefined for such arguments.
inline int iscamld(int c) {
	return c == '_' || (c >= 0 && c <= 0xFF && isdigit(c));
}
40 static const int baseT
[24] = {
41 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
42 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
46 using namespace Scintilla
;
49 #ifdef BUILD_AS_EXTERNAL_LEXER
51 (actually seems to work!)
53 #include "WindowAccessor.h"
54 #include "ExternalLexer.h"
60 static void ColouriseCamlDoc(
61 unsigned int startPos
, int length
,
63 WordList
*keywordlists
[],
66 static void FoldCamlDoc(
67 unsigned int startPos
, int length
,
69 WordList
*keywordlists
[],
72 static void InternalLexOrFold(int lexOrFold
, unsigned int startPos
, int length
,
73 int initStyle
, char *words
[], WindowID window
, char *props
);
75 static const char* LexerName
= "caml";
78 void Platform::DebugPrintf(const char *format
, ...) {
81 va_start(pArguments
, format
);
82 vsprintf(buffer
,format
,pArguments
);
84 Platform::DebugDisplay(buffer
);
87 void Platform::DebugPrintf(const char *, ...) {
91 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) {
92 return ::IsDBCSLeadByteEx(codePage
, ch
) != 0;
95 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) {
96 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
);
99 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) {
100 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
,
101 reinterpret_cast<LPARAM
>(lParam
));
104 void EXT_LEXER_DECL
Fold(unsigned int lexer
, unsigned int startPos
, int length
,
105 int initStyle
, char *words
[], WindowID window
, char *props
)
107 // below useless evaluation(s) to suppress "not used" warnings
109 // build expected data structures and do the Fold
110 InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
);
114 int EXT_LEXER_DECL
GetLexerCount()
116 return 1; // just us [Objective] Caml lexers here!
119 void EXT_LEXER_DECL
GetLexerName(unsigned int Index
, char *name
, int buflength
)
121 // below useless evaluation(s) to suppress "not used" warnings
123 // return as much of our lexer name as will fit (what's up with Index?)
126 int n
= strlen(LexerName
);
129 memcpy(name
, LexerName
, n
), name
[n
] = '\0';
133 void EXT_LEXER_DECL
Lex(unsigned int lexer
, unsigned int startPos
, int length
,
134 int initStyle
, char *words
[], WindowID window
, char *props
)
136 // below useless evaluation(s) to suppress "not used" warnings
138 // build expected data structures and do the Lex
139 InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
);
142 static void InternalLexOrFold(int foldOrLex
, unsigned int startPos
, int length
,
143 int initStyle
, char *words
[], WindowID window
, char *props
)
145 // create and initialize a WindowAccessor (including contained PropSet)
147 ps
.SetMultiple(props
);
148 WindowAccessor
wa(window
, ps
);
149 // create and initialize WordList(s)
151 for (; words
[nWL
]; nWL
++) ; // count # of WordList PTRs needed
152 WordList
** wl
= new WordList
* [nWL
+ 1];// alloc WordList PTRs
154 for (; i
< nWL
; i
++) {
155 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
156 wl
[i
]->Set(words
[i
]);
159 // call our "internal" folder/lexer (... then do Flush!)
161 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
163 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
165 // clean up before leaving
166 for (i
= nWL
- 1; i
>= 0; i
--)
172 #endif /* BUILD_AS_EXTERNAL_LEXER */
174 void ColouriseCamlDoc(
175 unsigned int startPos
, int length
,
177 WordList
*keywordlists
[],
181 StyleContext
sc(startPos
, length
, initStyle
, styler
);
182 // set up [initial] state info (terminating states that shouldn't "bleed")
184 if (sc
.state
< SCE_CAML_STRING
)
185 sc
.state
= SCE_CAML_DEFAULT
;
186 if (sc
.state
>= SCE_CAML_COMMENT
)
187 nesting
= (sc
.state
& 0x0f) - SCE_CAML_COMMENT
;
189 int chBase
= 0, chToken
= 0, chLit
= 0;
190 WordList
& keywords
= *keywordlists
[0];
191 WordList
& keywords2
= *keywordlists
[1];
192 WordList
& keywords3
= *keywordlists
[2];
193 const int useMagic
= styler
.GetPropertyInt("lexer.caml.magic", 0);
195 // foreach char in range...
197 // set up [per-char] state info
198 int state2
= -1; // (ASSUME no state change)
199 int chColor
= sc
.currentPos
- 1;// (ASSUME standard coloring range)
200 bool advance
= true; // (ASSUME scanner "eats" 1 char)
202 // step state machine
203 switch (sc
.state
& 0x0f) {
204 case SCE_CAML_DEFAULT
:
205 chToken
= sc
.currentPos
; // save [possible] token start (JIC)
206 // it's wide open; what do we have?
208 state2
= SCE_CAML_IDENTIFIER
;
209 else if (sc
.Match('`') && iscamlf(sc
.chNext
))
210 state2
= SCE_CAML_TAGNAME
;
211 else if (sc
.Match('#') && isdigit(sc
.chNext
))
212 state2
= SCE_CAML_LINENUM
;
213 else if (isdigit(sc
.ch
)) {
214 state2
= SCE_CAML_NUMBER
, chBase
= 10;
215 if (sc
.Match('0') && strchr("bBoOxX", sc
.chNext
))
216 chBase
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward();
217 } else if (sc
.Match('\'')) /* (char literal?) */
218 state2
= SCE_CAML_CHAR
, chLit
= 0;
219 else if (sc
.Match('\"'))
220 state2
= SCE_CAML_STRING
;
221 else if (sc
.Match('(', '*'))
222 state2
= SCE_CAML_COMMENT
,
223 sc
.ch
= ' ', // (make SURE "(*)" isn't seen as a closed comment)
225 else if (strchr("!?~" /* Caml "prefix-symbol" */
226 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
227 "()[]{};,:.#", sc
.ch
)) /* Caml "bracket" or ;,:.# */
228 state2
= SCE_CAML_OPERATOR
;
231 case SCE_CAML_IDENTIFIER
:
232 // [try to] interpret as [additional] identifier char
233 if (!(iscaml(sc
.ch
) || sc
.Match('\''))) {
234 const int n
= sc
.currentPos
- chToken
;
236 // length is believable as keyword, [re-]construct token
238 for (int i
= -n
; i
< 0; i
++)
239 t
[n
+ i
] = static_cast<char>(sc
.GetRelative(i
));
241 // special-case "_" token as KEYWORD
242 if ((n
== 1 && sc
.chPrev
== '_') || keywords
.InList(t
))
243 sc
.ChangeState(SCE_CAML_KEYWORD
);
244 else if (keywords2
.InList(t
))
245 sc
.ChangeState(SCE_CAML_KEYWORD2
);
246 else if (keywords3
.InList(t
))
247 sc
.ChangeState(SCE_CAML_KEYWORD3
);
249 state2
= SCE_CAML_DEFAULT
, advance
= false;
253 case SCE_CAML_TAGNAME
:
254 // [try to] interpret as [additional] tagname char
255 if (!(iscaml(sc
.ch
) || sc
.Match('\'')))
256 state2
= SCE_CAML_DEFAULT
, advance
= false;
259 /*case SCE_CAML_KEYWORD:
260 case SCE_CAML_KEYWORD2:
261 case SCE_CAML_KEYWORD3:
262 // [try to] interpret as [additional] keyword char
264 state2 = SCE_CAML_DEFAULT, advance = false;
267 case SCE_CAML_LINENUM
:
268 // [try to] interpret as [additional] linenum directive char
270 state2
= SCE_CAML_DEFAULT
, advance
= false;
273 case SCE_CAML_OPERATOR
: {
274 // [try to] interpret as [additional] operator char
276 if (iscaml(sc
.ch
) || isspace(sc
.ch
) /* ident or whitespace */
277 || (o
= strchr(")]};,\'\"`#", sc
.ch
),o
)/* "termination" chars */
278 || !strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)/* "operator" chars */) {
279 // check for INCLUSIVE termination
280 if (o
&& strchr(")]};,", sc
.ch
)) {
281 if ((sc
.Match(')') && sc
.chPrev
== '(')
282 || (sc
.Match(']') && sc
.chPrev
== '['))
283 // special-case "()" and "[]" tokens as KEYWORDS
284 sc
.ChangeState(SCE_CAML_KEYWORD
);
288 state2
= SCE_CAML_DEFAULT
;
293 case SCE_CAML_NUMBER
:
294 // [try to] interpret as [additional] numeric literal char
295 // N.B. - improperly accepts "extra" digits in base 2 or 8 literals
296 if (iscamld(sc
.ch
) || IsADigit(sc
.ch
, chBase
))
298 // how about an integer suffix?
299 if ((sc
.Match('l') || sc
.Match('L') || sc
.Match('n'))
300 && (iscamld(sc
.chPrev
) || IsADigit(sc
.chPrev
, chBase
)))
302 // or a floating-point literal?
304 // with a decimal point?
305 if (sc
.Match('.') && iscamld(sc
.chPrev
))
307 // with an exponent? (I)
308 if ((sc
.Match('e') || sc
.Match('E'))
309 && (iscamld(sc
.chPrev
) || sc
.chPrev
== '.'))
311 // with an exponent? (II)
312 if ((sc
.Match('+') || sc
.Match('-'))
313 && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E'))
316 // it looks like we have run out of number
317 state2
= SCE_CAML_DEFAULT
, advance
= false;
321 // [try to] interpret as [additional] char literal char
322 if (sc
.Match('\\')) {
323 chLit
= 1; // (definitely IS a char literal)
324 if (sc
.chPrev
== '\\')
325 sc
.ch
= ' '; // (so termination test isn't fooled)
326 // should we be terminating - one way or another?
327 } else if ((sc
.Match('\'') && sc
.chPrev
!= '\\') || sc
.atLineEnd
) {
328 state2
= SCE_CAML_DEFAULT
;
332 sc
.ChangeState(SCE_CAML_IDENTIFIER
);
333 // ... maybe a char literal, maybe not
334 } else if (chLit
< 1 && sc
.currentPos
- chToken
>= 2)
335 sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance
= false;
338 case SCE_CAML_STRING
:
339 // [try to] interpret as [additional] string literal char
340 if (sc
.Match('\\') && sc
.chPrev
== '\\')
341 sc
.ch
= ' '; // (so '\\' doesn't cause us trouble)
342 else if (sc
.Match('\"') && sc
.chPrev
!= '\\')
343 state2
= SCE_CAML_DEFAULT
, chColor
++;
346 case SCE_CAML_COMMENT
:
347 case SCE_CAML_COMMENT1
:
348 case SCE_CAML_COMMENT2
:
349 case SCE_CAML_COMMENT3
:
350 // we're IN a comment - does this start a NESTED comment?
351 if (sc
.Match('(', '*'))
352 state2
= sc
.state
+ 1, chToken
= sc
.currentPos
,
353 sc
.ch
= ' ', // (make SURE "(*)" isn't seen as a closed comment)
354 sc
.Forward(), nesting
++;
355 // [try to] interpret as [additional] comment char
356 else if (sc
.Match(')') && sc
.chPrev
== '*') {
358 state2
= (sc
.state
& 0x0f) - 1, chToken
= 0, nesting
--;
360 state2
= SCE_CAML_DEFAULT
;
362 // enable "magic" (read-only) comment AS REQUIRED
363 } else if (useMagic
&& sc
.currentPos
- chToken
== 4
364 && sc
.Match('c') && sc
.chPrev
== 'r' && sc
.GetRelative(-2) == '@')
365 sc
.state
|= 0x10; // (switch to read-only comment style)
369 // handle state change and char coloring as required
371 styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
);
372 // move to next char UNLESS re-scanning current char
377 // do any required terminal char coloring (JIC)
381 #ifdef BUILD_AS_EXTERNAL_LEXER
383 #endif /* BUILD_AS_EXTERNAL_LEXER */
385 unsigned int startPos
, int length
,
387 WordList
*keywordlists
[],
390 // below useless evaluation(s) to suppress "not used" warnings
391 startPos
|| length
|| initStyle
|| keywordlists
[0] || styler
.Length();
394 static const char * const camlWordListDesc
[] = {
395 "Keywords", // primary Objective Caml keywords
396 "Keywords2", // "optional" keywords (typically from Pervasives)
397 "Keywords3", // "optional" keywords (typically typenames)
401 #ifndef BUILD_AS_EXTERNAL_LEXER
402 LexerModule
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
);
403 #endif /* BUILD_AS_EXTERNAL_LEXER */