]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexCaml.cxx
   1 // Scintilla source code edit control 
   3  ** Lexer for Objective Caml. 
   5 // Copyright 2005-2009 by Robert Roessler <robertr@rftp.com> 
   6 // The License.txt file describes the conditions under which this software may be distributed. 
   8         20050204 Initial release. 
   9         20050205 Quick compiler standards/"cleanliness" adjustment. 
  10         20050206 Added cast for IsLeadByte(). 
  11         20050209 Changes to "external" build support. 
  12         20050306 Fix for 1st-char-in-doc "corner" case. 
  13         20050502 Fix for [harmless] one-past-the-end coloring. 
  14         20050515 Refined numeric token recognition logic. 
  15         20051125 Added 2nd "optional" keywords class. 
  16         20051129 Support "magic" (read-only) comments for RCaml. 
  17         20051204 Switch to using StyleContext infrastructure. 
  18         20090629 Add full Standard ML '97 support. 
  29 #include "Scintilla.h" 
  32 #include "PropSetSimple.h" 
  34 #include "LexAccessor.h" 
  36 #include "StyleContext.h" 
  37 #include "CharacterSet.h" 
  38 #include "LexerModule.h" 
  40 //      Since the Microsoft __iscsym[f] funcs are not ANSI... 
  41 inline int  iscaml(int c
) {return isalnum(c
) || c 
== '_';} 
  42 inline int iscamlf(int c
) {return isalpha(c
) || c 
== '_';} 
  44 static const int baseT
[24] = { 
  45         0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     /* A - L */ 
  46         0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16      /* M - X */ 
  50 using namespace Scintilla
; 
  53 #ifdef BUILD_AS_EXTERNAL_LEXER 
  55         (actually seems to work!) 
  58 #include "WindowAccessor.h" 
  59 #include "ExternalLexer.h" 
  62 #define EXT_LEXER_DECL __declspec( dllexport ) __stdcall 
  68 static void ColouriseCamlDoc( 
  69         unsigned int startPos
, int length
, 
  71         WordList 
*keywordlists
[], 
  74 static void FoldCamlDoc( 
  75         unsigned int startPos
, int length
, 
  77         WordList 
*keywordlists
[], 
  80 static void InternalLexOrFold(int lexOrFold
, unsigned int startPos
, int length
, 
  81         int initStyle
, char *words
[], WindowID window
, char *props
); 
  83 static const char* LexerName 
= "caml"; 
  86 void Platform::DebugPrintf(const char *format
, ...) { 
  89         va_start(pArguments
, format
); 
  90         vsprintf(buffer
,format
,pArguments
); 
  92         Platform::DebugDisplay(buffer
); 
  95 void Platform::DebugPrintf(const char *, ...) { 
  99 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) { 
 100         return ::IsDBCSLeadByteEx(codePage
, ch
) != 0; 
 103 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) { 
 104         return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
); 
 107 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) { 
 108         return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, 
 109                 reinterpret_cast<LPARAM
>(lParam
)); 
 112 void EXT_LEXER_DECL 
Fold(unsigned int lexer
, unsigned int startPos
, int length
, 
 113         int initStyle
, char *words
[], WindowID window
, char *props
) 
 115         // below useless evaluation(s) to suppress "not used" warnings 
 117         // build expected data structures and do the Fold 
 118         InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
); 
 122 int EXT_LEXER_DECL 
GetLexerCount() 
 124         return 1;       // just us [Objective] Caml lexers here! 
 127 void EXT_LEXER_DECL 
GetLexerName(unsigned int Index
, char *name
, int buflength
) 
 129         // below useless evaluation(s) to suppress "not used" warnings 
 131         // return as much of our lexer name as will fit (what's up with Index?) 
 134                 int n 
= strlen(LexerName
); 
 137                 memcpy(name
, LexerName
, n
), name
[n
] = '\0'; 
 141 void EXT_LEXER_DECL 
Lex(unsigned int lexer
, unsigned int startPos
, int length
, 
 142         int initStyle
, char *words
[], WindowID window
, char *props
) 
 144         // below useless evaluation(s) to suppress "not used" warnings 
 146         // build expected data structures and do the Lex 
 147         InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
); 
 150 static void InternalLexOrFold(int foldOrLex
, unsigned int startPos
, int length
, 
 151         int initStyle
, char *words
[], WindowID window
, char *props
) 
 153         // create and initialize a WindowAccessor (including contained PropSet) 
 155         ps
.SetMultiple(props
); 
 156         WindowAccessor 
wa(window
, ps
); 
 157         // create and initialize WordList(s) 
 159         for (; words
[nWL
]; nWL
++) ;     // count # of WordList PTRs needed 
 160         WordList
** wl 
= new WordList
* [nWL 
+ 1];// alloc WordList PTRs 
 162         for (; i 
< nWL
; i
++) { 
 163                 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION) 
 164                 wl
[i
]->Set(words
[i
]); 
 167         // call our "internal" folder/lexer (... then do Flush!) 
 169                 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
); 
 171                 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
); 
 173         // clean up before leaving 
 174         for (i 
= nWL 
- 1; i 
>= 0; i
--) 
 180 #endif  /* BUILD_AS_EXTERNAL_LEXER */ 
 182 void ColouriseCamlDoc( 
 183         unsigned int startPos
, int length
, 
 185         WordList 
*keywordlists
[], 
 189         StyleContext 
sc(startPos
, length
, initStyle
, styler
); 
 191         int chBase 
= 0, chToken 
= 0, chLit 
= 0; 
 192         WordList
& keywords  
= *keywordlists
[0]; 
 193         WordList
& keywords2 
= *keywordlists
[1]; 
 194         WordList
& keywords3 
= *keywordlists
[2]; 
 195         const bool isSML 
= keywords
.InList("andalso"); 
 196         const int useMagic 
= styler
.GetPropertyInt("lexer.caml.magic", 0); 
 198         // set up [initial] state info (terminating states that shouldn't "bleed") 
 199         const int state_ 
= sc
.state 
& 0x0f; 
 200         if (state_ 
<= SCE_CAML_CHAR
 
 201                 || (isSML 
&& state_ 
== SCE_CAML_STRING
)) 
 202                 sc
.state 
= SCE_CAML_DEFAULT
; 
 203         int nesting 
= (state_ 
>= SCE_CAML_COMMENT
)? (state_ 
- SCE_CAML_COMMENT
): 0; 
 205         // foreach char in range... 
 207                 // set up [per-char] state info 
 208                 int state2 
= -1;                                // (ASSUME no state change) 
 209                 int chColor 
= sc
.currentPos 
- 1;// (ASSUME standard coloring range) 
 210                 bool advance 
= true;                    // (ASSUME scanner "eats" 1 char) 
 212                 // step state machine 
 213                 switch (sc
.state 
& 0x0f) { 
 214                 case SCE_CAML_DEFAULT
: 
 215                         chToken 
= sc
.currentPos
;        // save [possible] token start (JIC) 
 216                         // it's wide open; what do we have? 
 218                                 state2 
= SCE_CAML_IDENTIFIER
; 
 219                         else if (!isSML 
&& sc
.Match('`') && iscamlf(sc
.chNext
)) 
 220                                 state2 
= SCE_CAML_TAGNAME
; 
 221                         else if (!isSML 
&& sc
.Match('#') && isdigit(sc
.chNext
)) 
 222                                 state2 
= SCE_CAML_LINENUM
; 
 223                         else if (isdigit(sc
.ch
)) { 
 224                                 // it's a number, assume base 10 
 225                                 state2 
= SCE_CAML_NUMBER
, chBase 
= 10; 
 227                                         // there MAY be a base specified... 
 228                                         const char* baseC 
= "bBoOxX"; 
 230                                                 if (sc
.chNext 
== 'w') 
 231                                                         sc
.Forward();   // (consume SML "word" indicator) 
 234                                         // ... change to specified base AS REQUIRED 
 235                                         if (strchr(baseC
, sc
.chNext
)) 
 236                                                 chBase 
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward(); 
 238                         } else if (!isSML 
&& sc
.Match('\''))    // (Caml char literal?) 
 239                                 state2 
= SCE_CAML_CHAR
, chLit 
= 0; 
 240                         else if (isSML 
&& sc
.Match('#', '"'))   // (SML char literal?) 
 241                                 state2 
= SCE_CAML_CHAR
, sc
.Forward(); 
 242                         else if (sc
.Match('"')) 
 243                                 state2 
= SCE_CAML_STRING
; 
 244                         else if (sc
.Match('(', '*')) 
 245                                 state2 
= SCE_CAML_COMMENT
, sc
.Forward(), sc
.ch 
= ' '; // (*)... 
 246                         else if (strchr("!?~"                   /* Caml "prefix-symbol" */ 
 247                                         "=<>@^|&+-*/$%"                 /* Caml "infix-symbol" */ 
 248                                         "()[]{};,:.#", sc
.ch
)   // Caml "bracket" or ;,:.# 
 249                                                                                         // SML "extra" ident chars 
 250                                 || (isSML 
&& (sc
.Match('\\') || sc
.Match('`')))) 
 251                                 state2 
= SCE_CAML_OPERATOR
; 
 254                 case SCE_CAML_IDENTIFIER
: 
 255                         // [try to] interpret as [additional] identifier char 
 256                         if (!(iscaml(sc
.ch
) || sc
.Match('\''))) { 
 257                                 const int n 
= sc
.currentPos 
- chToken
; 
 259                                         // length is believable as keyword, [re-]construct token 
 261                                         for (int i 
= -n
; i 
< 0; i
++) 
 262                                                 t
[n 
+ i
] = static_cast<char>(sc
.GetRelative(i
)); 
 264                                         // special-case "_" token as KEYWORD 
 265                                         if ((n 
== 1 && sc
.chPrev 
== '_') || keywords
.InList(t
)) 
 266                                                 sc
.ChangeState(SCE_CAML_KEYWORD
); 
 267                                         else if (keywords2
.InList(t
)) 
 268                                                 sc
.ChangeState(SCE_CAML_KEYWORD2
); 
 269                                         else if (keywords3
.InList(t
)) 
 270                                                 sc
.ChangeState(SCE_CAML_KEYWORD3
); 
 272                                 state2 
= SCE_CAML_DEFAULT
, advance 
= false; 
 276                 case SCE_CAML_TAGNAME
: 
 277                         // [try to] interpret as [additional] tagname char 
 278                         if (!(iscaml(sc
.ch
) || sc
.Match('\''))) 
 279                                 state2 
= SCE_CAML_DEFAULT
, advance 
= false; 
 282                 /*case SCE_CAML_KEYWORD: 
 283                 case SCE_CAML_KEYWORD2: 
 284                 case SCE_CAML_KEYWORD3: 
 285                         // [try to] interpret as [additional] keyword char 
 287                                 state2 = SCE_CAML_DEFAULT, advance = false; 
 290                 case SCE_CAML_LINENUM
: 
 291                         // [try to] interpret as [additional] linenum directive char 
 293                                 state2 
= SCE_CAML_DEFAULT
, advance 
= false; 
 296                 case SCE_CAML_OPERATOR
: { 
 297                         // [try to] interpret as [additional] operator char 
 299                         if (iscaml(sc
.ch
) || isspace(sc
.ch
)                     // ident or whitespace 
 300                                 || (o 
= strchr(")]};,\'\"#", sc
.ch
),o
)  // "termination" chars 
 301                                 || (!isSML 
&& sc
.Match('`'))                    // Caml extra term char 
 302                                 || (!strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)// "operator" chars 
 303                                                                                                                 // SML extra ident chars 
 304                                         && !(isSML 
&& (sc
.Match('\\') || sc
.Match('`'))))) { 
 305                                 // check for INCLUSIVE termination 
 306                                 if (o 
&& strchr(")]};,", sc
.ch
)) { 
 307                                         if ((sc
.Match(')') && sc
.chPrev 
== '(') 
 308                                                 || (sc
.Match(']') && sc
.chPrev 
== '[')) 
 309                                                 // special-case "()" and "[]" tokens as KEYWORDS 
 310                                                 sc
.ChangeState(SCE_CAML_KEYWORD
); 
 314                                 state2 
= SCE_CAML_DEFAULT
; 
 319                 case SCE_CAML_NUMBER
: 
 320                         // [try to] interpret as [additional] numeric literal char 
 321                         if ((!isSML 
&& sc
.Match('_')) || IsADigit(sc
.ch
, chBase
)) 
 323                         // how about an integer suffix? 
 324                         if (!isSML 
&& (sc
.Match('l') || sc
.Match('L') || sc
.Match('n')) 
 325                                 && (sc
.chPrev 
== '_' || IsADigit(sc
.chPrev
, chBase
))) 
 327                         // or a floating-point literal? 
 329                                 // with a decimal point? 
 331                                         && ((!isSML 
&& sc
.chPrev 
== '_') 
 332                                                 || IsADigit(sc
.chPrev
, chBase
))) 
 334                                 // with an exponent? (I) 
 335                                 if ((sc
.Match('e') || sc
.Match('E')) 
 336                                         && ((!isSML 
&& (sc
.chPrev 
== '.' || sc
.chPrev 
== '_')) 
 337                                                 || IsADigit(sc
.chPrev
, chBase
))) 
 339                                 // with an exponent? (II) 
 340                                 if (((!isSML 
&& (sc
.Match('+') || sc
.Match('-'))) 
 341                                                 || (isSML 
&& sc
.Match('~'))) 
 342                                         && (sc
.chPrev 
== 'e' || sc
.chPrev 
== 'E')) 
 345                         // it looks like we have run out of number 
 346                         state2 
= SCE_CAML_DEFAULT
, advance 
= false; 
 351                                 // [try to] interpret as [additional] char literal char 
 352                                 if (sc
.Match('\\')) { 
 353                                         chLit 
= 1;      // (definitely IS a char literal) 
 354                                         if (sc
.chPrev 
== '\\') 
 355                                                 sc
.ch 
= ' ';    // (...\\') 
 356                                 // should we be terminating - one way or another? 
 357                                 } else if ((sc
.Match('\'') && sc
.chPrev 
!= '\\') 
 359                                         state2 
= SCE_CAML_DEFAULT
; 
 363                                                 sc
.ChangeState(SCE_CAML_IDENTIFIER
); 
 364                                 // ... maybe a char literal, maybe not 
 365                                 } else if (chLit 
< 1 && sc
.currentPos 
- chToken 
>= 2) 
 366                                         sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance 
= false; 
 369                                 // fall through for SML char literal (handle like string) */ 
 371                 case SCE_CAML_STRING
: 
 372                         // [try to] interpret as [additional] [SML char/] string literal char 
 373                         if (isSML 
&& sc
.Match('\\') && sc
.chPrev 
!= '\\' && isspace(sc
.chNext
)) 
 374                                 state2 
= SCE_CAML_WHITE
; 
 375                         else if (sc
.Match('\\') && sc
.chPrev 
== '\\') 
 376                                 sc
.ch 
= ' ';    // (...\\") 
 377                         // should we be terminating - one way or another? 
 378                         else if ((sc
.Match('"') && sc
.chPrev 
!= '\\') 
 379                                 || (isSML 
&& sc
.atLineEnd
)) { 
 380                                 state2 
= SCE_CAML_DEFAULT
; 
 387                         // [try to] interpret as [additional] SML embedded whitespace char 
 388                         if (sc
.Match('\\')) { 
 389                                 // style this puppy NOW... 
 390                                 state2 
= SCE_CAML_STRING
, sc
.ch 
= ' ' /* (...\") */, chColor
++, 
 391                                         styler
.ColourTo(chColor
, SCE_CAML_WHITE
), styler
.Flush(); 
 392                                 // ... then backtrack to determine original SML literal type 
 394                                 for (; p 
>= 0 && styler
.StyleAt(p
) == SCE_CAML_WHITE
; p
--) ; 
 396                                         state2 
= static_cast<int>(styler
.StyleAt(p
)); 
 397                                 // take care of state change NOW 
 398                                 sc
.ChangeState(state2
), state2 
= -1; 
 402                 case SCE_CAML_COMMENT
: 
 403                 case SCE_CAML_COMMENT1
: 
 404                 case SCE_CAML_COMMENT2
: 
 405                 case SCE_CAML_COMMENT3
: 
 406                         // we're IN a comment - does this start a NESTED comment? 
 407                         if (sc
.Match('(', '*')) 
 408                                 state2 
= sc
.state 
+ 1, chToken 
= sc
.currentPos
, 
 409                                         sc
.Forward(), sc
.ch 
= ' ' /* (*)... */, nesting
++; 
 410                         // [try to] interpret as [additional] comment char 
 411                         else if (sc
.Match(')') && sc
.chPrev 
== '*') { 
 413                                         state2 
= (sc
.state 
& 0x0f) - 1, chToken 
= 0, nesting
--; 
 415                                         state2 
= SCE_CAML_DEFAULT
; 
 417                         // enable "magic" (read-only) comment AS REQUIRED 
 418                         } else if (useMagic 
&& sc
.currentPos 
- chToken 
== 4 
 419                                 && sc
.Match('c') && sc
.chPrev 
== 'r' && sc
.GetRelative(-2) == '@') 
 420                                 sc
.state 
|= 0x10;       // (switch to read-only comment style) 
 424                 // handle state change and char coloring AS REQUIRED 
 426                         styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
); 
 427                 // move to next char UNLESS re-scanning current char 
 432         // do any required terminal char coloring (JIC) 
 436 #ifdef BUILD_AS_EXTERNAL_LEXER 
 438 #endif  /* BUILD_AS_EXTERNAL_LEXER */ 
 447 static const char * const camlWordListDesc
[] = { 
 448         "Keywords",             // primary Objective Caml keywords 
 449         "Keywords2",    // "optional" keywords (typically from Pervasives) 
 450         "Keywords3",    // "optional" keywords (typically typenames) 
 454 #ifndef BUILD_AS_EXTERNAL_LEXER 
 // Lexer module object for SCLEX_CAML; only compiled when this file is NOT built
 // as an external lexer (see the surrounding BUILD_AS_EXTERNAL_LEXER guard).
 // It is constructed from ColouriseCamlDoc, the name "caml", FoldCamlDoc and the
 // camlWordListDesc descriptions (presumably lexer function, language name,
 // folder function and word-list descriptions -- confirm against LexerModule.h).
 455 LexerModule 
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
); 
 456 #endif  /* BUILD_AS_EXTERNAL_LEXER */