1 // Scintilla source code edit control
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
22 #include "Scintilla.h"
26 #include "LexAccessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
35 using namespace Scintilla
;
38 static bool IsSpaceEquiv(int state
) {
39 return (state
<= SCE_C_COMMENTDOC
) ||
40 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
41 (state
== SCE_C_COMMENTLINEDOC
) || (state
== SCE_C_COMMENTDOCKEYWORD
) ||
42 (state
== SCE_C_COMMENTDOCKEYWORDERROR
);
45 // Preconditions: sc.currentPos points to a character after '+' or '-'.
46 // The test for pos reaching 0 should be redundant,
47 // and is in only for safety measures.
48 // Limitation: this code will give the incorrect answer for code like
50 // Putting a space between the '++' post-inc operator and the '+' binary op
51 // fixes this, and is highly recommended for readability anyway.
52 static bool FollowsPostfixOperator(StyleContext
&sc
, LexAccessor
&styler
) {
53 int pos
= (int) sc
.currentPos
;
55 char ch
= styler
[pos
];
56 if (ch
== '+' || ch
== '-') {
57 return styler
[pos
- 1] == ch
;
63 static bool followsReturnKeyword(StyleContext
&sc
, LexAccessor
&styler
) {
64 // Don't look at styles, so no need to flush.
65 int pos
= (int) sc
.currentPos
;
66 int currentLine
= styler
.GetLine(pos
);
67 int lineStartPos
= styler
.LineStart(currentLine
);
69 while (--pos
> lineStartPos
) {
70 ch
= styler
.SafeGetCharAt(pos
);
71 if (ch
!= ' ' && ch
!= '\t') {
75 const char *retBack
= "nruter";
76 const char *s
= retBack
;
78 && pos
>= lineStartPos
79 && styler
.SafeGetCharAt(pos
) == *s
) {
86 static std::string
GetRestOfLine(LexAccessor
&styler
, int start
, bool allowSpace
) {
87 std::string restOfLine
;
89 char ch
= styler
.SafeGetCharAt(start
, '\n');
90 while ((ch
!= '\r') && (ch
!= '\n')) {
91 if (allowSpace
|| (ch
!= ' '))
94 ch
= styler
.SafeGetCharAt(start
+ i
, '\n');
99 static bool IsStreamCommentStyle(int style
) {
100 return style
== SCE_C_COMMENT
||
101 style
== SCE_C_COMMENTDOC
||
102 style
== SCE_C_COMMENTDOCKEYWORD
||
103 style
== SCE_C_COMMENTDOCKEYWORDERROR
;
/**
 * Break a string into tokens separated by spaces and/or tabs.
 *
 * Runs of separators are collapsed, so no empty tokens are produced.
 * The scan walks the C string form of the input and therefore stops at
 * the first NUL character.
 *
 * @param s Text to split.
 * @return The tokens in order of appearance; empty when s holds no
 *         non-separator characters.
 */
static std::vector<std::string> Tokenize(const std::string &s) {
	// Break into space separated tokens
	std::string word;
	std::vector<std::string> tokens;
	for (const char *cp = s.c_str(); *cp; cp++) {
		if ((*cp == ' ') || (*cp == '\t')) {
			// A separator ends the token being accumulated, if any.
			if (!word.empty()) {
				tokens.push_back(word);
				word.clear();
			}
		} else {
			word += *cp;
		}
	}
	// Flush a token that runs up to the end of the input.
	if (!word.empty()) {
		tokens.push_back(word);
	}
	return tokens;
}
/**
 * One recorded preprocessor definition: the line it was seen on together
 * with the defined name and its replacement text.
 */
struct PPDefinition {
	int line;           // Line number on which the definition was found
	std::string key;    // Name being defined
	std::string value;  // Text the name is defined as
	PPDefinition(int line_, const std::string &key_, const std::string &value_) :
		line(line_), key(key_), value(value_) {
	}
};
139 bool ValidLevel() const {
140 return level
>= 0 && level
< 32;
142 int maskLevel() const {
146 LinePPState() : state(0), ifTaken(0), level(-1) {
148 bool IsInactive() const {
151 bool CurrentIfTaken() {
152 return (ifTaken
& maskLevel()) != 0;
154 void StartSection(bool on
) {
158 state
&= ~maskLevel();
159 ifTaken
|= maskLevel();
161 state
|= maskLevel();
162 ifTaken
&= ~maskLevel();
168 state
&= ~maskLevel();
169 ifTaken
&= ~maskLevel();
173 void InvertCurrentLevel() {
175 state
^= maskLevel();
176 ifTaken
|= maskLevel();
181 // Hold the preprocessor state for each line seen.
182 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
184 std::vector
<LinePPState
> vlls
;
186 LinePPState
ForLine(int line
) {
187 if ((line
> 0) && (vlls
.size() > static_cast<size_t>(line
))) {
190 return LinePPState();
193 void Add(int line
, LinePPState lls
) {
199 // An individual named option for use in an OptionSet
201 // Options used for LexerCPP
203 bool stylingWithinPreprocessor
;
204 bool identifiersAllowDollars
;
205 bool trackPreprocessor
;
206 bool updatePreprocessor
;
207 bool triplequotedStrings
;
208 bool hashquotedStrings
;
210 bool foldSyntaxBased
;
212 bool foldCommentMultiline
;
213 bool foldCommentExplicit
;
214 std::string foldExplicitStart
;
215 std::string foldExplicitEnd
;
216 bool foldExplicitAnywhere
;
217 bool foldPreprocessor
;
221 stylingWithinPreprocessor
= false;
222 identifiersAllowDollars
= true;
223 trackPreprocessor
= true;
224 updatePreprocessor
= true;
225 triplequotedStrings
= false;
226 hashquotedStrings
= false;
228 foldSyntaxBased
= true;
230 foldCommentMultiline
= true;
231 foldCommentExplicit
= true;
232 foldExplicitStart
= "";
233 foldExplicitEnd
= "";
234 foldExplicitAnywhere
= false;
235 foldPreprocessor
= false;
// Descriptions of the word lists accepted by the lexer, in list index order.
// NOTE(review): the terminating null entry is restored on the assumption that
// consumers walk the array until a null sentinel - confirm against the
// declarations of DefineWordListSets and LexerModule.
static const char *const cppWordLists[] = {
	"Primary keywords and identifiers",
	"Secondary keywords and identifiers",
	"Documentation comment keywords",
	"Global classes and typedefs",
	"Preprocessor definitions",
	0,
};
250 struct OptionSetCPP
: public OptionSet
<OptionsCPP
> {
252 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor
,
253 "For C++ code, determines whether all preprocessor code is styled in the "
254 "preprocessor style (0, the default) or only from the initial # to the end "
255 "of the command word(1).");
257 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars
,
258 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
260 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor
,
261 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
263 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor
,
264 "Set to 1 to update preprocessor definitions when #define found.");
266 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings
,
267 "Set to 1 to enable highlighting of triple-quoted strings.");
269 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings
,
270 "Set to 1 to enable highlighting of hash-quoted strings.");
272 DefineProperty("fold", &OptionsCPP::fold
);
274 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased
,
275 "Set this property to 0 to disable syntax based folding.");
277 DefineProperty("fold.comment", &OptionsCPP::foldComment
,
278 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
279 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
280 "at the end of a section that should fold.");
282 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline
,
283 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
285 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit
,
286 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
288 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart
,
289 "The string to use for explicit fold start points, replacing the standard //{.");
291 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd
,
292 "The string to use for explicit fold end points, replacing the standard //}.");
294 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere
,
295 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
297 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor
,
298 "This option enables folding preprocessor directives when using the C++ lexer. "
299 "Includes C#'s explicit #region and #endregion folding directives.");
301 DefineProperty("fold.compact", &OptionsCPP::foldCompact
);
303 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse
,
304 "This option enables C++ folding on a \"} else {\" line of an if statement.");
306 DefineWordListSets(cppWordLists
);
310 class LexerCPP
: public ILexer
{
312 CharacterSet setWord
;
313 CharacterSet setNegationOp
;
314 CharacterSet setArithmethicOp
;
315 CharacterSet setRelOp
;
316 CharacterSet setLogicalOp
;
318 std::vector
<PPDefinition
> ppDefineHistory
;
323 WordList ppDefinitions
;
324 std::map
<std::string
, std::string
> preprocessorDefinitionsStart
;
327 SparseState
<std::string
> rawStringTerminators
;
328 enum { activeFlag
= 0x40 };
330 LexerCPP(bool caseSensitive_
) :
331 caseSensitive(caseSensitive_
),
332 setWord(CharacterSet::setAlphaNum
, "._", 0x80, true),
333 setNegationOp(CharacterSet::setNone
, "!"),
334 setArithmethicOp(CharacterSet::setNone
, "+-/*%"),
335 setRelOp(CharacterSet::setNone
, "=!<>"),
336 setLogicalOp(CharacterSet::setNone
, "|&") {
338 virtual ~LexerCPP() {
340 void SCI_METHOD
Release() {
343 int SCI_METHOD
Version() const {
346 const char * SCI_METHOD
PropertyNames() {
347 return osCPP
.PropertyNames();
349 int SCI_METHOD
PropertyType(const char *name
) {
350 return osCPP
.PropertyType(name
);
352 const char * SCI_METHOD
DescribeProperty(const char *name
) {
353 return osCPP
.DescribeProperty(name
);
355 int SCI_METHOD
PropertySet(const char *key
, const char *val
);
356 const char * SCI_METHOD
DescribeWordListSets() {
357 return osCPP
.DescribeWordListSets();
359 int SCI_METHOD
WordListSet(int n
, const char *wl
);
360 void SCI_METHOD
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
361 void SCI_METHOD
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
363 void * SCI_METHOD
PrivateCall(int, void *) {
367 static ILexer
*LexerFactoryCPP() {
368 return new LexerCPP(true);
370 static ILexer
*LexerFactoryCPPInsensitive() {
371 return new LexerCPP(false);
373 static int MaskActive(int style
) {
374 return style
& ~activeFlag
;
376 void EvaluateTokens(std::vector
<std::string
> &tokens
);
377 bool EvaluateExpression(const std::string
&expr
, const std::map
<std::string
, std::string
> &preprocessorDefinitions
);
380 int SCI_METHOD
LexerCPP::PropertySet(const char *key
, const char *val
) {
381 if (osCPP
.PropertySet(&options
, key
, val
)) {
382 if (strcmp(key
, "lexer.cpp.allow.dollars") == 0) {
383 setWord
= CharacterSet(CharacterSet::setAlphaNum
, "._", 0x80, true);
384 if (options
.identifiersAllowDollars
) {
393 int SCI_METHOD
LexerCPP::WordListSet(int n
, const char *wl
) {
394 WordList
*wordListN
= 0;
397 wordListN
= &keywords
;
400 wordListN
= &keywords2
;
403 wordListN
= &keywords3
;
406 wordListN
= &keywords4
;
409 wordListN
= &ppDefinitions
;
412 int firstModification
= -1;
416 if (*wordListN
!= wlNew
) {
418 firstModification
= 0;
420 // Rebuild preprocessorDefinitions
421 preprocessorDefinitionsStart
.clear();
422 for (int nDefinition
= 0; nDefinition
< ppDefinitions
.len
; nDefinition
++) {
423 char *cpDefinition
= ppDefinitions
.words
[nDefinition
];
424 char *cpEquals
= strchr(cpDefinition
, '=');
426 std::string
name(cpDefinition
, cpEquals
- cpDefinition
);
427 std::string
val(cpEquals
+1);
428 preprocessorDefinitionsStart
[name
] = val
;
430 std::string
name(cpDefinition
);
431 std::string
val("1");
432 preprocessorDefinitionsStart
[name
] = val
;
438 return firstModification
;
441 // Functor used to truncate history
444 After(int line_
) : line(line_
) {}
445 bool operator()(PPDefinition
&p
) const {
446 return p
.line
> line
;
450 void SCI_METHOD
LexerCPP::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
451 LexAccessor
styler(pAccess
);
453 CharacterSet
setOKBeforeRE(CharacterSet::setNone
, "([{=,:;!%^&*|?~+-");
454 CharacterSet
setCouldBePostOp(CharacterSet::setNone
, "+-");
456 CharacterSet
setDoxygen(CharacterSet::setAlpha
, "$@\\&<>#{}[]");
458 CharacterSet
setWordStart(CharacterSet::setAlpha
, "_", 0x80, true);
460 if (options
.identifiersAllowDollars
) {
461 setWordStart
.Add('$');
464 int chPrevNonWhite
= ' ';
465 int visibleChars
= 0;
466 bool lastWordWasUUID
= false;
467 int styleBeforeDCKeyword
= SCE_C_DEFAULT
;
468 bool continuationLine
= false;
469 bool isIncludePreprocessor
= false;
471 int lineCurrent
= styler
.GetLine(startPos
);
472 if ((MaskActive(initStyle
) == SCE_C_PREPROCESSOR
) ||
473 (MaskActive(initStyle
) == SCE_C_COMMENTLINE
) ||
474 (MaskActive(initStyle
) == SCE_C_COMMENTLINEDOC
)) {
475 // Set continuationLine if last character of previous line is '\'
476 if (lineCurrent
> 0) {
477 int chBack
= styler
.SafeGetCharAt(startPos
-1, 0);
478 int chBack2
= styler
.SafeGetCharAt(startPos
-2, 0);
479 int lineEndChar
= '!';
480 if (chBack2
== '\r' && chBack
== '\n') {
481 lineEndChar
= styler
.SafeGetCharAt(startPos
-3, 0);
482 } else if (chBack
== '\n' || chBack
== '\r') {
483 lineEndChar
= chBack2
;
485 continuationLine
= lineEndChar
== '\\';
489 // look back to set chPrevNonWhite properly for better regex colouring
492 while (--back
&& IsSpaceEquiv(MaskActive(styler
.StyleAt(back
))))
494 if (MaskActive(styler
.StyleAt(back
)) == SCE_C_OPERATOR
) {
495 chPrevNonWhite
= styler
.SafeGetCharAt(back
);
499 StyleContext
sc(startPos
, length
, initStyle
, styler
, 0x7f);
500 LinePPState preproc
= vlls
.ForLine(lineCurrent
);
502 bool definitionsChanged
= false;
504 // Truncate ppDefineHistory before current line
506 if (!options
.updatePreprocessor
)
507 ppDefineHistory
.clear();
509 std::vector
<PPDefinition
>::iterator itInvalid
= std::find_if(ppDefineHistory
.begin(), ppDefineHistory
.end(), After(lineCurrent
-1));
510 if (itInvalid
!= ppDefineHistory
.end()) {
511 ppDefineHistory
.erase(itInvalid
, ppDefineHistory
.end());
512 definitionsChanged
= true;
515 std::map
<std::string
, std::string
> preprocessorDefinitions
= preprocessorDefinitionsStart
;
516 for (std::vector
<PPDefinition
>::iterator itDef
= ppDefineHistory
.begin(); itDef
!= ppDefineHistory
.end(); ++itDef
) {
517 preprocessorDefinitions
[itDef
->key
] = itDef
->value
;
520 std::string rawStringTerminator
= rawStringTerminators
.ValueAt(lineCurrent
-1);
521 SparseState
<std::string
> rawSTNew(lineCurrent
);
523 int activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
527 if (sc
.atLineStart
) {
528 // Using MaskActive() is not needed in the following statement.
529 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
530 if ((sc
.state
== SCE_C_STRING
) || (sc
.state
== SCE_C_CHARACTER
)) {
531 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
532 // ends with a line continuation by locking in the state up to this position.
533 sc
.SetState(sc
.state
);
535 if ((MaskActive(sc
.state
) == SCE_C_PREPROCESSOR
) && (!continuationLine
)) {
536 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
538 // Reset states to beginning of colourise so no surprises
539 // if different sets of lines lexed.
541 lastWordWasUUID
= false;
542 isIncludePreprocessor
= false;
543 if (preproc
.IsInactive()) {
544 activitySet
= activeFlag
;
545 sc
.SetState(sc
.state
| activitySet
);
551 vlls
.Add(lineCurrent
, preproc
);
552 if (rawStringTerminator
!= "") {
553 rawSTNew
.Set(lineCurrent
-1, rawStringTerminator
);
557 // Handle line continuation generically.
559 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
561 vlls
.Add(lineCurrent
, preproc
);
563 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
566 continuationLine
= true;
572 const bool atLineEndBeforeSwitch
= sc
.atLineEnd
;
574 // Determine if the current state should terminate.
575 switch (MaskActive(sc
.state
)) {
577 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
580 // We accept almost anything because of hex. and number suffixes
581 if (!(setWord
.Contains(sc
.ch
) || ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
582 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
585 case SCE_C_IDENTIFIER
:
586 if (!setWord
.Contains(sc
.ch
) || (sc
.ch
== '.')) {
589 sc
.GetCurrent(s
, sizeof(s
));
591 sc
.GetCurrentLowered(s
, sizeof(s
));
593 if (keywords
.InList(s
)) {
594 lastWordWasUUID
= strcmp(s
, "uuid") == 0;
595 sc
.ChangeState(SCE_C_WORD
|activitySet
);
596 } else if (keywords2
.InList(s
)) {
597 sc
.ChangeState(SCE_C_WORD2
|activitySet
);
598 } else if (keywords4
.InList(s
)) {
599 sc
.ChangeState(SCE_C_GLOBALCLASS
|activitySet
);
601 const bool literalString
= sc
.ch
== '\"';
602 if (literalString
|| sc
.ch
== '\'') {
603 size_t lenS
= strlen(s
);
604 const bool raw
= literalString
&& sc
.chPrev
== 'R';
609 ((lenS
== 1) && ((s
[0] == 'L') || (s
[0] == 'u') || (s
[0] == 'U'))) ||
610 ((lenS
== 2) && literalString
&& (s
[0] == 'u') && (s
[1] == '8'));
613 sc
.ChangeState((raw
? SCE_C_STRINGRAW
: SCE_C_STRING
)|activitySet
);
615 sc
.ChangeState(SCE_C_CHARACTER
|activitySet
);
618 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
621 case SCE_C_PREPROCESSOR
:
622 if (options
.stylingWithinPreprocessor
) {
623 if (IsASpace(sc
.ch
)) {
624 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
627 if (sc
.Match('/', '*')) {
628 sc
.SetState(SCE_C_PREPROCESSORCOMMENT
|activitySet
);
629 sc
.Forward(); // Eat the *
630 } else if (sc
.Match('/', '/')) {
631 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
635 case SCE_C_PREPROCESSORCOMMENT
:
636 if (sc
.Match('*', '/')) {
638 sc
.ForwardSetState(SCE_C_PREPROCESSOR
|activitySet
);
639 continue; // Without advancing in case of '\'.
643 if (sc
.Match('*', '/')) {
645 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
648 case SCE_C_COMMENTDOC
:
649 if (sc
.Match('*', '/')) {
651 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
652 } else if (sc
.ch
== '@' || sc
.ch
== '\\') { // JavaDoc and Doxygen support
653 // Verify that we have the conditions to mark a comment-doc-keyword
654 if ((IsASpace(sc
.chPrev
) || sc
.chPrev
== '*') && (!IsASpace(sc
.chNext
))) {
655 styleBeforeDCKeyword
= SCE_C_COMMENTDOC
;
656 sc
.SetState(SCE_C_COMMENTDOCKEYWORD
|activitySet
);
660 case SCE_C_COMMENTLINE
:
661 if (sc
.atLineStart
&& !continuationLine
) {
662 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
665 case SCE_C_COMMENTLINEDOC
:
666 if (sc
.atLineStart
&& !continuationLine
) {
667 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
668 } else if (sc
.ch
== '@' || sc
.ch
== '\\') { // JavaDoc and Doxygen support
669 // Verify that we have the conditions to mark a comment-doc-keyword
670 if ((IsASpace(sc
.chPrev
) || sc
.chPrev
== '/' || sc
.chPrev
== '!') && (!IsASpace(sc
.chNext
))) {
671 styleBeforeDCKeyword
= SCE_C_COMMENTLINEDOC
;
672 sc
.SetState(SCE_C_COMMENTDOCKEYWORD
|activitySet
);
676 case SCE_C_COMMENTDOCKEYWORD
:
677 if ((styleBeforeDCKeyword
== SCE_C_COMMENTDOC
) && sc
.Match('*', '/')) {
678 sc
.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR
);
680 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
681 } else if (!setDoxygen
.Contains(sc
.ch
)) {
684 sc
.GetCurrent(s
, sizeof(s
));
686 sc
.GetCurrentLowered(s
, sizeof(s
));
688 if (!IsASpace(sc
.ch
) || !keywords3
.InList(s
+ 1)) {
689 sc
.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR
|activitySet
);
691 sc
.SetState(styleBeforeDCKeyword
|activitySet
);
696 sc
.ChangeState(SCE_C_STRINGEOL
|activitySet
);
697 } else if (isIncludePreprocessor
) {
699 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
700 isIncludePreprocessor
= false;
702 } else if (sc
.ch
== '\\') {
703 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
706 } else if (sc
.ch
== '\"') {
707 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
710 case SCE_C_HASHQUOTEDSTRING
:
712 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
715 } else if (sc
.ch
== '\"') {
716 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
719 case SCE_C_STRINGRAW
:
720 if (sc
.Match(rawStringTerminator
.c_str())) {
721 for (size_t termPos
=rawStringTerminator
.size(); termPos
; termPos
--)
723 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
724 rawStringTerminator
= "";
727 case SCE_C_CHARACTER
:
729 sc
.ChangeState(SCE_C_STRINGEOL
|activitySet
);
730 } else if (sc
.ch
== '\\') {
731 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
734 } else if (sc
.ch
== '\'') {
735 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
739 if (sc
.atLineStart
) {
740 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
741 } else if (sc
.ch
== '/') {
743 while ((sc
.ch
< 0x80) && islower(sc
.ch
))
744 sc
.Forward(); // gobble regex flags
745 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
746 } else if (sc
.ch
== '\\') {
747 // Gobble up the quoted character
748 if (sc
.chNext
== '\\' || sc
.chNext
== '/') {
753 case SCE_C_STRINGEOL
:
754 if (sc
.atLineStart
) {
755 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
760 if (sc
.chNext
== '\"') {
763 sc
.ForwardSetState(SCE_C_DEFAULT
|activitySet
);
767 case SCE_C_TRIPLEVERBATIM
:
768 if (sc
.Match("\"\"\"")) {
769 while (sc
.Match('"')) {
772 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
776 if (sc
.ch
== '\r' || sc
.ch
== '\n' || sc
.ch
== ')') {
777 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
781 if (sc
.atLineEnd
&& !atLineEndBeforeSwitch
) {
782 // State exit processing consumed characters up to end of line.
784 vlls
.Add(lineCurrent
, preproc
);
787 // Determine if a new state should be entered.
788 if (MaskActive(sc
.state
) == SCE_C_DEFAULT
) {
789 if (sc
.Match('@', '\"')) {
790 sc
.SetState(SCE_C_VERBATIM
|activitySet
);
792 } else if (options
.triplequotedStrings
&& sc
.Match("\"\"\"")) {
793 sc
.SetState(SCE_C_TRIPLEVERBATIM
|activitySet
);
795 } else if (options
.hashquotedStrings
&& sc
.Match('#', '\"')) {
796 sc
.SetState(SCE_C_HASHQUOTEDSTRING
|activitySet
);
798 } else if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
799 if (lastWordWasUUID
) {
800 sc
.SetState(SCE_C_UUID
|activitySet
);
801 lastWordWasUUID
= false;
803 sc
.SetState(SCE_C_NUMBER
|activitySet
);
805 } else if (setWordStart
.Contains(sc
.ch
) || (sc
.ch
== '@')) {
806 if (lastWordWasUUID
) {
807 sc
.SetState(SCE_C_UUID
|activitySet
);
808 lastWordWasUUID
= false;
810 sc
.SetState(SCE_C_IDENTIFIER
|activitySet
);
812 } else if (sc
.Match('/', '*')) {
813 if (sc
.Match("/**") || sc
.Match("/*!")) { // Support of Qt/Doxygen doc. style
814 sc
.SetState(SCE_C_COMMENTDOC
|activitySet
);
816 sc
.SetState(SCE_C_COMMENT
|activitySet
);
818 sc
.Forward(); // Eat the * so it isn't used for the end of the comment
819 } else if (sc
.Match('/', '/')) {
820 if ((sc
.Match("///") && !sc
.Match("////")) || sc
.Match("//!"))
821 // Support of Qt/Doxygen doc. style
822 sc
.SetState(SCE_C_COMMENTLINEDOC
|activitySet
);
824 sc
.SetState(SCE_C_COMMENTLINE
|activitySet
);
825 } else if (sc
.ch
== '/'
826 && (setOKBeforeRE
.Contains(chPrevNonWhite
)
827 || followsReturnKeyword(sc
, styler
))
828 && (!setCouldBePostOp
.Contains(chPrevNonWhite
)
829 || !FollowsPostfixOperator(sc
, styler
))) {
830 sc
.SetState(SCE_C_REGEX
|activitySet
); // JavaScript's RegEx
831 } else if (sc
.ch
== '\"') {
832 if (sc
.chPrev
== 'R') {
834 if (MaskActive(styler
.StyleAt(sc
.currentPos
- 1)) == SCE_C_STRINGRAW
) {
835 sc
.SetState(SCE_C_STRINGRAW
|activitySet
);
836 rawStringTerminator
= ")";
837 for (int termPos
= sc
.currentPos
+ 1;; termPos
++) {
838 char chTerminator
= styler
.SafeGetCharAt(termPos
, '(');
839 if (chTerminator
== '(')
841 rawStringTerminator
+= chTerminator
;
843 rawStringTerminator
+= '\"';
845 sc
.SetState(SCE_C_STRING
|activitySet
);
848 sc
.SetState(SCE_C_STRING
|activitySet
);
850 isIncludePreprocessor
= false; // ensure that '>' won't end the string
851 } else if (isIncludePreprocessor
&& sc
.ch
== '<') {
852 sc
.SetState(SCE_C_STRING
|activitySet
);
853 } else if (sc
.ch
== '\'') {
854 sc
.SetState(SCE_C_CHARACTER
|activitySet
);
855 } else if (sc
.ch
== '#' && visibleChars
== 0) {
856 // Preprocessor commands are alone on their line
857 sc
.SetState(SCE_C_PREPROCESSOR
|activitySet
);
858 // Skip whitespace between # and preprocessor word
861 } while ((sc
.ch
== ' ' || sc
.ch
== '\t') && sc
.More());
863 sc
.SetState(SCE_C_DEFAULT
|activitySet
);
864 } else if (sc
.Match("include")) {
865 isIncludePreprocessor
= true;
867 if (options
.trackPreprocessor
) {
868 if (sc
.Match("ifdef") || sc
.Match("ifndef")) {
869 bool isIfDef
= sc
.Match("ifdef");
870 int i
= isIfDef
? 5 : 6;
871 std::string restOfLine
= GetRestOfLine(styler
, sc
.currentPos
+ i
+ 1, false);
872 bool foundDef
= preprocessorDefinitions
.find(restOfLine
) != preprocessorDefinitions
.end();
873 preproc
.StartSection(isIfDef
== foundDef
);
874 } else if (sc
.Match("if")) {
875 std::string restOfLine
= GetRestOfLine(styler
, sc
.currentPos
+ 2, true);
876 bool ifGood
= EvaluateExpression(restOfLine
, preprocessorDefinitions
);
877 preproc
.StartSection(ifGood
);
878 } else if (sc
.Match("else")) {
879 if (!preproc
.CurrentIfTaken()) {
880 preproc
.InvertCurrentLevel();
881 activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
883 sc
.ChangeState(SCE_C_PREPROCESSOR
|activitySet
);
884 } else if (!preproc
.IsInactive()) {
885 preproc
.InvertCurrentLevel();
886 activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
888 sc
.ChangeState(SCE_C_PREPROCESSOR
|activitySet
);
890 } else if (sc
.Match("elif")) {
891 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
892 if (!preproc
.CurrentIfTaken()) {
894 std::string restOfLine
= GetRestOfLine(styler
, sc
.currentPos
+ 2, true);
895 bool ifGood
= EvaluateExpression(restOfLine
, preprocessorDefinitions
);
897 preproc
.InvertCurrentLevel();
898 activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
900 sc
.ChangeState(SCE_C_PREPROCESSOR
|activitySet
);
902 } else if (!preproc
.IsInactive()) {
903 preproc
.InvertCurrentLevel();
904 activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
906 sc
.ChangeState(SCE_C_PREPROCESSOR
|activitySet
);
908 } else if (sc
.Match("endif")) {
909 preproc
.EndSection();
910 activitySet
= preproc
.IsInactive() ? activeFlag
: 0;
911 sc
.ChangeState(SCE_C_PREPROCESSOR
|activitySet
);
912 } else if (sc
.Match("define")) {
913 if (options
.updatePreprocessor
&& !preproc
.IsInactive()) {
914 std::string restOfLine
= GetRestOfLine(styler
, sc
.currentPos
+ 6, true);
915 if (restOfLine
.find(")") == std::string::npos
) { // Don't handle macros with arguments
916 std::vector
<std::string
> tokens
= Tokenize(restOfLine
);
918 std::string
value("1");
919 if (tokens
.size() >= 1) {
921 if (tokens
.size() >= 2) {
924 preprocessorDefinitions
[key
] = value
;
925 ppDefineHistory
.push_back(PPDefinition(lineCurrent
, key
, value
));
926 definitionsChanged
= true;
933 } else if (isoperator(static_cast<char>(sc
.ch
))) {
934 sc
.SetState(SCE_C_OPERATOR
|activitySet
);
938 if (!IsASpace(sc
.ch
) && !IsSpaceEquiv(MaskActive(sc
.state
))) {
939 chPrevNonWhite
= sc
.ch
;
942 continuationLine
= false;
945 const bool rawStringsChanged
= rawStringTerminators
.Merge(rawSTNew
, lineCurrent
);
946 if (definitionsChanged
|| rawStringsChanged
)
947 styler
.ChangeLexerState(startPos
, startPos
+ length
);
951 // Store both the current line's fold level and the next lines in the
952 // level store to make it easy to pick up with each increment
953 // and to make it possible to fiddle the current level for "} else {".
955 void SCI_METHOD
LexerCPP::Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
960 LexAccessor
styler(pAccess
);
962 unsigned int endPos
= startPos
+ length
;
963 int visibleChars
= 0;
964 int lineCurrent
= styler
.GetLine(startPos
);
965 int levelCurrent
= SC_FOLDLEVELBASE
;
967 levelCurrent
= styler
.LevelAt(lineCurrent
-1) >> 16;
968 int levelMinCurrent
= levelCurrent
;
969 int levelNext
= levelCurrent
;
970 char chNext
= styler
[startPos
];
971 int styleNext
= MaskActive(styler
.StyleAt(startPos
));
972 int style
= MaskActive(initStyle
);
973 const bool userDefinedFoldMarkers
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty();
974 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
976 chNext
= styler
.SafeGetCharAt(i
+ 1);
977 int stylePrev
= style
;
979 styleNext
= MaskActive(styler
.StyleAt(i
+ 1));
980 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
981 if (options
.foldComment
&& options
.foldCommentMultiline
&& IsStreamCommentStyle(style
)) {
982 if (!IsStreamCommentStyle(stylePrev
) && (stylePrev
!= SCE_C_COMMENTLINEDOC
)) {
984 } else if (!IsStreamCommentStyle(styleNext
) && (styleNext
!= SCE_C_COMMENTLINEDOC
) && !atEOL
) {
985 // Comments don't end at end of line and the next character may be unstyled.
989 if (options
.foldComment
&& options
.foldCommentExplicit
&& ((style
== SCE_C_COMMENTLINE
) || options
.foldExplicitAnywhere
)) {
990 if (userDefinedFoldMarkers
) {
991 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) {
993 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) {
997 if ((ch
== '/') && (chNext
== '/')) {
998 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
999 if (chNext2
== '{') {
1001 } else if (chNext2
== '}') {
1007 if (options
.foldPreprocessor
&& (style
== SCE_C_PREPROCESSOR
)) {
1009 unsigned int j
= i
+ 1;
1010 while ((j
< endPos
) && IsASpaceOrTab(styler
.SafeGetCharAt(j
))) {
1013 if (styler
.Match(j
, "region") || styler
.Match(j
, "if")) {
1015 } else if (styler
.Match(j
, "end")) {
1020 if (options
.foldSyntaxBased
&& (style
== SCE_C_OPERATOR
)) {
1022 // Measure the minimum before a '{' to allow
1023 // folding on "} else {"
1024 if (levelMinCurrent
> levelNext
) {
1025 levelMinCurrent
= levelNext
;
1028 } else if (ch
== '}') {
1034 if (atEOL
|| (i
== endPos
-1)) {
1035 int levelUse
= levelCurrent
;
1036 if (options
.foldSyntaxBased
&& options
.foldAtElse
) {
1037 levelUse
= levelMinCurrent
;
1039 int lev
= levelUse
| levelNext
<< 16;
1040 if (visibleChars
== 0 && options
.foldCompact
)
1041 lev
|= SC_FOLDLEVELWHITEFLAG
;
1042 if (levelUse
< levelNext
)
1043 lev
|= SC_FOLDLEVELHEADERFLAG
;
1044 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1045 styler
.SetLevel(lineCurrent
, lev
);
1048 levelCurrent
= levelNext
;
1049 levelMinCurrent
= levelCurrent
;
1050 if (atEOL
&& (i
== static_cast<unsigned int>(styler
.Length()-1))) {
1051 // There is an empty line at end of file so give it same level and empty
1052 styler
.SetLevel(lineCurrent
, (levelCurrent
| levelCurrent
<< 16) | SC_FOLDLEVELWHITEFLAG
);
1059 void LexerCPP::EvaluateTokens(std::vector
<std::string
> &tokens
) {
1061 // Evaluate defined() statements to either 0 or 1
1062 for (size_t i
=0; (i
+2)<tokens
.size();) {
1063 if ((tokens
[i
] == "defined") && (tokens
[i
+1] == "(")) {
1064 const char *val
= "0";
1065 if (tokens
[i
+2] == ")") {
1067 tokens
.erase(tokens
.begin() + i
+ 1, tokens
.begin() + i
+ 3);
1068 } else if (((i
+3)<tokens
.size()) && (tokens
[i
+3] == ")")) {
1070 tokens
.erase(tokens
.begin() + i
+ 1, tokens
.begin() + i
+ 4);
1079 // Find bracketed subexpressions and recurse on them
1080 std::vector
<std::string
>::iterator itBracket
= std::find(tokens
.begin(), tokens
.end(), "(");
1081 std::vector
<std::string
>::iterator itEndBracket
= std::find(tokens
.begin(), tokens
.end(), ")");
1082 while ((itBracket
!= tokens
.end()) && (itEndBracket
!= tokens
.end()) && (itEndBracket
> itBracket
)) {
1083 std::vector
<std::string
> inBracket(itBracket
+ 1, itEndBracket
);
1084 EvaluateTokens(inBracket
);
1086 // The insertion is done before the removal because there were failures with the opposite approach
1087 tokens
.insert(itBracket
, inBracket
.begin(), inBracket
.end());
1088 itBracket
= std::find(tokens
.begin(), tokens
.end(), "(");
1089 itEndBracket
= std::find(tokens
.begin(), tokens
.end(), ")");
1090 tokens
.erase(itBracket
, itEndBracket
+ 1);
1092 itBracket
= std::find(tokens
.begin(), tokens
.end(), "(");
1093 itEndBracket
= std::find(tokens
.begin(), tokens
.end(), ")");
1096 // Evaluate logical negations
1097 for (size_t j
=0; (j
+1)<tokens
.size();) {
1098 if (setNegationOp
.Contains(tokens
[j
][0])) {
1099 int isTrue
= atoi(tokens
[j
+1].c_str());
1100 if (tokens
[j
] == "!")
1102 std::vector
<std::string
>::iterator itInsert
=
1103 tokens
.erase(tokens
.begin() + j
, tokens
.begin() + j
+ 2);
1104 tokens
.insert(itInsert
, isTrue
? "1" : "0");
1110 // Evaluate expressions in precedence order
1111 enum precedence
{ precArithmetic
, precRelative
, precLogical
};
1112 for (int prec
=precArithmetic
; prec
<= precLogical
; prec
++) {
1113 // Looking at 3 tokens at a time so end at 2 before end
1114 for (size_t k
=0; (k
+2)<tokens
.size();) {
1115 char chOp
= tokens
[k
+1][0];
1117 ((prec
==precArithmetic
) && setArithmethicOp
.Contains(chOp
)) ||
1118 ((prec
==precRelative
) && setRelOp
.Contains(chOp
)) ||
1119 ((prec
==precLogical
) && setLogicalOp
.Contains(chOp
))
1121 int valA
= atoi(tokens
[k
].c_str());
1122 int valB
= atoi(tokens
[k
+2].c_str());
1124 if (tokens
[k
+1] == "+")
1125 result
= valA
+ valB
;
1126 else if (tokens
[k
+1] == "-")
1127 result
= valA
- valB
;
1128 else if (tokens
[k
+1] == "*")
1129 result
= valA
* valB
;
1130 else if (tokens
[k
+1] == "/")
1131 result
= valA
/ (valB
? valB
: 1);
1132 else if (tokens
[k
+1] == "%")
1133 result
= valA
% (valB
? valB
: 1);
1134 else if (tokens
[k
+1] == "<")
1135 result
= valA
< valB
;
1136 else if (tokens
[k
+1] == "<=")
1137 result
= valA
<= valB
;
1138 else if (tokens
[k
+1] == ">")
1139 result
= valA
> valB
;
1140 else if (tokens
[k
+1] == ">=")
1141 result
= valA
>= valB
;
1142 else if (tokens
[k
+1] == "==")
1143 result
= valA
== valB
;
1144 else if (tokens
[k
+1] == "!=")
1145 result
= valA
!= valB
;
1146 else if (tokens
[k
+1] == "||")
1147 result
= valA
|| valB
;
1148 else if (tokens
[k
+1] == "&&")
1149 result
= valA
&& valB
;
1151 sprintf(sResult
, "%d", result
);
1152 std::vector
<std::string
>::iterator itInsert
=
1153 tokens
.erase(tokens
.begin() + k
, tokens
.begin() + k
+ 3);
1154 tokens
.insert(itInsert
, sResult
);
1162 bool LexerCPP::EvaluateExpression(const std::string
&expr
, const std::map
<std::string
, std::string
> &preprocessorDefinitions
) {
1163 // Break into tokens, replacing with definitions
1165 std::vector
<std::string
> tokens
;
1166 const char *cp
= expr
.c_str();
1168 if (setWord
.Contains(*cp
)) {
1171 std::map
<std::string
, std::string
>::const_iterator it
= preprocessorDefinitions
.find(word
);
1172 if (it
!= preprocessorDefinitions
.end()) {
1173 tokens
.push_back(it
->second
);
1174 } else if (!word
.empty() && ((word
[0] >= '0' && word
[0] <= '9') || (word
== "defined"))) {
1175 tokens
.push_back(word
);
1181 if ((*cp
!= ' ') && (*cp
!= '\t')) {
1182 std::string
op(cp
, 1);
1183 if (setRelOp
.Contains(*cp
)) {
1184 if (setRelOp
.Contains(cp
[1])) {
1188 } else if (setLogicalOp
.Contains(*cp
)) {
1189 if (setLogicalOp
.Contains(cp
[1])) {
1194 tokens
.push_back(op
);
1200 EvaluateTokens(tokens
);
1202 // "0" or "" -> false else true
1203 bool isFalse
= tokens
.empty() ||
1204 ((tokens
.size() == 1) && ((tokens
[0] == "") || tokens
[0] == "0"));
1208 LexerModule
lmCPP(SCLEX_CPP
, LexerCPP::LexerFactoryCPP
, "cpp", cppWordLists
);
1209 LexerModule
lmCPPNoCase(SCLEX_CPPNOCASE
, LexerCPP::LexerFactoryCPPInsensitive
, "cppnocase", cppWordLists
);