1 // Scintilla source code edit control
3 ** Lexer for Assembler, just for the MASM syntax
4 ** Written by The Black Horus
5 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
9 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10 // The License.txt file describes the conditions under which this software may be distributed.
24 #include "Scintilla.h"
28 #include "LexAccessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 #include "OptionSet.h"
35 using namespace Scintilla
;
/**
 * Tests whether @a ch may appear inside an identifier or number token.
 *
 * Accepted characters are the ASCII letters and digits plus '.', '_' and '?'.
 * Anything outside the range 0..0x7F is rejected.
 *
 * @param ch Character code to classify.
 * @return true when @a ch is a word character, false otherwise.
 */
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for EOF and values representable as
	// unsigned char, so negative codes must be filtered out as well as
	// the non-ASCII ones before it is called.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
/**
 * Tests whether @a ch may begin an identifier.
 *
 * Accepted characters are the ASCII letters and digits plus
 * '_', '.', '%', '@', '$' and '?'. Anything outside the range 0..0x7F
 * is rejected.
 *
 * @param ch Character code to classify.
 * @return true when @a ch can start a word, false otherwise.
 */
static inline bool IsAWordStart(const int ch) {
	// isalnum() is only defined for EOF and values representable as
	// unsigned char, so negative codes must be filtered out as well as
	// the non-ASCII ones before it is called.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
/**
 * Tests whether @a ch is one of the single-character assembler operators.
 *
 * The recognised set is: * / - + ( ) = ^ [ ] < & > , | ~ % :
 * Letters and digits are never operators. No character classification
 * routine is called, so any int value (including negative ones) is safe.
 *
 * @param ch Character code to classify.
 * @return true when @a ch is an operator character, false otherwise.
 */
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	// '.' left out as it is used to make up numbers
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
61 static bool IsStreamCommentStyle(int style
) {
62 return style
== SCE_ASM_COMMENTDIRECTIVE
|| style
== SCE_ASM_COMMENTBLOCK
;
/**
 * Converts an ASCII upper-case letter to lower case.
 *
 * Only 'A'..'Z' are changed; every other value, including non-ASCII and
 * negative codes, is returned untouched. The conversion does not depend on
 * the current locale.
 *
 * @param c Character code to convert.
 * @return The lower-case equivalent of @a c, or @a c itself.
 */
static inline int LowerCase(int c) {
	return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
}
71 // An individual named option for use in an OptionSet
73 // Options used for LexerAsm
// NOTE(review): the enclosing struct header, its constructor line and some
// members that are referenced elsewhere in this file (fold, foldSyntaxBased,
// foldCompact) are not visible in this excerpt.
// First character is the delimiter that ends a COMMENT directive block;
// Lex() falls back to '~' when this string is empty.
75 std::string delimiter
;
// Enables folding of multi-line (stream) comments in Fold().
78 bool foldCommentMultiline
;
// Enables explicit fold markers in Fold().
79 bool foldCommentExplicit
;
// User-defined explicit fold markers; Fold() only uses them when both
// strings are non-empty.
80 std::string foldExplicitStart
;
81 std::string foldExplicitEnd
;
// When set, explicit fold markers are honoured in any style, not only in
// SCE_ASM_COMMENT.
82 bool foldExplicitAnywhere
;
// Default values: syntax-based folding on, every other option shown here
// off or empty.
87 foldSyntaxBased
= true;
88 foldCommentMultiline
= false;
89 foldCommentExplicit
= false;
90 foldExplicitStart
= "";
92 foldExplicitAnywhere
= false;
// Human-readable descriptions of the lexer's keyword lists. The array is
// handed to DefineWordListSets() in OptionSetAsm and to the LexerModule
// registration at the end of the file.
// NOTE(review): the leading entries and the array terminator are not visible
// in this excerpt (the embedded numbering jumps from 97 to 102).
97 static const char * const asmWordListDesc
[] = {
102 "Directive operands",
103 "Extended instructions",
104 "Directives4Foldstart",
105 "Directives4Foldend",
// Binds each property name understood by the Asm lexer to the OptionsAsm
// member that stores its value, with a description string for the documented
// ones, and registers the keyword-list descriptions.
// NOTE(review): the constructor line and the closing braces are not visible
// in this excerpt.
109 struct OptionSetAsm
: public OptionSet
<OptionsAsm
> {
// Delimiter character of the COMMENT directive.
111 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter
,
112 "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
// Registered without a description string.
114 DefineProperty("fold", &OptionsAsm::fold
);
116 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased
,
117 "Set this property to 0 to disable syntax based folding.");
119 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline
,
120 "Set this property to 1 to enable folding multi-line comments.");
122 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit
,
123 "This option enables folding explicit fold points when using the Asm lexer. "
124 "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
125 "at the end of a section that should fold.");
127 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart
,
128 "The string to use for explicit fold start points, replacing the standard ;{.");
130 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd
,
131 "The string to use for explicit fold end points, replacing the standard ;}.");
133 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere
,
134 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
// Registered without a description string.
136 DefineProperty("fold.compact", &OptionsAsm::foldCompact
);
// Publishes the keyword-list descriptions.
138 DefineWordListSets(asmWordListDesc
);
// Lexer object for assembler source. It implements ILexer, holds one
// WordList per keyword category, and forwards the property and word-list
// description queries to its OptionSetAsm member (osAsm).
// NOTE(review): several member declarations (registers, directive, options,
// osAsm), the constructor, access specifiers and every closing brace are not
// visible in this excerpt; the bodies of the destructor, Release(), Version()
// and PrivateCall() are missing too.
142 class LexerAsm
: public ILexer
{
// Keyword lists; WordListSet() fills them and Lex()/Fold() look words up in
// them.
143 WordList cpuInstruction
;
144 WordList mathInstruction
;
147 WordList directiveOperand
;
148 WordList extInstruction
;
// Directives that open / close a syntax-based fold region (used by Fold()).
149 WordList directives4foldstart
;
150 WordList directives4foldend
;
156 virtual ~LexerAsm() {
158 void SCI_METHOD
Release() {
161 int SCI_METHOD
Version() const {
// The next three queries are answered by the option set.
164 const char * SCI_METHOD
PropertyNames() {
165 return osAsm
.PropertyNames();
167 int SCI_METHOD
PropertyType(const char *name
) {
168 return osAsm
.PropertyType(name
);
170 const char * SCI_METHOD
DescribeProperty(const char *name
) {
171 return osAsm
.DescribeProperty(name
);
// Defined out of line below.
173 int SCI_METHOD
PropertySet(const char *key
, const char *val
);
174 const char * SCI_METHOD
DescribeWordListSets() {
175 return osAsm
.DescribeWordListSets();
// Defined out of line below.
177 int SCI_METHOD
WordListSet(int n
, const char *wl
);
178 void SCI_METHOD
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
179 void SCI_METHOD
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
181 void * SCI_METHOD
PrivateCall(int, void *) {
// Factory handed to the LexerModule registration at the end of the file;
// returns a newly allocated LexerAsm.
185 static ILexer
*LexerFactoryAsm() {
186 return new LexerAsm();
// Stores the value `val` for the property named `key` by delegating to the
// option set, which writes into this lexer's `options`.
// NOTE(review): the statements inside the `if` and the function's return
// values are not visible in this excerpt.
190 int SCI_METHOD
LexerAsm::PropertySet(const char *key
, const char *val
) {
191 if (osAsm
.PropertySet(&options
, key
, val
)) {
// Replaces one of the lexer's keyword lists with the words in `wl`.
// `n` presumably selects which list is targeted; the switch/case lines that
// make that choice are not visible in this excerpt, only the eight
// assignments to wordListN.
// Returns firstModification, which starts at -1 and is set to 0 on the path
// where the selected list differs from the new one.
197 int SCI_METHOD
LexerAsm::WordListSet(int n
, const char *wl
) {
198 WordList
*wordListN
= 0;
201 wordListN
= &cpuInstruction
;
204 wordListN
= &mathInstruction
;
// NOTE(review): "®isters" below looks like an HTML-entity mangling of
// "&registers" (compare the sibling assignments) - confirm against the real
// file before building.
207 wordListN
= ®isters
;
210 wordListN
= &directive
;
213 wordListN
= &directiveOperand
;
216 wordListN
= &extInstruction
;
219 wordListN
= &directives4foldstart
;
222 wordListN
= &directives4foldend
;
225 int firstModification
= -1;
// NOTE(review): the declaration and filling of wlNew are not visible here.
229 if (*wordListN
!= wlNew
) {
231 firstModification
= 0;
234 return firstModification
;
// Assigns styles to the document range described by startPos/length,
// starting in style initStyle. The document is reached through pAccess via a
// LexAccessor, and a StyleContext walks it one character at a time. Each
// step first decides whether the current state ends, then - when in the
// default state - whether a new one begins.
// NOTE(review): many lines of this function (braces, some conditions, the
// line-continuation body, the final completion call) are not visible in this
// excerpt; the comments below describe only what is shown.
237 void SCI_METHOD
LexerAsm::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
238 LexAccessor
styler(pAccess
);
240 // Do not leak onto next line
241 if (initStyle
== SCE_ASM_STRINGEOL
)
242 initStyle
= SCE_ASM_DEFAULT
;
244 StyleContext
sc(startPos
, length
, initStyle
, styler
);
246 for (; sc
.More(); sc
.Forward())
249 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
// Re-entering the same state at a line start begins a fresh styled segment.
250 if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_STRING
)) {
251 sc
.SetState(SCE_ASM_STRING
);
252 } else if (sc
.atLineStart
&& (sc
.state
== SCE_ASM_CHARACTER
)) {
253 sc
.SetState(SCE_ASM_CHARACTER
);
256 // Handle line continuation generically.
// NOTE(review): the enclosing test on sc.ch and the statements executed in
// these branches are not visible here.
258 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
260 if (sc
.ch
== '\r' && sc
.chNext
== '\n') {
267 // Determine if the current state should terminate.
// An operator run ends at the first non-operator character.
268 if (sc
.state
== SCE_ASM_OPERATOR
) {
269 if (!IsAsmOperator(sc
.ch
)) {
270 sc
.SetState(SCE_ASM_DEFAULT
);
// A number ends at the first non-word character.
272 } else if (sc
.state
== SCE_ASM_NUMBER
) {
273 if (!IsAWordChar(sc
.ch
)) {
274 sc
.SetState(SCE_ASM_DEFAULT
);
// An identifier ends at the first non-word character; its lower-cased text
// is then looked up in the keyword lists and the token is restyled to the
// first category that contains it.
276 } else if (sc
.state
== SCE_ASM_IDENTIFIER
) {
277 if (!IsAWordChar(sc
.ch
) ) {
279 sc
.GetCurrentLowered(s
, sizeof(s
));
280 bool IsDirective
= false;
282 if (cpuInstruction
.InList(s
)) {
283 sc
.ChangeState(SCE_ASM_CPUINSTRUCTION
);
284 } else if (mathInstruction
.InList(s
)) {
285 sc
.ChangeState(SCE_ASM_MATHINSTRUCTION
);
286 } else if (registers
.InList(s
)) {
287 sc
.ChangeState(SCE_ASM_REGISTER
);
288 } else if (directive
.InList(s
)) {
289 sc
.ChangeState(SCE_ASM_DIRECTIVE
);
// NOTE(review): the line that sets IsDirective is not visible; presumably it
// sits in the directive branch above.
291 } else if (directiveOperand
.InList(s
)) {
292 sc
.ChangeState(SCE_ASM_DIRECTIVEOPERAND
);
293 } else if (extInstruction
.InList(s
)) {
294 sc
.ChangeState(SCE_ASM_EXTINSTRUCTION
);
296 sc
.SetState(SCE_ASM_DEFAULT
);
// The "comment" directive opens a block comment: skip blanks on the same
// line, then enter SCE_ASM_COMMENTDIRECTIVE if the delimiter follows. The
// delimiter is the first character of the configured option, or '~' when the
// option is empty.
297 if (IsDirective
&& !strcmp(s
, "comment")) {
298 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
299 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
) {
300 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
302 if (sc
.ch
== delimiter
) {
303 sc
.SetState(SCE_ASM_COMMENTDIRECTIVE
);
// Inside a COMMENT-directive block: the delimiter character closes it.
// NOTE(review): the body of the while loop below is not visible here.
307 } else if (sc
.state
== SCE_ASM_COMMENTDIRECTIVE
) {
308 char delimiter
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0];
309 if (sc
.ch
== delimiter
) {
310 while (!sc
.atLineEnd
) {
313 sc
.SetState(SCE_ASM_DEFAULT
);
// NOTE(review): the condition that ends a line comment is not visible here.
315 } else if (sc
.state
== SCE_ASM_COMMENT
) {
317 sc
.SetState(SCE_ASM_DEFAULT
);
// Double-quoted string: a closing quote ends it; reaching the end of the
// line first restyles it as SCE_ASM_STRINGEOL.
// NOTE(review): the outer test guarding the chNext check (original line 320)
// and the statement it executes are not visible here.
319 } else if (sc
.state
== SCE_ASM_STRING
) {
321 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
324 } else if (sc
.ch
== '\"') {
325 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
326 } else if (sc
.atLineEnd
) {
327 sc
.ChangeState(SCE_ASM_STRINGEOL
);
328 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
// Single-quoted character literal: same handling as strings, closed by a
// single quote.
330 } else if (sc
.state
== SCE_ASM_CHARACTER
) {
332 if (sc
.chNext
== '\"' || sc
.chNext
== '\'' || sc
.chNext
== '\\') {
335 } else if (sc
.ch
== '\'') {
336 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
337 } else if (sc
.atLineEnd
) {
338 sc
.ChangeState(SCE_ASM_STRINGEOL
);
339 sc
.ForwardSetState(SCE_ASM_DEFAULT
);
343 // Determine if a new state should be entered.
344 if (sc
.state
== SCE_ASM_DEFAULT
) {
// NOTE(review): the test that starts a line comment is not visible here.
346 sc
.SetState(SCE_ASM_COMMENT
);
// A number starts with an ASCII digit, or with '.' followed by an ASCII digit.
347 } else if (isascii(sc
.ch
) && (isdigit(sc
.ch
) || (sc
.ch
== '.' && isascii(sc
.chNext
) && isdigit(sc
.chNext
)))) {
348 sc
.SetState(SCE_ASM_NUMBER
);
349 } else if (IsAWordStart(sc
.ch
)) {
350 sc
.SetState(SCE_ASM_IDENTIFIER
);
351 } else if (sc
.ch
== '\"') {
352 sc
.SetState(SCE_ASM_STRING
);
353 } else if (sc
.ch
== '\'') {
354 sc
.SetState(SCE_ASM_CHARACTER
);
355 } else if (IsAsmOperator(sc
.ch
)) {
356 sc
.SetState(SCE_ASM_OPERATOR
);
364 // Store both the current line's fold level and the next lines in the
365 // level store to make it easy to pick up with each increment
366 // and to make it possible to fiddle the current level for "else".
// Computes fold levels for the lines covered by startPos/length. Each stored
// level packs the line's own level in the low bits and the level that
// applies to the following line in the bits above 16. Three optional sources
// of fold points are examined per character: multi-line comments, explicit
// markers, and directives listed in the fold-start / fold-end word lists.
// NOTE(review): many lines of this function (the early exit, declarations of
// ch / word / wordlen, every level increment or decrement, braces) are not
// visible in this excerpt; the comments below describe only what is shown.
368 void SCI_METHOD
LexerAsm::Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
373 LexAccessor
styler(pAccess
);
375 unsigned int endPos
= startPos
+ length
;
376 int visibleChars
= 0;
377 int lineCurrent
= styler
.GetLine(startPos
);
378 int levelCurrent
= SC_FOLDLEVELBASE
;
// Continue from the "next line" level that the previous line stored in its
// upper half. NOTE(review): the guard around this line is not visible here.
380 levelCurrent
= styler
.LevelAt(lineCurrent
-1) >> 16;
381 int levelNext
= levelCurrent
;
382 char chNext
= styler
[startPos
];
383 int styleNext
= styler
.StyleAt(startPos
);
384 int style
= initStyle
;
// User-defined markers are used only when both strings are configured.
387 const bool userDefinedFoldMarkers
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty();
388 for (unsigned int i
= startPos
; i
< endPos
; i
++) {
390 chNext
= styler
.SafeGetCharAt(i
+ 1);
391 int stylePrev
= style
;
393 styleNext
= styler
.StyleAt(i
+ 1);
// A lone '\r' or any '\n' ends the line; the '\r' of a "\r\n" pair does not.
394 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// Multi-line comment folding: react where a stream-comment style begins
// (previous style was not one) and where it ends (next style is not one).
395 if (options
.foldCommentMultiline
&& IsStreamCommentStyle(style
)) {
396 if (!IsStreamCommentStyle(stylePrev
)) {
398 } else if (!IsStreamCommentStyle(styleNext
) && !atEOL
) {
399 // Comments don't end at end of line and the next character may be unstyled.
// Explicit fold markers: honoured in line comments, or in any style when
// foldExplicitAnywhere is set.
403 if (options
.foldCommentExplicit
&& ((style
== SCE_ASM_COMMENT
) || options
.foldExplicitAnywhere
)) {
404 if (userDefinedFoldMarkers
) {
405 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) {
407 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) {
// NOTE(review): the default-marker tests are only partly visible; this
// branch checks for a closing '}' as the next character.
414 } else if (chNext
== '}') {
// Syntax-based folding: collect the lower-cased characters of a directive
// token into `word`.
420 if (options
.foldSyntaxBased
&& (style
== SCE_ASM_DIRECTIVE
)) {
421 word
[wordlen
++] = static_cast<char>(LowerCase(ch
));
422 if (wordlen
== 100) { // prevent overflow
// When the directive token ends, terminate the word and look it up in the
// fold-start / fold-end lists.
426 if (styleNext
!= SCE_ASM_DIRECTIVE
) { // reading directive ready
427 word
[wordlen
] = '\0';
429 if (directives4foldstart
.InList(word
)) {
431 } else if (directives4foldend
.InList(word
)){
// At each end of line, and at the last character of the range, store the
// level for the current line.
438 if (atEOL
|| (i
== endPos
-1)) {
439 int levelUse
= levelCurrent
;
440 int lev
= levelUse
| levelNext
<< 16;
// Lines without visible characters are flagged as white when fold.compact
// is enabled.
441 if (visibleChars
== 0 && options
.foldCompact
)
442 lev
|= SC_FOLDLEVELWHITEFLAG
;
// A line after which the level rises is a fold header.
443 if (levelUse
< levelNext
)
444 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Only write the level when it changed.
445 if (lev
!= styler
.LevelAt(lineCurrent
)) {
446 styler
.SetLevel(lineCurrent
, lev
);
449 levelCurrent
= levelNext
;
450 if (atEOL
&& (i
== static_cast<unsigned int>(styler
.Length()-1))) {
451 // There is an empty line at end of file so give it same level and empty
452 styler
.SetLevel(lineCurrent
, (levelCurrent
| levelCurrent
<< 16) | SC_FOLDLEVELWHITEFLAG
);
459 LexerModule
lmAsm(SCLEX_ASM
, LexerAsm::LexerFactoryAsm
, "asm", asmWordListDesc
);