// Scintilla source code edit control
/**
 ** Lexer for Assembler, just for the MASM syntax
 ** Written by The Black Horus
 ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
 ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include <string>
#include <map>
#include <set>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"

#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif
  38 static inline bool IsAWordChar(const int ch
) { 
  39         return (ch 
< 0x80) && (isalnum(ch
) || ch 
== '.' || 
  40                 ch 
== '_' || ch 
== '?'); 
  43 static inline bool IsAWordStart(const int ch
) { 
  44         return (ch 
< 0x80) && (isalnum(ch
) || ch 
== '_' || ch 
== '.' || 
  45                 ch 
== '%' || ch 
== '@' || ch 
== '$' || ch 
== '?'); 
  48 static inline bool IsAsmOperator(const int ch
) { 
  49         if ((ch 
< 0x80) && (isalnum(ch
))) 
  51         // '.' left out as it is used to make up numbers 
  52         if (ch 
== '*' || ch 
== '/' || ch 
== '-' || ch 
== '+' || 
  53                 ch 
== '(' || ch 
== ')' || ch 
== '=' || ch 
== '^' || 
  54                 ch 
== '[' || ch 
== ']' || ch 
== '<' || ch 
== '&' || 
  55                 ch 
== '>' || ch 
== ',' || ch 
== '|' || ch 
== '~' || 
  56                 ch 
== '%' || ch 
== ':') 
  61 static bool IsStreamCommentStyle(int style
) { 
  62         return style 
== SCE_ASM_COMMENTDIRECTIVE 
|| style 
== SCE_ASM_COMMENTBLOCK
; 
  65 static inline int LowerCase(int c
) { 
  66         if (c 
>= 'A' && c 
<= 'Z') 
  71 // An individual named option for use in an OptionSet 
  73 // Options used for LexerAsm 
  75         std::string delimiter
; 
  78         bool foldCommentMultiline
; 
  79         bool foldCommentExplicit
; 
  80         std::string foldExplicitStart
; 
  81         std::string foldExplicitEnd
; 
  82         bool foldExplicitAnywhere
; 
  87                 foldSyntaxBased 
= true; 
  88                 foldCommentMultiline 
= false; 
  89                 foldCommentExplicit 
= false; 
  90                 foldExplicitStart 
= ""; 
  92                 foldExplicitAnywhere 
= false; 
  97 static const char * const asmWordListDesc
[] = { 
 102         "Directive operands", 
 103         "Extended instructions", 
 104         "Directives4Foldstart", 
 105         "Directives4Foldend", 
 109 struct OptionSetAsm 
: public OptionSet
<OptionsAsm
> { 
 111                 DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter
, 
 112                         "Character used for COMMENT directive's delimiter, replacing the standard \"~\"."); 
 114                 DefineProperty("fold", &OptionsAsm::fold
); 
 116                 DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased
, 
 117                         "Set this property to 0 to disable syntax based folding."); 
 119                 DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline
, 
 120                         "Set this property to 1 to enable folding multi-line comments."); 
 122                 DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit
, 
 123                         "This option enables folding explicit fold points when using the Asm lexer. " 
 124                         "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} " 
 125                         "at the end of a section that should fold."); 
 127                 DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart
, 
 128                         "The string to use for explicit fold start points, replacing the standard ;{."); 
 130                 DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd
, 
 131                         "The string to use for explicit fold end points, replacing the standard ;}."); 
 133                 DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere
, 
 134                         "Set this property to 1 to enable explicit fold points anywhere, not just in line comments."); 
 136                 DefineProperty("fold.compact", &OptionsAsm::foldCompact
); 
 138                 DefineWordListSets(asmWordListDesc
); 
 142 class LexerAsm 
: public ILexer 
{ 
 143         WordList cpuInstruction
; 
 144         WordList mathInstruction
; 
 147         WordList directiveOperand
; 
 148         WordList extInstruction
; 
 149         WordList directives4foldstart
; 
 150         WordList directives4foldend
; 
 156         virtual ~LexerAsm() { 
 158         void SCI_METHOD 
Release() { 
 161         int SCI_METHOD 
Version() const { 
 164         const char * SCI_METHOD 
PropertyNames() { 
 165                 return osAsm
.PropertyNames(); 
 167         int SCI_METHOD 
PropertyType(const char *name
) { 
 168                 return osAsm
.PropertyType(name
); 
 170         const char * SCI_METHOD 
DescribeProperty(const char *name
) { 
 171                 return osAsm
.DescribeProperty(name
); 
 173         int SCI_METHOD 
PropertySet(const char *key
, const char *val
); 
 174         const char * SCI_METHOD 
DescribeWordListSets() { 
 175                 return osAsm
.DescribeWordListSets(); 
 177         int SCI_METHOD 
WordListSet(int n
, const char *wl
); 
 178         void SCI_METHOD 
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument 
*pAccess
); 
 179         void SCI_METHOD 
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument 
*pAccess
); 
 181         void * SCI_METHOD 
PrivateCall(int, void *) { 
 185         static ILexer 
*LexerFactoryAsm() { 
 186                 return new LexerAsm(); 
 190 int SCI_METHOD 
LexerAsm::PropertySet(const char *key
, const char *val
) { 
 191         if (osAsm
.PropertySet(&options
, key
, val
)) { 
 197 int SCI_METHOD 
LexerAsm::WordListSet(int n
, const char *wl
) { 
 198         WordList 
*wordListN 
= 0; 
 201                 wordListN 
= &cpuInstruction
; 
 204                 wordListN 
= &mathInstruction
; 
 207                 wordListN 
= ®isters
; 
 210                 wordListN 
= &directive
; 
 213                 wordListN 
= &directiveOperand
; 
 216                 wordListN 
= &extInstruction
; 
 219                 wordListN 
= &directives4foldstart
; 
 222                 wordListN 
= &directives4foldend
; 
 225         int firstModification 
= -1; 
 229                 if (*wordListN 
!= wlNew
) { 
 231                         firstModification 
= 0; 
 234         return firstModification
; 
 237 void SCI_METHOD 
LexerAsm::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument 
*pAccess
) { 
 238         LexAccessor 
styler(pAccess
); 
 240         // Do not leak onto next line 
 241         if (initStyle 
== SCE_ASM_STRINGEOL
) 
 242                 initStyle 
= SCE_ASM_DEFAULT
; 
 244         StyleContext 
sc(startPos
, length
, initStyle
, styler
); 
 246         for (; sc
.More(); sc
.Forward()) 
 249                 // Prevent SCE_ASM_STRINGEOL from leaking back to previous line 
 250                 if (sc
.atLineStart 
&& (sc
.state 
== SCE_ASM_STRING
)) { 
 251                         sc
.SetState(SCE_ASM_STRING
); 
 252                 } else if (sc
.atLineStart 
&& (sc
.state 
== SCE_ASM_CHARACTER
)) { 
 253                         sc
.SetState(SCE_ASM_CHARACTER
); 
 256                 // Handle line continuation generically. 
 258                         if (sc
.chNext 
== '\n' || sc
.chNext 
== '\r') { 
 260                                 if (sc
.ch 
== '\r' && sc
.chNext 
== '\n') { 
 267                 // Determine if the current state should terminate. 
 268                 if (sc
.state 
== SCE_ASM_OPERATOR
) { 
 269                         if (!IsAsmOperator(sc
.ch
)) { 
 270                             sc
.SetState(SCE_ASM_DEFAULT
); 
 272                 } else if (sc
.state 
== SCE_ASM_NUMBER
) { 
 273                         if (!IsAWordChar(sc
.ch
)) { 
 274                                 sc
.SetState(SCE_ASM_DEFAULT
); 
 276                 } else if (sc
.state 
== SCE_ASM_IDENTIFIER
) { 
 277                         if (!IsAWordChar(sc
.ch
) ) { 
 279                                 sc
.GetCurrentLowered(s
, sizeof(s
)); 
 280                                 bool IsDirective 
= false; 
 282                                 if (cpuInstruction
.InList(s
)) { 
 283                                         sc
.ChangeState(SCE_ASM_CPUINSTRUCTION
); 
 284                                 } else if (mathInstruction
.InList(s
)) { 
 285                                         sc
.ChangeState(SCE_ASM_MATHINSTRUCTION
); 
 286                                 } else if (registers
.InList(s
)) { 
 287                                         sc
.ChangeState(SCE_ASM_REGISTER
); 
 288                                 }  else if (directive
.InList(s
)) { 
 289                                         sc
.ChangeState(SCE_ASM_DIRECTIVE
); 
 291                                 } else if (directiveOperand
.InList(s
)) { 
 292                                         sc
.ChangeState(SCE_ASM_DIRECTIVEOPERAND
); 
 293                                 } else if (extInstruction
.InList(s
)) { 
 294                                         sc
.ChangeState(SCE_ASM_EXTINSTRUCTION
); 
 296                                 sc
.SetState(SCE_ASM_DEFAULT
); 
 297                                 if (IsDirective 
&& !strcmp(s
, "comment")) { 
 298                                         char delimiter 
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0]; 
 299                                         while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
) { 
 300                                                 sc
.ForwardSetState(SCE_ASM_DEFAULT
); 
 302                                         if (sc
.ch 
== delimiter
) { 
 303                                                 sc
.SetState(SCE_ASM_COMMENTDIRECTIVE
); 
 307                 } else if (sc
.state 
== SCE_ASM_COMMENTDIRECTIVE
) { 
 308                         char delimiter 
= options
.delimiter
.empty() ? '~' : options
.delimiter
.c_str()[0]; 
 309                         if (sc
.ch 
== delimiter
) { 
 310                                 while (!sc
.atLineEnd
) { 
 313                                 sc
.SetState(SCE_ASM_DEFAULT
); 
 315                 } else if (sc
.state 
== SCE_ASM_COMMENT 
) { 
 317                                 sc
.SetState(SCE_ASM_DEFAULT
); 
 319                 } else if (sc
.state 
== SCE_ASM_STRING
) { 
 321                                 if (sc
.chNext 
== '\"' || sc
.chNext 
== '\'' || sc
.chNext 
== '\\') { 
 324                         } else if (sc
.ch 
== '\"') { 
 325                                 sc
.ForwardSetState(SCE_ASM_DEFAULT
); 
 326                         } else if (sc
.atLineEnd
) { 
 327                                 sc
.ChangeState(SCE_ASM_STRINGEOL
); 
 328                                 sc
.ForwardSetState(SCE_ASM_DEFAULT
); 
 330                 } else if (sc
.state 
== SCE_ASM_CHARACTER
) { 
 332                                 if (sc
.chNext 
== '\"' || sc
.chNext 
== '\'' || sc
.chNext 
== '\\') { 
 335                         } else if (sc
.ch 
== '\'') { 
 336                                 sc
.ForwardSetState(SCE_ASM_DEFAULT
); 
 337                         } else if (sc
.atLineEnd
) { 
 338                                 sc
.ChangeState(SCE_ASM_STRINGEOL
); 
 339                                 sc
.ForwardSetState(SCE_ASM_DEFAULT
); 
 343                 // Determine if a new state should be entered. 
 344                 if (sc
.state 
== SCE_ASM_DEFAULT
) { 
 346                                 sc
.SetState(SCE_ASM_COMMENT
); 
 347                         } else if (isascii(sc
.ch
) && (isdigit(sc
.ch
) || (sc
.ch 
== '.' && isascii(sc
.chNext
) && isdigit(sc
.chNext
)))) { 
 348                                 sc
.SetState(SCE_ASM_NUMBER
); 
 349                         } else if (IsAWordStart(sc
.ch
)) { 
 350                                 sc
.SetState(SCE_ASM_IDENTIFIER
); 
 351                         } else if (sc
.ch 
== '\"') { 
 352                                 sc
.SetState(SCE_ASM_STRING
); 
 353                         } else if (sc
.ch 
== '\'') { 
 354                                 sc
.SetState(SCE_ASM_CHARACTER
); 
 355                         } else if (IsAsmOperator(sc
.ch
)) { 
 356                                 sc
.SetState(SCE_ASM_OPERATOR
); 
 364 // Store both the current line's fold level and the next lines in the 
 365 // level store to make it easy to pick up with each increment 
 366 // and to make it possible to fiddle the current level for "else". 
 368 void SCI_METHOD 
LexerAsm::Fold(unsigned int startPos
, int length
, int initStyle
, IDocument 
*pAccess
) { 
 373         LexAccessor 
styler(pAccess
); 
 375         unsigned int endPos 
= startPos 
+ length
; 
 376         int visibleChars 
= 0; 
 377         int lineCurrent 
= styler
.GetLine(startPos
); 
 378         int levelCurrent 
= SC_FOLDLEVELBASE
; 
 380                 levelCurrent 
= styler
.LevelAt(lineCurrent
-1) >> 16; 
 381         int levelNext 
= levelCurrent
; 
 382         char chNext 
= styler
[startPos
]; 
 383         int styleNext 
= styler
.StyleAt(startPos
); 
 384         int style 
= initStyle
; 
 387         const bool userDefinedFoldMarkers 
= !options
.foldExplicitStart
.empty() && !options
.foldExplicitEnd
.empty(); 
 388         for (unsigned int i 
= startPos
; i 
< endPos
; i
++) { 
 390                 chNext 
= styler
.SafeGetCharAt(i 
+ 1); 
 391                 int stylePrev 
= style
; 
 393                 styleNext 
= styler
.StyleAt(i 
+ 1); 
 394                 bool atEOL 
= (ch 
== '\r' && chNext 
!= '\n') || (ch 
== '\n'); 
 395                 if (options
.foldCommentMultiline 
&& IsStreamCommentStyle(style
)) { 
 396                         if (!IsStreamCommentStyle(stylePrev
)) { 
 398                         } else if (!IsStreamCommentStyle(styleNext
) && !atEOL
) { 
 399                                 // Comments don't end at end of line and the next character may be unstyled. 
 403                 if (options
.foldCommentExplicit 
&& ((style 
== SCE_ASM_COMMENT
) || options
.foldExplicitAnywhere
)) { 
 404                         if (userDefinedFoldMarkers
) { 
 405                                 if (styler
.Match(i
, options
.foldExplicitStart
.c_str())) { 
 407                                 } else if (styler
.Match(i
, options
.foldExplicitEnd
.c_str())) { 
 414                                         } else if (chNext 
== '}') { 
 420                 if (options
.foldSyntaxBased 
&& (style 
== SCE_ASM_DIRECTIVE
)) { 
 421                         word
[wordlen
++] = static_cast<char>(LowerCase(ch
)); 
 422                         if (wordlen 
== 100) {                   // prevent overflow 
 426                         if (styleNext 
!= SCE_ASM_DIRECTIVE
) {   // reading directive ready 
 427                                 word
[wordlen
] = '\0'; 
 429                                 if (directives4foldstart
.InList(word
)) { 
 431                                 } else if (directives4foldend
.InList(word
)){ 
 438                 if (atEOL 
|| (i 
== endPos
-1)) { 
 439                         int levelUse 
= levelCurrent
; 
 440                         int lev 
= levelUse 
| levelNext 
<< 16; 
 441                         if (visibleChars 
== 0 && options
.foldCompact
) 
 442                                 lev 
|= SC_FOLDLEVELWHITEFLAG
; 
 443                         if (levelUse 
< levelNext
) 
 444                                 lev 
|= SC_FOLDLEVELHEADERFLAG
; 
 445                         if (lev 
!= styler
.LevelAt(lineCurrent
)) { 
 446                                 styler
.SetLevel(lineCurrent
, lev
); 
 449                         levelCurrent 
= levelNext
; 
 450                         if (atEOL 
&& (i 
== static_cast<unsigned int>(styler
.Length()-1))) { 
 451                                 // There is an empty line at end of file so give it same level and empty 
 452                                 styler
.SetLevel(lineCurrent
, (levelCurrent 
| levelCurrent 
<< 16) | SC_FOLDLEVELWHITEFLAG
); 
 459 LexerModule 
lmAsm(SCLEX_ASM
, LexerAsm::LexerFactoryAsm
, "asm", asmWordListDesc
);