src/stc/scintilla/lexers/LexBasic.cxx

   1 // Scintilla source code edit control
   2 /** @file LexBasic.cxx
   3  ** Lexer for BlitzBasic and PureBasic.
   4  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
   7 // The License.txt file describes the conditions under which this software may be distributed.
   8
   9 // This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
  10 // and derivatives. Once they diverge enough, we might want to split it into multiple
  11 // lexers for more code clarity.
  12 //
  13 // Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
  14
  15 // Folding only works for simple things like functions or types.
  16
  17 // You may want to have a look at my ctags lexer as well if, in addition to coloring
  18 // and folding, you need to extract things like label tags in your editor.
  19
  20 #include <stdlib.h>
  21 #include <string.h>
  22 #include <stdio.h>
  23 #include <stdarg.h>
  24 #include <assert.h>
  25 #include <ctype.h>
  26
  27 #include <string>
  28 #include <map>
  29
  30 #include "ILexer.h"
  31 #include "Scintilla.h"
  32 #include "SciLexer.h"
  33
  34 #include "WordList.h"
  35 #include "LexAccessor.h"
  36 #include "StyleContext.h"
  37 #include "CharacterSet.h"
  38 #include "LexerModule.h"
  39 #include "OptionSet.h"
  40
  41 #ifdef SCI_NAMESPACE
  42 using namespace Scintilla;
  43 #endif
  44
  45 /* Bits:
  46  * 1  - whitespace
  47  * 2  - operator
  48  * 4  - identifier
  49  * 8  - decimal digit
  50  * 16 - hex digit
  51  * 32 - bin digit
  52  */
  53 static int character_classification[128] =
  54 {
  55     0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
  56     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  57     1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
  58     60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
  59     2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
  60     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  4,
  61     2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
  62     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  0
  63 };
  64
  65 static bool IsSpace(int c) {
  66         return c < 128 && (character_classification[c] & 1);
  67 }
  68
  69 static bool IsOperator(int c) {
  70         return c < 128 && (character_classification[c] & 2);
  71 }
  72
  73 static bool IsIdentifier(int c) {
  74         return c < 128 && (character_classification[c] & 4);
  75 }
  76
  77 static bool IsDigit(int c) {
  78         return c < 128 && (character_classification[c] & 8);
  79 }
  80
  81 static bool IsHexDigit(int c) {
  82         return c < 128 && (character_classification[c] & 16);
  83 }
  84
  85 static bool IsBinDigit(int c) {
  86         return c < 128 && (character_classification[c] & 32);
  87 }
  88
  89 static int LowerCase(int c)
  90 {
  91         if (c >= 'A' && c <= 'Z')
  92                 return 'a' + c - 'A';
  93         return c;
  94 }
  95
  96 static int CheckBlitzFoldPoint(char const *token, int &level) {
  97         if (!strcmp(token, "function") ||
  98                 !strcmp(token, "type")) {
  99                 level |= SC_FOLDLEVELHEADERFLAG;
 100                 return 1;
 101         }
 102         if (!strcmp(token, "end function") ||
 103                 !strcmp(token, "end type")) {
 104                 return -1;
 105         }
 106         return 0;
 107 }
 108
 109 static int CheckPureFoldPoint(char const *token, int &level) {
 110         if (!strcmp(token, "procedure") ||
 111                 !strcmp(token, "enumeration") ||
 112                 !strcmp(token, "interface") ||
 113                 !strcmp(token, "structure")) {
 114                 level |= SC_FOLDLEVELHEADERFLAG;
 115                 return 1;
 116         }
 117         if (!strcmp(token, "endprocedure") ||
 118                 !strcmp(token, "endenumeration") ||
 119                 !strcmp(token, "endinterface") ||
 120                 !strcmp(token, "endstructure")) {
 121                 return -1;
 122         }
 123         return 0;
 124 }
 125
 126 static int CheckFreeFoldPoint(char const *token, int &level) {
 127         if (!strcmp(token, "function") ||
 128                 !strcmp(token, "sub") ||
 129                 !strcmp(token, "type")) {
 130                 level |= SC_FOLDLEVELHEADERFLAG;
 131                 return 1;
 132         }
 133         if (!strcmp(token, "end function") ||
 134                 !strcmp(token, "end sub") ||
 135                 !strcmp(token, "end type")) {
 136                 return -1;
 137         }
 138         return 0;
 139 }
 140
 141 // An individual named option for use in an OptionSet
 142
 143 // Options used for LexerBasic
 144 struct OptionsBasic {
 145         bool fold;
 146         bool foldSyntaxBased;
 147         bool foldCommentExplicit;
 148         std::string foldExplicitStart;
 149         std::string foldExplicitEnd;
 150         bool foldExplicitAnywhere;
 151         bool foldCompact;
 152         OptionsBasic() {
 153                 fold = false;
 154                 foldSyntaxBased = true;
 155                 foldCommentExplicit = false;
 156                 foldExplicitStart = "";
 157                 foldExplicitEnd   = "";
 158                 foldExplicitAnywhere = false;
 159                 foldCompact = true;
 160         }
 161 };
 162
 163 static const char * const blitzbasicWordListDesc[] = {
             // Descriptions of the word list sets of the BlitzBasic lexer. Passed to
             // the LexerBasic constructor by LexerFactoryBlitzBasic() and to the
             // lmBlitzBasic module definition.
             // Presumably entry n describes the list set through WordListSet(n) -
             // confirm against OptionSet::DefineWordListSets.
 164         "BlitzBasic Keywords",
 165         "user1",
 166         "user2",
 167         "user3",
 168         0 // trailing null entry (presumably the end-of-list sentinel)
 169 };
 170
 171 static const char * const purebasicWordListDesc[] = {
             // Descriptions of the word list sets of the PureBasic lexer. Passed to
             // the LexerBasic constructor by LexerFactoryPureBasic() and to the
             // lmPureBasic module definition.
             // Presumably entry n describes the list set through WordListSet(n) -
             // confirm against OptionSet::DefineWordListSets.
 172         "PureBasic Keywords",
 173         "PureBasic PreProcessor Keywords",
 174         "user defined 1",
 175         "user defined 2",
 176         0 // trailing null entry (presumably the end-of-list sentinel)
 177 };
 178
 179 static const char * const freebasicWordListDesc[] = {
             // Descriptions of the word list sets of the FreeBasic lexer. Passed to
             // the LexerBasic constructor by LexerFactoryFreeBasic() and to the
             // lmFreeBasic module definition.
             // Presumably entry n describes the list set through WordListSet(n) -
             // confirm against OptionSet::DefineWordListSets.
 180         "FreeBasic Keywords",
 181         "FreeBasic PreProcessor Keywords",
 182         "user defined 1",
 183         "user defined 2",
 184         0 // trailing null entry (presumably the end-of-list sentinel)
 185 };
 186
 187 struct OptionSetBasic : public OptionSet<OptionsBasic> {
             // Binds every property name understood by the Basic lexers to the
             // OptionsBasic member that stores its value, then registers the word
             // list descriptions of the concrete dialect.
             // wordListDescriptions: one of the *WordListDesc arrays of this file.
 188         OptionSetBasic(const char * const wordListDescriptions[]) {
                     // Master switch: LexerBasic::Fold() returns immediately while this is off.
 189                 DefineProperty("fold", &OptionsBasic::fold);
 190
                     // Enables the keyword scan in Fold() (the CheckFoldPoint callback).
 191                 DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
 192                         "Set this property to 0 to disable syntax based folding.");
 193
                     // Enables the explicit fold marker check in Fold().
 194                 DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
 195                         "This option enables folding explicit fold points when using the Basic lexer. "
 196                         "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
 197                         "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
 198
                     // Fold() uses the custom markers only when BOTH the start and the end
                     // string are non-empty; otherwise it falls back to the default markers.
 199                 DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
 200                         "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
 201
 202                 DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
 203                         "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
 204
                     // When set, Fold() checks for markers in every style, not only SCE_B_COMMENT.
 205                 DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
 206                         "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
 207
                     // When set, Fold() flags whitespace-only lines with SC_FOLDLEVELWHITEFLAG.
 208                 DefineProperty("fold.compact", &OptionsBasic::foldCompact);
 209
 210                 DefineWordListSets(wordListDescriptions);
 211         }
 212 };
 213
 214 class LexerBasic : public ILexer {
             // Lexer/folder object shared by the BlitzBasic, PureBasic and FreeBasic
             // dialects. A dialect is defined by three constructor arguments: the
             // line comment character, the fold keyword checker and the word list
             // descriptions (see the three LexerFactory* functions below).

             // Character that starts a line comment: ';' or '\'' depending on the factory.
 215         char comment_char;
             // Dialect-specific fold checker: returns 1 / -1 / 0 for a token that
             // opens / closes / does not affect a fold (see the Check*FoldPoint functions).
 216         int (*CheckFoldPoint)(char const *, int &);
             // Filled by WordListSet(0..3); Lex() styles matches as SCE_B_KEYWORD..SCE_B_KEYWORD4.
 217         WordList keywordlists[4];
             // Current option values, updated through PropertySet().
 218         OptionsBasic options;
             // Maps property names to members of `options`.
 219         OptionSetBasic osBasic;
 220 public:
 221         LexerBasic(char comment_char_, int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) :
 222                    comment_char(comment_char_),
 223                    CheckFoldPoint(CheckFoldPoint_),
 224                    osBasic(wordListDescriptions) {
 225         }
 226         virtual ~LexerBasic() {
 227         }
             // Destroys this lexer; the object must not be used afterwards.
 228         void SCI_METHOD Release() {
 229                 delete this;
 230         }
 231         int SCI_METHOD Version() const {
 232                 return lvOriginal;
 233         }
             // The next three methods and DescribeWordListSets() simply forward to osBasic.
 234         const char * SCI_METHOD PropertyNames() {
 235                 return osBasic.PropertyNames();
 236         }
 237         int SCI_METHOD PropertyType(const char *name) {
 238                 return osBasic.PropertyType(name);
 239         }
 240         const char * SCI_METHOD DescribeProperty(const char *name) {
 241                 return osBasic.DescribeProperty(name);
 242         }
 243         int SCI_METHOD PropertySet(const char *key, const char *val);
 244         const char * SCI_METHOD DescribeWordListSets() {
 245                 return osBasic.DescribeWordListSets();
 246         }
 247         int SCI_METHOD WordListSet(int n, const char *wl);
 248         void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 249         void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 250
             // No private calls are implemented; both arguments are ignored.
 251         void * SCI_METHOD PrivateCall(int, void *) {
 252                 return 0;
 253         }
             // Factories referenced by the LexerModule definitions at the end of the file.
             // Each returns a new heap-allocated lexer, which Release() deletes.
 254         static ILexer *LexerFactoryBlitzBasic() {
 255                 return new LexerBasic(';', CheckBlitzFoldPoint, blitzbasicWordListDesc);
 256         }
 257         static ILexer *LexerFactoryPureBasic() {
 258                 return new LexerBasic(';', CheckPureFoldPoint, purebasicWordListDesc);
 259         }
 260         static ILexer *LexerFactoryFreeBasic() {
 261                 return new LexerBasic('\'', CheckFreeFoldPoint, freebasicWordListDesc );
 262         }
 263 };
 264
 265 int SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) {
 266         if (osBasic.PropertySet(&options, key, val)) {
 267                 return 0;
 268         }
 269         return -1;
 270 }
 271
 272 int SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
 273         WordList *wordListN = 0;
 274         switch (n) {
 275         case 0:
 276                 wordListN = &keywordlists[0];
 277                 break;
 278         case 1:
 279                 wordListN = &keywordlists[1];
 280                 break;
 281         case 2:
 282                 wordListN = &keywordlists[2];
 283                 break;
 284         case 3:
 285                 wordListN = &keywordlists[3];
 286                 break;
 287         }
 288         int firstModification = -1;
 289         if (wordListN) {
 290                 WordList wlNew;
 291                 wlNew.Set(wl);
 292                 if (*wordListN != wlNew) {
 293                         wordListN->Set(wl);
 294                         firstModification = 0;
 295                 }
 296         }
 297         return firstModification;
 298 }
 299
 300 void SCI_METHOD LexerBasic::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
             // Styles `length` characters starting at startPos, entering in style
             // initStyle. Each loop pass has two phases: first decide whether the
             // current character ends the token in progress; then, if the state is
             // default/error, decide whether that same character starts a new token.
 301         LexAccessor styler(pAccess);
 302
             // isfirst:  no non-whitespace character has been seen yet on this line.
             // wasfirst: value of isfirst captured when the current identifier began;
             //           consulted when it ends to recognise "name:" labels.
 303         bool wasfirst = true, isfirst = true; // true if first token in a line
 304         styler.StartAt(startPos);
 305
 306         StyleContext sc(startPos, length, initStyle, styler);
 307
 308         // Can't use sc.More() here else we miss the last character
 309         for (; ; sc.Forward()) {
                     // Phase 1: does sc.ch terminate the token currently being styled?
 310                 if (sc.state == SCE_B_IDENTIFIER) {
 311                         if (!IsIdentifier(sc.ch)) {
 312                                 // Labels
 313                                 if (wasfirst && sc.Match(':')) {
 314                                         sc.ChangeState(SCE_B_LABEL);
 315                                         sc.ForwardSetState(SCE_B_DEFAULT);
 316                                 } else {
 317                                         char s[100];
 318                                         int kstates[4] = {
 319                                                 SCE_B_KEYWORD,
 320                                                 SCE_B_KEYWORD2,
 321                                                 SCE_B_KEYWORD3,
 322                                                 SCE_B_KEYWORD4,
 323                                         };
                                             // Look the lowercased identifier up in all four lists. There
                                             // is no break, so if several lists contain the word the
                                             // highest-numbered list decides the style.
 324                                         sc.GetCurrentLowered(s, sizeof(s));
 325                                         for (int i = 0; i < 4; i++) {
 326                                                 if (keywordlists[i].InList(s)) {
 327                                                         sc.ChangeState(kstates[i]);
 328                                                 }
 329                                         }
 330                                         // Types, must set them as operator else they will be
 331                                         // matched as number/constant
 332                                         if (sc.Match('.') || sc.Match('$') || sc.Match('%') ||
 333                                                 sc.Match('#')) {
 334                                                 sc.SetState(SCE_B_OPERATOR);
 335                                         } else {
 336                                                 sc.SetState(SCE_B_DEFAULT);
 337                                         }
 338                                 }
 339                         }
 340                 } else if (sc.state == SCE_B_OPERATOR) {
                             // '#' is classified as an operator character but always ends the
                             // operator run, so phase 2 below gets to re-examine it.
 341                         if (!IsOperator(sc.ch) || sc.Match('#'))
 342                                 sc.SetState(SCE_B_DEFAULT);
 343                 } else if (sc.state == SCE_B_LABEL) {
 344                         if (!IsIdentifier(sc.ch))
 345                                 sc.SetState(SCE_B_DEFAULT);
 346                 } else if (sc.state == SCE_B_CONSTANT) {
 347                         if (!IsIdentifier(sc.ch))
 348                                 sc.SetState(SCE_B_DEFAULT);
 349                 } else if (sc.state == SCE_B_NUMBER) {
                             // IsDigit() also accepts '.', so a dot does not end a number.
 350                         if (!IsDigit(sc.ch))
 351                                 sc.SetState(SCE_B_DEFAULT);
 352                 } else if (sc.state == SCE_B_HEXNUMBER) {
 353                         if (!IsHexDigit(sc.ch))
 354                                 sc.SetState(SCE_B_DEFAULT);
 355                 } else if (sc.state == SCE_B_BINNUMBER) {
 356                         if (!IsBinDigit(sc.ch))
 357                                 sc.SetState(SCE_B_DEFAULT);
 358                 } else if (sc.state == SCE_B_STRING) {
 359                         if (sc.ch == '"') {
 360                                 sc.ForwardSetState(SCE_B_DEFAULT);
 361                         }
                             // Line end reached in this branch: the segment is switched to
                             // SCE_B_ERROR before returning to default. This is the path taken
                             // by a string that is still open at the end of its line.
 362                         if (sc.atLineEnd) {
 363                                 sc.ChangeState(SCE_B_ERROR);
 364                                 sc.SetState(SCE_B_DEFAULT);
 365                         }
 366                 } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) {
                             // Comments and preprocessor lines run to the end of the line.
 367                         if (sc.atLineEnd) {
 368                                 sc.SetState(SCE_B_DEFAULT);
 369                         }
 370                 }
 371
 372                 if (sc.atLineStart)
 373                         isfirst = true;
 374
                     // Phase 2: in default/error state, does sc.ch start a new token?
                     // The order of the tests matters: e.g. the digit test precedes the
                     // operator test, so '.' (both digit and operator) starts a number.
 375                 if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) {
 376                         if (isfirst && sc.Match('.')) {
                                     // A '.' as first character of the line starts a label.
 377                                 sc.SetState(SCE_B_LABEL);
 378                         } else if (isfirst && sc.Match('#')) {
                                     // A '#' as first character of the line is lexed as an identifier,
                                     // so the word goes through the keyword list lookup above.
 379                                 wasfirst = isfirst;
 380                                 sc.SetState(SCE_B_IDENTIFIER);
 381                         } else if (sc.Match(comment_char)) {
 382                                 // Hack to make deprecated QBASIC '$Include show
 383                                 // up in freebasic with SCE_B_PREPROCESSOR.
 384                                 if (comment_char == '\'' && sc.Match(comment_char, '$'))
 385                                         sc.SetState(SCE_B_PREPROCESSOR);
 386                                 else
 387                                         sc.SetState(SCE_B_COMMENT);
 388                         } else if (sc.Match('"')) {
 389                                 sc.SetState(SCE_B_STRING);
 390                         } else if (IsDigit(sc.ch)) {
 391                                 sc.SetState(SCE_B_NUMBER);
 392                         } else if (sc.Match('$')) {
 393                                 sc.SetState(SCE_B_HEXNUMBER);
 394                         } else if (sc.Match('%')) {
 395                                 sc.SetState(SCE_B_BINNUMBER);
 396                         } else if (sc.Match('#')) {
 397                                 sc.SetState(SCE_B_CONSTANT);
 398                         } else if (IsOperator(sc.ch)) {
 399                                 sc.SetState(SCE_B_OPERATOR);
 400                         } else if (IsIdentifier(sc.ch)) {
 401                                 wasfirst = isfirst;
 402                                 sc.SetState(SCE_B_IDENTIFIER);
 403                         } else if (!IsSpace(sc.ch)) {
                                     // Anything unclassified (e.g. a non-ASCII character) is an error.
 404                                 sc.SetState(SCE_B_ERROR);
 405                         }
 406                 }
 407
                     // The first non-whitespace character ends the "first token" window.
 408                 if (!IsSpace(sc.ch))
 409                         isfirst = false;
 410
                     // Checked at the bottom so the final character still gets both phases.
 411                 if (!sc.More())
 412                         break;
 413         }
 414         sc.Complete();
 415 }
 416
 417
 418 void SCI_METHOD LexerBasic::Fold(unsigned int startPos, int length, int /* initStyle */, IDocument *pAccess) {
 419
 420         if (!options.fold)
 421                 return;
 422
 423         LexAccessor styler(pAccess);
 424
 425         int line = styler.GetLine(startPos);
 426         int level = styler.LevelAt(line);
 427         int go = 0, done = 0;
 428         int endPos = startPos + length;
 429         char word[256];
 430         int wordlen = 0;
 431         const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
 432         int cNext = styler[startPos];
 433
 434         // Scan for tokens at the start of the line (they may include
 435         // whitespace, for tokens like "End Function"
 436         for (int i = startPos; i < endPos; i++) {
 437                 int c = cNext;
 438                 cNext = styler.SafeGetCharAt(i + 1);
 439                 bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n');
 440                 if (options.foldSyntaxBased && !done && !go) {
 441                         if (wordlen) { // are we scanning a token already?
 442                                 word[wordlen] = static_cast<char>(LowerCase(c));
 443                                 if (!IsIdentifier(c)) { // done with token
 444                                         word[wordlen] = '\0';
 445                                         go = CheckFoldPoint(word, level);
 446                                         if (!go) {
 447                                                 // Treat any whitespace as single blank, for
 448                                                 // things like "End   Function".
 449                                                 if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) {
 450                                                         word[wordlen] = ' ';
 451                                                         if (wordlen < 255)
 452                                                                 wordlen++;
 453                                                 }
 454                                                 else // done with this line
 455                                                         done = 1;
 456                                         }
 457                                 } else if (wordlen < 255) {
 458                                         wordlen++;
 459                                 }
 460                         } else { // start scanning at first non-whitespace character
 461                                 if (!IsSpace(c)) {
 462                                         if (IsIdentifier(c)) {
 463                                                 word[0] = static_cast<char>(LowerCase(c));
 464                                                 wordlen = 1;
 465                                         } else // done with this line
 466                                                 done = 1;
 467                                 }
 468                         }
 469                 }
 470                 if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) {
 471                         if (userDefinedFoldMarkers) {
 472                                 if (styler.Match(i, options.foldExplicitStart.c_str())) {
 473                                         level |= SC_FOLDLEVELHEADERFLAG;
 474                                         go = 1;
 475                                 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
 476                                         go = -1;
 477                                 }
 478                         } else {
 479                                 if (c == comment_char) {
 480                                         if (cNext == '{') {
 481                                                 level |= SC_FOLDLEVELHEADERFLAG;
 482                                                 go = 1;
 483                                         } else if (cNext == '}') {
 484                                                 go = -1;
 485                                         }
 486                                 }
 487                         }
 488                 }
 489                 if (atEOL) { // line end
 490                         if (!done && wordlen == 0 && options.foldCompact) // line was only space
 491                                 level |= SC_FOLDLEVELWHITEFLAG;
 492                         if (level != styler.LevelAt(line))
 493                                 styler.SetLevel(line, level);
 494                         level += go;
 495                         line++;
 496                         // reset state
 497                         wordlen = 0;
 498                         level &= ~SC_FOLDLEVELHEADERFLAG;
 499                         level &= ~SC_FOLDLEVELWHITEFLAG;
 500                         go = 0;
 501                         done = 0;
 502                 }
 503         }
 504 }
 505
 506 LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc); // factory uses ';' comments and CheckBlitzFoldPoint
 507
 508 LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc); // factory uses ';' comments and CheckPureFoldPoint
 509
 510 LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc); // factory uses '\'' comments and CheckFreeFoldPoint