src/stc/scintilla/lexers/LexOScript.cxx

// Scintilla source code edit control
/** @file LexOScript.cxx
 ** Lexer for OScript sources; ocx files and/or OSpace dumps.
 ** OScript is a programming language used to develop applications for the
 ** Livelink server platform.
 **/
// Written by Ferdinand Prantl <prantlf@gmail.com>, inspired by the code from
// LexVB.cxx and LexPascal.cxx. The License.txt file describes the conditions
// under which this software may be distributed.

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"

#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif

// -----------------------------------------
// Functions classifying a single character.

// Tells whether the character is an unaccented ASCII letter (a-z or A-Z).
// The helper is generic and would probably fit better next to
// IsAlphaNumeric and the other shared character classifiers.
inline bool IsAlpha(int ch) {
	const bool isLowerCase = ch >= 'a' && ch <= 'z';
	const bool isUpperCase = ch >= 'A' && ch <= 'Z';
	return isLowerCase || isUpperCase;
}

static inline bool IsIdentifierChar(int ch) {
	// An identifier continues with an underscore or with anything that
	// IsAlphaNumeric accepts. Per the original author, identifiers cannot
	// contain non-ASCII letters, so a word with language-specific characters
	// is not treated as an identifier.
	if (ch == '_') {
		return true;
	}
	return IsAlphaNumeric(ch);
}

static inline bool IsIdentifierStart(int ch) {
	// An identifier may begin with an ASCII letter or an underscore only;
	// digits and non-ASCII letters cannot start one.
	if (IsAlpha(ch)) {
		return true;
	}
	return ch == '_';
}

static inline bool IsNumberChar(int ch, int chNext) {
	// Tells whether ch may continue a numeric literal; chNext is the
	// character following it.
	// Numeric constructs are not checked for lexical correctness. They are
	// expected to look like +1.23-E9 but actually any bunch of the following
	// characters will be styled as number.
	// KNOWN PROBLEM: if you put + or - operators immediately after a number
	// and the next operand starts with the letter E, the operator will not be
	// recognized and it will be styled together with the preceding number.
	// This should not occur; at least not often. The coding style recommends
	// putting spaces around operators.
	// The exponent letter is compared directly instead of through toupper():
	// toupper() is undefined for values that are neither EOF nor
	// representable as unsigned char, and ch is a plain int here.
	if (IsADigit(ch) || ch == '.' || ch == 'e' || ch == 'E') {
		return true;
	}
	const bool isSign = ch == '-' || ch == '+';
	return isSign && (chNext == 'e' || chNext == 'E');
}

// Detects the start of a natural number, i.e. a digit that is not preceded
// by any symbol or operator. IsPrefixedNumberStart should be checked right
// after this function to cover the remaining numeric constructs.
static inline bool IsNaturalNumberStart(int ch) {
	const bool startsWithDigit = IsADigit(ch) != 0;
	return startsWithDigit;
}

static inline bool IsPrefixedNumberStart(int ch, int chNext) {
	// A number may also start with a decimal point or a sign, provided that
	// a digit follows immediately.
	// KNOWN PROBLEM: a + or - operator written immediately before a number
	// is not recognized as an operator; it is styled together with the
	// number that follows. This should be rare because the coding style
	// recommends putting spaces around operators.
	switch (ch) {
	case '.':
	case '-':
	case '+':
		return IsADigit(chNext);
	default:
		return false;
	}
}

static inline bool IsOperator(int ch) {
	// Tells whether ch is one of the single-character operator symbols.
	// strchr() treats the terminating null as part of the searched string
	// and converts its second argument to char, so a null character or a
	// wider value whose low byte equals an operator would match by accident.
	// Only non-zero 7-bit values can be operators.
	if (ch <= 0 || ch > 0x7F) {
		return false;
	}
	return strchr("%^&*()-+={}[]:;<>,/?!.~|\\", ch) != NULL;
}

// ---------------------------------------------------------------
// Functions classifying a token currently processed in the lexer.

// Tells whether the preprocessor token styled so far is "#ifdef DOC" (in any
// letter case), the directive usually used to introduce a documentation
// comment. It is meant to be called only while the current state is the
// preprocessor directive.
static bool IsDocCommentStart(StyleContext &sc) {
	// Read the token back only if its length fits and the directive cannot
	// continue with another letter or digit.
	if (sc.LengthCurrent() != 10 || IsAlphaNumeric(sc.ch)) {
		return false;
	}
	char lowered[11];
	sc.GetCurrentLowered(lowered, sizeof(lowered));
	return strcmp(lowered, "#ifdef doc") == 0;
}

// Tells whether the current position looks like the #endif directive that
// closes a documentation comment opened by #ifdef DOC. It is meant to be
// called while the current state is the documentation comment.
// QUESTIONABLE ASSUMPTION: only the leading "#e" is tested, not the complete
// #endif. The original author relies on no other preprocessor directive
// starting with the same letter.
static bool IsDocCommentEnd(StyleContext &sc) {
	if (sc.ch != '#') {
		return false;
	}
	return sc.chNext == 'e';
}

class IdentifierClassifier {
	WordList &keywords;  // Passed from keywords property.
	WordList &constants; // Passed from keywords2 property.
	WordList &operators; // Passed from keywords3 property.
	WordList &types;     // Passed from keywords4 property.
	WordList &functions; // Passed from keywords5 property.
	WordList &objects;   // Passed from keywords6 property.

	IdentifierClassifier(IdentifierClassifier const&);
	IdentifierClassifier& operator=(IdentifierClassifier const&);

public:
	IdentifierClassifier(WordList *keywordlists[]) :
		keywords(*keywordlists[0]), constants(*keywordlists[1]),
		operators(*keywordlists[2]), types(*keywordlists[3]),
		functions(*keywordlists[4]), objects(*keywordlists[5])
	{}

	void ClassifyIdentifier(StyleContext &sc) {
		// Opening parenthesis following an identifier makes it a possible
		// function call.
		// KNOWN PROBLEM: If some whitespace is inserted between the
		// identifier and the parenthesis they will not be able to be
		// recognized as a function call. This should not occur; at
		// least not often. Such coding style would be weird.
		if (sc.Match('(')) {
			char s[100];
			sc.GetCurrentLowered(s, sizeof(s));
			// Before an opening brace can be control statements and
			// operators too; function call is the last option.
			if (keywords.InList(s)) {
				sc.ChangeState(SCE_OSCRIPT_KEYWORD);
			} else if (operators.InList(s)) {
				sc.ChangeState(SCE_OSCRIPT_OPERATOR);
			} else if (functions.InList(s)) {
				sc.ChangeState(SCE_OSCRIPT_FUNCTION);
			} else {
				sc.ChangeState(SCE_OSCRIPT_METHOD);
			}
			sc.SetState(SCE_OSCRIPT_OPERATOR);
		} else {
			char s[100];
			sc.GetCurrentLowered(s, sizeof(s));
			// A dot following an identifier means an access to an object
			// member. The related object identifier can be special.
			// KNOWN PROBLEM: If there is whitespace between the identifier
			// and the following dot, the identifier will not be recognized
			// as an object in an object member access. If it is one of the
			// listed static objects it will not be styled.
			if (sc.Match('.') && objects.InList(s)) {
				sc.ChangeState(SCE_OSCRIPT_OBJECT);
				sc.SetState(SCE_OSCRIPT_OPERATOR);
			} else {
				if (keywords.InList(s)) {
					sc.ChangeState(SCE_OSCRIPT_KEYWORD);
				} else if (constants.InList(s)) {
					sc.ChangeState(SCE_OSCRIPT_CONSTANT);
				} else if (operators.InList(s)) {
					sc.ChangeState(SCE_OSCRIPT_OPERATOR);
				} else if (types.InList(s)) {
					sc.ChangeState(SCE_OSCRIPT_TYPE);
				} else if (functions.InList(s)) {
					sc.ChangeState(SCE_OSCRIPT_FUNCTION);
				}
				sc.SetState(SCE_OSCRIPT_DEFAULT);
			}
		}
	}
};

// ------------------------------------------------
// Function colourising an excerpt of OScript code.

// Assigns OScript styles to the range of length characters starting at
// startPos. initStyle is the style in effect at startPos, keywordlists holds
// the six word lists consumed by IdentifierClassifier and styler gives
// access to the document. Every loop iteration first decides whether the
// current state ends at the current character and then, if the state is the
// default one, whether a new token starts there.
static void ColouriseOScriptDoc(unsigned int startPos, int length,
								int initStyle, WordList *keywordlists[],
								Accessor &styler) {
	// I wonder how whole-line styles ended by EOLN can escape the resetting
	// code in the loop below and overflow to the next line. Let us make sure
	// that a new line does not start with them carried from the previous one.
	// NOTE: An overflowing string is intentionally not checked; it reminds
	// the developer that the string must be ended on the same line.
	if (initStyle == SCE_OSCRIPT_LINE_COMMENT ||
			initStyle == SCE_OSCRIPT_PREPROCESSOR) {
		initStyle = SCE_OSCRIPT_DEFAULT;
	}

	styler.StartAt(startPos);
	StyleContext sc(startPos, length, initStyle, styler);
	IdentifierClassifier identifierClassifier(keywordlists);

	// It starts with true at the beginning of a line and changes to false as
	// soon as the first non-whitespace character has been processed.
	bool isFirstToken = true;
	// It starts with true at the beginning of a line and changes to false as
	// soon as the first identifier on the line is passed by.
	bool isFirstIdentifier = true;
	// It is reset to false when #ifdef DOC (the preprocessor directive often
	// used to start a documentation comment) is encountered and stays false
	// until the end of the documentation block is detected. This is done
	// by checking for the complementary #endif preprocessor directive; once
	// it is true, the documentation comment ends at the next line end.
	bool endDocComment = false;

	for (; sc.More(); sc.Forward()) {

		if (sc.atLineStart) {
			isFirstToken = true;
			isFirstIdentifier = true;
		// Detect that the current state is neither whitespace nor identifier.
		// It means that no next identifier can be the first token on the line.
		} else if (isFirstIdentifier && sc.state != SCE_OSCRIPT_DEFAULT &&
				   sc.state != SCE_OSCRIPT_IDENTIFIER) {
			isFirstIdentifier = false;
		}

		// Check if the current state should be changed.
		if (sc.state == SCE_OSCRIPT_OPERATOR) {
			// Multiple-symbol operators are marked by single characters.
			sc.SetState(SCE_OSCRIPT_DEFAULT);
		} else if (sc.state == SCE_OSCRIPT_IDENTIFIER) {
			if (!IsIdentifierChar(sc.ch)) {
				// Colon after an identifier makes it a label if it is the
				// first token on the line.
				// KNOWN PROBLEM: If some whitespace is inserted between the
				// identifier and the colon they will not be recognized as a
				// label. This should not occur; at least not often. It would
				// make the code structure less legible and examples in the
				// Livelink documentation do not show it.
				if (sc.Match(':') && isFirstIdentifier) {
					sc.ChangeState(SCE_OSCRIPT_LABEL);
					sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
				} else {
					identifierClassifier.ClassifyIdentifier(sc);
				}
				// Avoid a sequence of two words being mistaken for a label.
				// A switch case would be an example.
				isFirstIdentifier = false;
			}
		} else if (sc.state == SCE_OSCRIPT_GLOBAL) {
			if (!IsIdentifierChar(sc.ch)) {
				sc.SetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_PROPERTY) {
			if (!IsIdentifierChar(sc.ch)) {
				// Any member access introduced by the dot operator is
				// initially marked as a property access. If an opening
				// parenthesis is detected later it is changed to method call.
				// KNOWN PROBLEM: The same as at the function call recognition
				// for SCE_OSCRIPT_IDENTIFIER above.
				if (sc.Match('(')) {
					sc.ChangeState(SCE_OSCRIPT_METHOD);
				}
				sc.SetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_NUMBER) {
			if (!IsNumberChar(sc.ch, sc.chNext)) {
				sc.SetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_SINGLEQUOTE_STRING) {
			if (sc.ch == '\'') {
				// Two consecutive apostrophes convert to a single one.
				if (sc.chNext == '\'') {
					sc.Forward();
				} else {
					sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
				}
			} else if (sc.atLineEnd) {
				// An unterminated string does not continue on the next line.
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_DOUBLEQUOTE_STRING) {
			if (sc.ch == '\"') {
				// Two consecutive quotation marks convert to a single one.
				if (sc.chNext == '\"') {
					sc.Forward();
				} else {
					sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
				}
			} else if (sc.atLineEnd) {
				// An unterminated string does not continue on the next line.
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_BLOCK_COMMENT) {
			if (sc.Match('*', '/')) {
				// Skip both closing characters before leaving the comment.
				sc.Forward();
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_LINE_COMMENT) {
			if (sc.atLineEnd) {
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_PREPROCESSOR) {
			if (IsDocCommentStart(sc)) {
				// The directive read so far is #ifdef DOC; restyle it as the
				// start of a documentation comment.
				sc.ChangeState(SCE_OSCRIPT_DOC_COMMENT);
				endDocComment = false;
			} else if (sc.atLineEnd) {
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		} else if (sc.state == SCE_OSCRIPT_DOC_COMMENT) {
			// KNOWN PROBLEM: The documentation comment block ends at the
			// first line that would close a conditional preprocessor block
			// (#endif). Nested #if-#endif blocks are not supported.
			// Hopefully it will not occur often that a line within the text
			// block would start with #endif.
			if (isFirstToken && IsDocCommentEnd(sc)) {
				endDocComment = true;
			} else if (sc.atLineEnd && endDocComment) {
				sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
			}
		}

		// Check what state starts with the current character.
		if (sc.state == SCE_OSCRIPT_DEFAULT) {
			if (sc.Match('\'')) {
				sc.SetState(SCE_OSCRIPT_SINGLEQUOTE_STRING);
			} else if (sc.Match('\"')) {
				sc.SetState(SCE_OSCRIPT_DOUBLEQUOTE_STRING);
			} else if (sc.Match('/', '/')) {
				sc.SetState(SCE_OSCRIPT_LINE_COMMENT);
				sc.Forward();
			} else if (sc.Match('/', '*')) {
				sc.SetState(SCE_OSCRIPT_BLOCK_COMMENT);
				sc.Forward();
			} else if (isFirstToken && sc.Match('#')) {
				// A directive is recognized only if nothing but spaces or
				// tabs precedes the # on the line.
				sc.SetState(SCE_OSCRIPT_PREPROCESSOR);
			} else if (sc.Match('$')) {
				// Both process-global ($xxx) and thread-global ($$xxx)
				// variables are handled as one global.
				sc.SetState(SCE_OSCRIPT_GLOBAL);
			} else if (IsNaturalNumberStart(sc.ch)) {
				sc.SetState(SCE_OSCRIPT_NUMBER);
			} else if (IsPrefixedNumberStart(sc.ch, sc.chNext)) {
				// Step over the prefix so that the number state continues
				// with the digit following it.
				sc.SetState(SCE_OSCRIPT_NUMBER);
				sc.Forward();
			} else if (sc.Match('.') && IsIdentifierStart(sc.chNext)) {
				// Every object member access is marked as a property access
				// initially. The decision between property and method is made
				// after parsing the identifier and looking what comes then.
				// KNOWN PROBLEM: If there is whitespace between the following
				// identifier and the dot, the dot will not be recognized
				// as a member accessing operator. In turn, the identifier
				// will not be recognizable as a property or a method too.
				sc.SetState(SCE_OSCRIPT_OPERATOR);
				sc.Forward();
				sc.SetState(SCE_OSCRIPT_PROPERTY);
			} else if (IsIdentifierStart(sc.ch)) {
				sc.SetState(SCE_OSCRIPT_IDENTIFIER);
			} else if (IsOperator(sc.ch)) {
				sc.SetState(SCE_OSCRIPT_OPERATOR);
			}
		}

		// The first character that is neither a space nor a tab ends the
		// "first token" phase of the line.
		if (isFirstToken && !IsASpaceOrTab(sc.ch)) {
			isFirstToken = false;
		}
	}

	sc.Complete();
}

// ------------------------------------------
// Functions supporting OScript code folding.

static inline bool IsBlockComment(int style) {
	// Only the block comment style counts; line and documentation comments
	// are handled separately.
	const bool insideBlockComment = (SCE_OSCRIPT_BLOCK_COMMENT == style);
	return insideBlockComment;
}

// Tells whether the given line holds nothing but a line comment, optionally
// preceded by spaces or tabs.
static bool IsLineComment(int line, Accessor &styler) {
	const int lineBegin = styler.LineStart(line);
	// One position before the start of the next line.
	const int lineLast = styler.LineStart(line + 1) - 1;
	for (int pos = lineBegin; pos < lineLast; pos++) {
		const char current = styler[pos];
		const char following = styler.SafeGetCharAt(pos + 1);
		const int styleHere = styler.StyleAt(pos);
		const bool startsComment = current == '/' && following == '/' &&
			styleHere == SCE_OSCRIPT_LINE_COMMENT;
		if (startsComment) {
			return true;
		}
		// Anything else than leading whitespace disqualifies the line.
		if (!IsASpaceOrTab(current)) {
			return false;
		}
	}
	return false;
}

static inline bool IsPreprocessor(int style) {
	// Documentation comments are introduced by a preprocessor directive, so
	// their style counts as a preprocessor style here too.
	switch (style) {
	case SCE_OSCRIPT_PREPROCESSOR:
	case SCE_OSCRIPT_DOC_COMMENT:
		return true;
	default:
		return false;
	}
}

// Copies the characters at the positions start to end (both inclusive) from
// styler into s, converted to lower case. At most len - 1 characters are
// copied and the result is always null-terminated. Nothing is written if
// len is zero.
static void GetRangeLowered(unsigned int start, unsigned int end,
							Accessor &styler, char *s, unsigned int len) {
	if (len == 0) {
		// No room even for the terminating null; len - 1 below would wrap
		// around to a huge unsigned value.
		return;
	}
	unsigned int i = 0;
	while (i < end - start + 1 && i < len - 1) {
		// tolower() is defined only for EOF and values representable as
		// unsigned char; a plain char can be negative for bytes above 0x7F.
		const unsigned char ch = static_cast<unsigned char>(styler[start + i]);
		s[i] = static_cast<char>(tolower(ch));
		i++;
	}
	s[i] = '\0';
}

// Copies the run of ASCII letters beginning at start from styler into s,
// converted to lower case. Copying stops at the first non-letter or after
// len - 1 characters; the result is always null-terminated.
static void GetForwardWordLowered(unsigned int start, Accessor &styler,
								  char *s, unsigned int len) {
	unsigned int count = 0;
	for (; count < len - 1; count++) {
		const char letter = styler.SafeGetCharAt(start + count);
		if (!IsAlpha(letter)) {
			break;
		}
		s[count] = static_cast<char>(tolower(letter));
	}
	s[count] = '\0';
}

// Adjusts levelCurrent according to the preprocessor directive whose name
// starts at startPos (the position right after the '#'): #ifdef and #ifndef
// open a fold level, #endif closes one. The level never drops below
// SC_FOLDLEVELBASE.
static void UpdatePreprocessorFoldLevel(int &levelCurrent,
		unsigned int startPos, Accessor &styler) {
	// Longest directive ("ifndef") + one extra character + null. The extra
	// character keeps a longer word such as "ifndefx" from being truncated
	// to "ifndef" and mistaken for the directive.
	char s[8];
	GetForwardWordLowered(startPos, styler, s, sizeof(s));

	if (strcmp(s, "ifdef") == 0 ||
		strcmp(s, "ifndef") == 0) {
		levelCurrent++;
	} else if (strcmp(s, "endif") == 0) {
		levelCurrent--;
		if (levelCurrent < SC_FOLDLEVELBASE) {
			levelCurrent = SC_FOLDLEVELBASE;
		}
	}
}

// Adjusts levelCurrent according to the keyword spanning the positions
// lastStart to currentPos (both inclusive): if, for, switch, function, while
// and repeat open a fold level; end and until close one. The level never
// drops below SC_FOLDLEVELBASE.
static void UpdateKeywordFoldLevel(int &levelCurrent, unsigned int lastStart,
		unsigned int currentPos, Accessor &styler) {
	// Longest keyword ("function") + one extra character + null. The extra
	// character keeps a longer keyword-styled word that merely starts with
	// "function" from being truncated to it and opening a fold.
	char s[10];
	GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s));

	if (strcmp(s, "if") == 0 || strcmp(s, "for") == 0 ||
		strcmp(s, "switch") == 0 || strcmp(s, "function") == 0 ||
		strcmp(s, "while") == 0 || strcmp(s, "repeat") == 0) {
		levelCurrent++;
	} else if (strcmp(s, "end") == 0 || strcmp(s, "until") == 0) {
		levelCurrent--;
		if (levelCurrent < SC_FOLDLEVELBASE) {
			levelCurrent = SC_FOLDLEVELBASE;
		}
	}
}

// ------------------------------
// Function folding OScript code.

// Computes fold levels for the lines in the range of length characters
// starting at startPos. initStyle is the style in effect before startPos and
// styler gives access to the text, the styles and the fold levels. The word
// lists are not used. The properties fold.comment, fold.preprocessor and
// fold.compact (which defaults to 1 here) control what gets folded.
static void FoldOScriptDoc(unsigned int startPos, int length, int initStyle,
						   WordList *[], Accessor &styler) {
	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
	bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0;
	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
	int endPos = startPos + length;
	// Number of non-space characters seen on the current line so far.
	int visibleChars = 0;
	int lineCurrent = styler.GetLine(startPos);
	// levelPrev is the level stored for the current line; levelCurrent is
	// the running level after the characters processed so far.
	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
	int levelCurrent = levelPrev;
	char chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int style = initStyle;
	// Position where the most recent run of keyword-styled characters began.
	int lastStart = 0;

	for (int i = startPos; i < endPos; i++) {
		// Shift the one-character look-ahead window of characters and styles.
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		int stylePrev = style;
		style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		// A lone \r, or \n (which also ends a \r\n pair), ends the line.
		bool atLineEnd = (ch == '\r' && chNext != '\n') || (ch == '\n');

		// A block comment opens a level at its first character and closes
		// it at its last one.
		if (foldComment && IsBlockComment(style)) {
			if (!IsBlockComment(stylePrev)) {
				levelCurrent++;
			} else if (!IsBlockComment(styleNext) && !atLineEnd) {
				// Comments do not end at end of line and the next character
				// may not be styled.
				levelCurrent--;
			}
		}
		// A run of consecutive line-comment lines folds as one group: the
		// first line of the run opens a level and the last one closes it.
		if (foldComment && atLineEnd && IsLineComment(lineCurrent, styler)) {
			if (!IsLineComment(lineCurrent - 1, styler) &&
				IsLineComment(lineCurrent + 1, styler))
				levelCurrent++;
			else if (IsLineComment(lineCurrent - 1, styler) &&
					 !IsLineComment(lineCurrent+1, styler))
				levelCurrent--;
		}
		// The directive name following a '#' decides about the level.
		if (foldPreprocessor) {
			if (ch == '#' && IsPreprocessor(style)) {
				UpdatePreprocessorFoldLevel(levelCurrent, i + 1, styler);
			}
		}

		// Remember where a keyword starts and evaluate it once its last
		// identifier character has been reached.
		if (stylePrev != SCE_OSCRIPT_KEYWORD && style == SCE_OSCRIPT_KEYWORD) {
			lastStart = i;
		}
		if (stylePrev == SCE_OSCRIPT_KEYWORD) {
			if(IsIdentifierChar(ch) && !IsIdentifierChar(chNext)) {
				UpdateKeywordFoldLevel(levelCurrent, lastStart, i, styler);
			}
		}

		if (!IsASpace(ch))
			visibleChars++;

		if (atLineEnd) {
			// Store the level the line started with; flag blank lines (if
			// compact folding is on) and lines that open a deeper level.
			int level = levelPrev;
			if (visibleChars == 0 && foldCompact)
				level |= SC_FOLDLEVELWHITEFLAG;
			if ((levelCurrent > levelPrev) && (visibleChars > 0))
				level |= SC_FOLDLEVELHEADERFLAG;
			if (level != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, level);
			}
			lineCurrent++;
			levelPrev = levelCurrent;
			visibleChars = 0;
		}
	}

	// If we did not reach EOLN in the previous loop, store the line level and
	// whitespace information. The rest will be filled in later.
	int lev = levelPrev;
	if (visibleChars == 0 && foldCompact)
		lev |= SC_FOLDLEVELWHITEFLAG;
	styler.SetLevel(lineCurrent, lev);
}

// --------------------------------------------
// Declaration of the OScript lexer descriptor.

// Descriptions of the word lists accepted by the lexer, in the order in which
// IdentifierClassifier reads them; the list ends with a null entry.
static const char * const oscriptWordListDesc[] = {
	"Keywords and reserved words",        // Word list 0.
	"Literal constants",                  // Word list 1.
	"Literal operators",                  // Word list 2.
	"Built-in value and reference types", // Word list 3.
	"Built-in global functions",          // Word list 4.
	"Built-in static objects",            // Word list 5.
	NULL
};

// Lexer module object for the language SCLEX_OSCRIPT named "oscript"; it ties
// together the colourising and folding functions and the word list
// descriptions defined in this file.
LexerModule lmOScript(SCLEX_OSCRIPT, ColouriseOScriptDoc, "oscript", FoldOScriptDoc, oscriptWordListDesc);
Commit	Line	Data
	1	// Scintilla source code edit control
	2	/** @file LexOScript.cxx
	3	** Lexer for OScript sources; ocx files and/or OSpace dumps.
	4	** OScript is a programming language used to develop applications for the
	5	** Livelink server platform.
	6	**/
	7	// Written by Ferdinand Prantl <prantlf@gmail.com>, inspired by the code from
	8	// LexVB.cxx and LexPascal.cxx. The License.txt file describes the conditions
	9	// under which this software may be distributed.
	10
	11	#include <stdlib.h>
	12	#include <string.h>
	13	#include <stdio.h>
	14	#include <stdarg.h>
	15	#include <assert.h>
	16	#include <ctype.h>
	17
	18	#include "ILexer.h"
	19	#include "Scintilla.h"
	20	#include "SciLexer.h"
	21
	22	#include "WordList.h"
	23	#include "LexAccessor.h"
	24	#include "Accessor.h"
	25	#include "StyleContext.h"
	26	#include "CharacterSet.h"
	27	#include "LexerModule.h"
	28
	29	#ifdef SCI_NAMESPACE
	30	using namespace Scintilla;
	31	#endif
	32
	33	// -----------------------------------------
	34	// Functions classifying a single character.
	35
	36	// This function is generic and should be probably moved to CharSet.h where
	37	// IsAlphaNumeric the others reside.
	38	inline bool IsAlpha(int ch) {
	39	return (ch >= 'a' && ch <= 'z') \|\| (ch >= 'A' && ch <= 'Z');
	40	}
	41
	42	static inline bool IsIdentifierChar(int ch) {
	43	// Identifiers cannot contain non-ASCII letters; a word with non-English
	44	// language-specific characters cannot be an identifier.
	45	return IsAlphaNumeric(ch) \|\| ch == '_';
	46	}
	47
	48	static inline bool IsIdentifierStart(int ch) {
	49	// Identifiers cannot contain non-ASCII letters; a word with non-English
	50	// language-specific characters cannot be an identifier.
	51	return IsAlpha(ch) \|\| ch == '_';
	52	}
	53
	54	static inline bool IsNumberChar(int ch, int chNext) {
	55	// Numeric constructs are not checked for lexical correctness. They are
	56	// expected to look like +1.23-E9 but actually any bunch of the following
	57	// characters will be styled as number.
	58	// KNOWN PROBLEM: if you put + or - operators immediately after a number
	59	// and the next operand starts with the letter E, the operator will not be
	60	// recognized and it will be styled together with the preceding number.
	61	// This should not occur; at least not often. The coding style recommends
	62	// putting spaces around operators.
	63	return IsADigit(ch) \|\| toupper(ch) == 'E' \|\| ch == '.' \|\|
	64	((ch == '-' \|\| ch == '+') && toupper(chNext) == 'E');
	65	}
	66
	67	// This function checks for the start or a natural number without any symbols
	68	// or operators as a prefix; the IsPrefixedNumberStart should be called
	69	// immediately after this one to cover all possible numeric constructs.
	70	static inline bool IsNaturalNumberStart(int ch) {
	71	return IsADigit(ch) != 0;
	72	}
	73
	74	static inline bool IsPrefixedNumberStart(int ch, int chNext) {
	75	// KNOWN PROBLEM: if you put + or - operators immediately before a number
	76	// the operator will not be recognized and it will be styled together with
	77	// the succeeding number. This should not occur; at least not often. The
	78	// coding style recommends putting spaces around operators.
	79	return (ch == '.' \|\| ch == '-' \|\| ch == '+') && IsADigit(chNext);
	80	}
	81
	82	static inline bool IsOperator(int ch) {
	83	return strchr("%^&*()-+={}[]:;<>,/?!.~\|\\", ch) != NULL;
	84	}
	85
	86	// ---------------------------------------------------------------
	87	// Functions classifying a token currently processed in the lexer.
	88
	89	// Checks if the current line starts with the preprocessor directive used
	90	// usually to introduce documentation comments: #ifdef DOC. This method is
	91	// supposed to be called if the line has been recognized as a preprocessor
	92	// directive already.
	93	static bool IsDocCommentStart(StyleContext &sc) {
	94	// Check the line back to its start only if the end looks promising.
	95	if (sc.LengthCurrent() == 10 && !IsAlphaNumeric(sc.ch)) {
	96	char s[11];
	97	sc.GetCurrentLowered(s, sizeof(s));
	98	return strcmp(s, "#ifdef doc") == 0;
	99	}
	100	return false;
	101	}
	102
	103	// Checks if the current line starts with the preprocessor directive that
	104	// is complementary to the #ifdef DOC start: #endif. This method is supposed
	105	// to be called if the current state point to the documentation comment.
	106	// QUESTIONAL ASSUMPTION: The complete #endif directive is not checked; just
	107	// the starting #e. However, there is no other preprocessor directive with
	108	// the same starting letter and thus this optimization should always work.
	109	static bool IsDocCommentEnd(StyleContext &sc) {
	110	return sc.ch == '#' && sc.chNext == 'e';
	111	}
	112
	113	class IdentifierClassifier {
	114	WordList &keywords; // Passed from keywords property.
	115	WordList &constants; // Passed from keywords2 property.
	116	WordList &operators; // Passed from keywords3 property.
	117	WordList &types; // Passed from keywords4 property.
	118	WordList &functions; // Passed from keywords5 property.
	119	WordList &objects; // Passed from keywords6 property.
	120
	121	IdentifierClassifier(IdentifierClassifier const&);
	122	IdentifierClassifier& operator=(IdentifierClassifier const&);
	123
	124	public:
	125	IdentifierClassifier(WordList *keywordlists[]) :
	126	keywords(keywordlists[0]), constants(keywordlists[1]),
	127	operators(keywordlists[2]), types(keywordlists[3]),
	128	functions(keywordlists[4]), objects(keywordlists[5])
	129	{}
	130
	131	void ClassifyIdentifier(StyleContext &sc) {
	132	// Opening parenthesis following an identifier makes it a possible
	133	// function call.
	134	// KNOWN PROBLEM: If some whitespace is inserted between the
	135	// identifier and the parenthesis they will not be able to be
	136	// recognized as a function call. This should not occur; at
	137	// least not often. Such coding style would be weird.
	138	if (sc.Match('(')) {
	139	char s[100];
	140	sc.GetCurrentLowered(s, sizeof(s));
	141	// Before an opening brace can be control statements and
	142	// operators too; function call is the last option.
	143	if (keywords.InList(s)) {
	144	sc.ChangeState(SCE_OSCRIPT_KEYWORD);
	145	} else if (operators.InList(s)) {
	146	sc.ChangeState(SCE_OSCRIPT_OPERATOR);
	147	} else if (functions.InList(s)) {
	148	sc.ChangeState(SCE_OSCRIPT_FUNCTION);
	149	} else {
	150	sc.ChangeState(SCE_OSCRIPT_METHOD);
	151	}
	152	sc.SetState(SCE_OSCRIPT_OPERATOR);
	153	} else {
	154	char s[100];
	155	sc.GetCurrentLowered(s, sizeof(s));
	156	// A dot following an identifier means an access to an object
	157	// member. The related object identifier can be special.
	158	// KNOWN PROBLEM: If there is whitespace between the identifier
	159	// and the following dot, the identifier will not be recognized
	160	// as an object in an object member access. If it is one of the
	161	// listed static objects it will not be styled.
	162	if (sc.Match('.') && objects.InList(s)) {
	163	sc.ChangeState(SCE_OSCRIPT_OBJECT);
	164	sc.SetState(SCE_OSCRIPT_OPERATOR);
	165	} else {
	166	if (keywords.InList(s)) {
	167	sc.ChangeState(SCE_OSCRIPT_KEYWORD);
	168	} else if (constants.InList(s)) {
	169	sc.ChangeState(SCE_OSCRIPT_CONSTANT);
	170	} else if (operators.InList(s)) {
	171	sc.ChangeState(SCE_OSCRIPT_OPERATOR);
	172	} else if (types.InList(s)) {
	173	sc.ChangeState(SCE_OSCRIPT_TYPE);
	174	} else if (functions.InList(s)) {
	175	sc.ChangeState(SCE_OSCRIPT_FUNCTION);
	176	}
	177	sc.SetState(SCE_OSCRIPT_DEFAULT);
	178	}
	179	}
	180	}
	181	};
	182
	183	// ------------------------------------------------
	184	// Function colourising an excerpt of OScript code.
	185
	186	static void ColouriseOScriptDoc(unsigned int startPos, int length,
	187	int initStyle, WordList *keywordlists[],
	188	Accessor &styler) {
	189	// I wonder how whole-line styles ended by EOLN can escape the resetting
	190	// code in the loop below and overflow to the next line. Let us make sure
	191	// that a new line does not start with them carried from the previous one.
	192	// NOTE: An overflowing string is intentionally not checked; it reminds
	193	// the developer that the string must be ended on the same line.
	194	if (initStyle == SCE_OSCRIPT_LINE_COMMENT \|\|
	195	initStyle == SCE_OSCRIPT_PREPROCESSOR) {
	196	initStyle = SCE_OSCRIPT_DEFAULT;
	197	}
	198
	199	styler.StartAt(startPos);
	200	StyleContext sc(startPos, length, initStyle, styler);
	201	IdentifierClassifier identifierClassifier(keywordlists);
	202
	203	// It starts with true at the beginning of a line and changes to false as
	204	// soon as the first non-whitespace character has been processed.
	205	bool isFirstToken = true;
	206	// It starts with true at the beginning of a line and changes to false as
	207	// soon as the first identifier on the line is passed by.
	208	bool isFirstIdentifier = true;
	209	// It becomes false when #ifdef DOC (the preprocessor directive often
	210	// used to start a documentation comment) is encountered and remain false
	211	// until the end of the documentation block is not detected. This is done
	212	// by checking for the complementary #endif preprocessor directive.
	213	bool endDocComment = false;
	214
	215	for (; sc.More(); sc.Forward()) {
	216
	217	if (sc.atLineStart) {
	218	isFirstToken = true;
	219	isFirstIdentifier = true;
	220	// Detect the current state is neither whitespace nor identifier. It
	221	// means that no next identifier can be the first token on the line.
	222	} else if (isFirstIdentifier && sc.state != SCE_OSCRIPT_DEFAULT &&
	223	sc.state != SCE_OSCRIPT_IDENTIFIER) {
	224	isFirstIdentifier = false;
	225	}
	226
	227	// Check if the current state should be changed.
	228	if (sc.state == SCE_OSCRIPT_OPERATOR) {
	229	// Multiple-symbol operators are marked by single characters.
	230	sc.SetState(SCE_OSCRIPT_DEFAULT);
	231	} else if (sc.state == SCE_OSCRIPT_IDENTIFIER) {
	232	if (!IsIdentifierChar(sc.ch)) {
	233	// Colon after an identifier makes it a label if it is the
	234	// first token on the line.
	235	// KNOWN PROBLEM: If some whitespace is inserted between the
	236	// identifier and the colon they will not be recognized as a
	237	// label. This should not occur; at least not often. It would
	238	// make the code structure less legible and examples in the
	239	// Livelink documentation do not show it.
	240	if (sc.Match(':') && isFirstIdentifier) {
	241	sc.ChangeState(SCE_OSCRIPT_LABEL);
	242	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	243	} else {
	244	identifierClassifier.ClassifyIdentifier(sc);
	245	}
	246	// Avoid a sequence of two words be mistaken for a label. A
	247	// switch case would be an example.
	248	isFirstIdentifier = false;
	249	}
	250	} else if (sc.state == SCE_OSCRIPT_GLOBAL) {
	251	if (!IsIdentifierChar(sc.ch)) {
	252	sc.SetState(SCE_OSCRIPT_DEFAULT);
	253	}
	254	} else if (sc.state == SCE_OSCRIPT_PROPERTY) {
	255	if (!IsIdentifierChar(sc.ch)) {
	256	// Any member access introduced by the dot operator is
	257	// initially marked as a property access. If an opening
	258	// parenthesis is detected later it is changed to method call.
	259	// KNOWN PROBLEM: The same as at the function call recognition
	260	// for SCE_OSCRIPT_IDENTIFIER above.
	261	if (sc.Match('(')) {
	262	sc.ChangeState(SCE_OSCRIPT_METHOD);
	263	}
	264	sc.SetState(SCE_OSCRIPT_DEFAULT);
	265	}
	266	} else if (sc.state == SCE_OSCRIPT_NUMBER) {
	267	if (!IsNumberChar(sc.ch, sc.chNext)) {
	268	sc.SetState(SCE_OSCRIPT_DEFAULT);
	269	}
	270	} else if (sc.state == SCE_OSCRIPT_SINGLEQUOTE_STRING) {
	271	if (sc.ch == '\'') {
	272	// Two consequential apostrophes convert to a single one.
	273	if (sc.chNext == '\'') {
	274	sc.Forward();
	275	} else {
	276	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	277	}
	278	} else if (sc.atLineEnd) {
	279	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	280	}
	281	} else if (sc.state == SCE_OSCRIPT_DOUBLEQUOTE_STRING) {
	282	if (sc.ch == '\"') {
	283	// Two consequential quotation marks convert to a single one.
	284	if (sc.chNext == '\"') {
	285	sc.Forward();
	286	} else {
	287	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	288	}
	289	} else if (sc.atLineEnd) {
	290	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	291	}
	292	} else if (sc.state == SCE_OSCRIPT_BLOCK_COMMENT) {
	293	if (sc.Match('*', '/')) {
	294	sc.Forward();
	295	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	296	}
	297	} else if (sc.state == SCE_OSCRIPT_LINE_COMMENT) {
	298	if (sc.atLineEnd) {
	299	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	300	}
	301	} else if (sc.state == SCE_OSCRIPT_PREPROCESSOR) {
	302	if (IsDocCommentStart(sc)) {
	303	sc.ChangeState(SCE_OSCRIPT_DOC_COMMENT);
	304	endDocComment = false;
	305	} else if (sc.atLineEnd) {
	306	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	307	}
	308	} else if (sc.state == SCE_OSCRIPT_DOC_COMMENT) {
	309	// KNOWN PROBLEM: The documentation comment block ends at the
	310	// first line that would close a conditional preprocessor block
	311	// (#endif). Nested #if-#endif blocks are not supported.
	312	// Hopefully it will not occur often that a line within the
	313	// text block would start with #endif.
	314	if (isFirstToken && IsDocCommentEnd(sc)) {
	315	endDocComment = true;
	316	} else if (sc.atLineEnd && endDocComment) {
	317	sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
	318	}
	319	}
	320
	321	// Check what state starts with the current character.
	322	if (sc.state == SCE_OSCRIPT_DEFAULT) {
	323	if (sc.Match('\'')) {
	324	sc.SetState(SCE_OSCRIPT_SINGLEQUOTE_STRING);
	325	} else if (sc.Match('\"')) {
	326	sc.SetState(SCE_OSCRIPT_DOUBLEQUOTE_STRING);
	327	} else if (sc.Match('/', '/')) {
	328	sc.SetState(SCE_OSCRIPT_LINE_COMMENT);
	329	sc.Forward();
	330	} else if (sc.Match('/', '*')) {
	331	sc.SetState(SCE_OSCRIPT_BLOCK_COMMENT);
	332	sc.Forward();
	333	} else if (isFirstToken && sc.Match('#')) {
	334	sc.SetState(SCE_OSCRIPT_PREPROCESSOR);
	335	} else if (sc.Match('$')) {
	336	// Both process-global ($xxx) and thread-global ($$xxx)
	337	// variables are handled as one global.
	338	sc.SetState(SCE_OSCRIPT_GLOBAL);
	339	} else if (IsNaturalNumberStart(sc.ch)) {
	340	sc.SetState(SCE_OSCRIPT_NUMBER);
	341	} else if (IsPrefixedNumberStart(sc.ch, sc.chNext)) {
	342	sc.SetState(SCE_OSCRIPT_NUMBER);
	343	sc.Forward();
	344	} else if (sc.Match('.') && IsIdentifierStart(sc.chNext)) {
	345	// Every object member access is marked as a property access
	346	// initially. The decision between property and method is made
	347	// after parsing the identifier and looking what comes then.
	348	// KNOWN PROBLEM: If there is whitespace between the following
	349	// identifier and the dot, the dot will not be recognized
	350	// as a member accessing operator. In turn, the identifier
	351	// will not be recognizable as a property or a method too.
	352	sc.SetState(SCE_OSCRIPT_OPERATOR);
	353	sc.Forward();
	354	sc.SetState(SCE_OSCRIPT_PROPERTY);
	355	} else if (IsIdentifierStart(sc.ch)) {
	356	sc.SetState(SCE_OSCRIPT_IDENTIFIER);
	357	} else if (IsOperator(sc.ch)) {
	358	sc.SetState(SCE_OSCRIPT_OPERATOR);
	359	}
	360	}
	361
	362	if (isFirstToken && !IsASpaceOrTab(sc.ch)) {
	363	isFirstToken = false;
	364	}
	365	}
	366
	367	sc.Complete();
	368	}
	369
	370	// ------------------------------------------
	371	// Functions supporting OScript code folding.
	372
	373	static inline bool IsBlockComment(int style) {
	374	return style == SCE_OSCRIPT_BLOCK_COMMENT;
	375	}
	376
	377	static bool IsLineComment(int line, Accessor &styler) {
	378	int pos = styler.LineStart(line);
	379	int eolPos = styler.LineStart(line + 1) - 1;
	380	for (int i = pos; i < eolPos; i++) {
	381	char ch = styler[i];
	382	char chNext = styler.SafeGetCharAt(i + 1);
	383	int style = styler.StyleAt(i);
	384	if (ch == '/' && chNext == '/' && style == SCE_OSCRIPT_LINE_COMMENT) {
	385	return true;
	386	} else if (!IsASpaceOrTab(ch)) {
	387	return false;
	388	}
	389	}
	390	return false;
	391	}
	392
	393	static inline bool IsPreprocessor(int style) {
	394	return style == SCE_OSCRIPT_PREPROCESSOR \|\|
	395	style == SCE_OSCRIPT_DOC_COMMENT;
	396	}
	397
	398	static void GetRangeLowered(unsigned int start, unsigned int end,
	399	Accessor &styler, char *s, unsigned int len) {
	400	unsigned int i = 0;
	401	while (i < end - start + 1 && i < len - 1) {
	402	s[i] = static_cast<char>(tolower(styler[start + i]));
	403	i++;
	404	}
	405	s[i] = '\0';
	406	}
	407
	408	static void GetForwardWordLowered(unsigned int start, Accessor &styler,
	409	char *s, unsigned int len) {
	410	unsigned int i = 0;
	411	while (i < len - 1 && IsAlpha(styler.SafeGetCharAt(start + i))) {
	412	s[i] = static_cast<char>(tolower(styler.SafeGetCharAt(start + i)));
	413	i++;
	414	}
	415	s[i] = '\0';
	416	}
	417
	418	static void UpdatePreprocessorFoldLevel(int &levelCurrent,
	419	unsigned int startPos, Accessor &styler) {
	420	char s[7]; // Size of the longest possible keyword + null.
	421	GetForwardWordLowered(startPos, styler, s, sizeof(s));
	422
	423	if (strcmp(s, "ifdef") == 0 \|\|
	424	strcmp(s, "ifndef") == 0) {
	425	levelCurrent++;
	426	} else if (strcmp(s, "endif") == 0) {
	427	levelCurrent--;
	428	if (levelCurrent < SC_FOLDLEVELBASE) {
	429	levelCurrent = SC_FOLDLEVELBASE;
	430	}
	431	}
	432	}
	433
	434	static void UpdateKeywordFoldLevel(int &levelCurrent, unsigned int lastStart,
	435	unsigned int currentPos, Accessor &styler) {
	436	char s[9];
	437	GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s));
	438
	439	if (strcmp(s, "if") == 0 \|\| strcmp(s, "for") == 0 \|\|
	440	strcmp(s, "switch") == 0 \|\| strcmp(s, "function") == 0 \|\|
	441	strcmp(s, "while") == 0 \|\| strcmp(s, "repeat") == 0) {
	442	levelCurrent++;
	443	} else if (strcmp(s, "end") == 0 \|\| strcmp(s, "until") == 0) {
	444	levelCurrent--;
	445	if (levelCurrent < SC_FOLDLEVELBASE) {
	446	levelCurrent = SC_FOLDLEVELBASE;
	447	}
	448	}
	449	}
	450
	451	// ------------------------------
	452	// Function folding OScript code.
	453
	454	static void FoldOScriptDoc(unsigned int startPos, int length, int initStyle,
	455	WordList *[], Accessor &styler) {
	456	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
	457	bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0;
	458	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
	459	int endPos = startPos + length;
	460	int visibleChars = 0;
	461	int lineCurrent = styler.GetLine(startPos);
	462	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
	463	int levelCurrent = levelPrev;
	464	char chNext = styler[startPos];
	465	int styleNext = styler.StyleAt(startPos);
	466	int style = initStyle;
	467	int lastStart = 0;
	468
	469	for (int i = startPos; i < endPos; i++) {
	470	char ch = chNext;
	471	chNext = styler.SafeGetCharAt(i + 1);
	472	int stylePrev = style;
	473	style = styleNext;
	474	styleNext = styler.StyleAt(i + 1);
	475	bool atLineEnd = (ch == '\r' && chNext != '\n') \|\| (ch == '\n');
	476
	477	if (foldComment && IsBlockComment(style)) {
	478	if (!IsBlockComment(stylePrev)) {
	479	levelCurrent++;
	480	} else if (!IsBlockComment(styleNext) && !atLineEnd) {
	481	// Comments do not end at end of line and the next character
	482	// may not be styled.
	483	levelCurrent--;
	484	}
	485	}
	486	if (foldComment && atLineEnd && IsLineComment(lineCurrent, styler)) {
	487	if (!IsLineComment(lineCurrent - 1, styler) &&
	488	IsLineComment(lineCurrent + 1, styler))
	489	levelCurrent++;
	490	else if (IsLineComment(lineCurrent - 1, styler) &&
	491	!IsLineComment(lineCurrent+1, styler))
	492	levelCurrent--;
	493	}
	494	if (foldPreprocessor) {
	495	if (ch == '#' && IsPreprocessor(style)) {
	496	UpdatePreprocessorFoldLevel(levelCurrent, i + 1, styler);
	497	}
	498	}
	499
	500	if (stylePrev != SCE_OSCRIPT_KEYWORD && style == SCE_OSCRIPT_KEYWORD) {
	501	lastStart = i;
	502	}
	503	if (stylePrev == SCE_OSCRIPT_KEYWORD) {
	504	if(IsIdentifierChar(ch) && !IsIdentifierChar(chNext)) {
	505	UpdateKeywordFoldLevel(levelCurrent, lastStart, i, styler);
	506	}
	507	}
	508
	509	if (!IsASpace(ch))
	510	visibleChars++;
	511
	512	if (atLineEnd) {
	513	int level = levelPrev;
	514	if (visibleChars == 0 && foldCompact)
	515	level \|= SC_FOLDLEVELWHITEFLAG;
	516	if ((levelCurrent > levelPrev) && (visibleChars > 0))
	517	level \|= SC_FOLDLEVELHEADERFLAG;
	518	if (level != styler.LevelAt(lineCurrent)) {
	519	styler.SetLevel(lineCurrent, level);
	520	}
	521	lineCurrent++;
	522	levelPrev = levelCurrent;
	523	visibleChars = 0;
	524	}
	525	}
	526
	527	// If we did not reach EOLN in the previous loop, store the line level and
	528	// whitespace information. The rest will be filled in later.
	529	int lev = levelPrev;
	530	if (visibleChars == 0 && foldCompact)
	531	lev \|= SC_FOLDLEVELWHITEFLAG;
	532	styler.SetLevel(lineCurrent, lev);
	533	}
	534
	535	// --------------------------------------------
	536	// Declaration of the OScript lexer descriptor.
	537
	// Descriptions of the word lists accepted by the OScript lexer, handed to
	// the lexer module descriptor. The list ends with a null entry.
	static const char *const oscriptWordListDesc[] = {
		"Keywords and reserved words",
		"Literal constants",
		"Literal operators",
		"Built-in value and reference types",
		"Built-in global functions",
		"Built-in static objects",
		0 // end-of-list marker
	};
	547
	548	LexerModule lmOScript(SCLEX_OSCRIPT, ColouriseOScriptDoc, "oscript", FoldOScriptDoc, oscriptWordListDesc);