]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexers/LexA68k.cxx
simplify code so it always returns the same object
[wxWidgets.git] / src / stc / scintilla / lexers / LexA68k.cxx
1 // Scintilla source code edit control
2 /** @file LexA68k.cxx
3 ** Lexer for Assembler, just for the 68k syntax
4 ** Written by Martial Demolins AKA Folco
5 **/
6 // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
7 // The License.txt file describes the conditions under which this software
8 // may be distributed.
9
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32
33
// Return values for GetOperatorType.
// Kept as an anonymous enum (rather than macros) so that the names are scoped,
// visible in a debugger and cannot be mangled by macro expansion. The values
// convert implicitly to the int returned by GetOperatorType.
enum
{
    NO_OPERATOR    = 0,     // The char is not an operator
    OPERATOR_1CHAR = 1,     // The operator is one char long
    OPERATOR_2CHAR = 2      // The operator is two chars long
};
38
39
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or '\'.
 *
 *  Any value outside the 7-bit ASCII range is rejected before isalpha() is
 *  called, because the <ctype.h> classifiers have undefined behaviour for
 *  values that are neither representable as unsigned char nor equal to EOF.
 */

static inline bool IsIdentifierStart (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
    {
        return false;
    }
    return (isalpha(ch) || (ch == '_') || (ch == '\\'));
}
50
51
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  Any value outside the 7-bit ASCII range is rejected before isalnum() is
 *  called, because the <ctype.h> classifiers have undefined behaviour for
 *  values that are neither representable as unsigned char nor equal to EOF.
 *  This matters here: the end-of-identifier test in ColouriseA68kDoc calls
 *  this function without checking the range first.
 */

static inline bool IsIdentifierChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
    {
        return false;
    }
    return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
}
62
63
64 /**
65 * GetOperatorType
66 *
67 * Return:
68 * NO_OPERATOR if char is not an operator
69 * OPERATOR_1CHAR if the operator is one char long
70 * OPERATOR_2CHAR if the operator is two chars long
71 */
72
73 static inline int GetOperatorType (const int ch1, const int ch2)
74 {
75 int OpType = NO_OPERATOR;
76
77 if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
78 (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
79 OpType = OPERATOR_1CHAR;
80
81 else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
82 OpType = OPERATOR_2CHAR;
83
84 return OpType;
85 }
86
87
/**
 *  IsBin
 *
 *  Return true if the given char is a binary digit, i.e. '0' or '1'
 */

static inline bool IsBin (const int ch)
{
    switch (ch)
    {
        case '0':
        case '1':
            return true;

        default:
            return false;
    }
}
98
99
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  Any value outside the 7-bit ASCII range is rejected before isalpha() is
 *  called, because the <ctype.h> classifiers have undefined behaviour for
 *  values that are neither representable as unsigned char nor equal to EOF.
 */

static inline bool IsDoxygenChar (const int ch)
{
    if ((ch < 0) || (ch >= 0x80))
    {
        return false;
    }
    return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
110
111
112 /**
113 * ColouriseA68kDoc
114 *
115 * Main function, which colourises a 68k source
116 */
117
118 static void ColouriseA68kDoc (unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler)
119 {
120
121 // Get references to keywords lists
122 WordList &cpuInstruction = *keywordlists[0];
123 WordList &registers = *keywordlists[1];
124 WordList &directive = *keywordlists[2];
125 WordList &extInstruction = *keywordlists[3];
126 WordList &commentSpecial = *keywordlists[4];
127 WordList &doxygenKeyword = *keywordlists[5];
128
129
130 // Instanciate a context for our source
131 StyleContext sc(startPos, length, initStyle, styler);
132
133
134 /************************************************************
135 *
136 * Parse the text
137 *
138 ************************************************************/
139
140 for ( ; sc.More(); sc.Forward())
141 {
142 char Buffer[100];
143 int OpType;
144
145 // Reset style at beginning of line
146 if (sc.atLineStart)
147 sc.SetState(SCE_A68K_DEFAULT);
148
149
150 /************************************************************
151 *
152 * Handle current state if we are not in the "default style"
153 *
154 ************************************************************/
155
156 if (sc.state != SCE_A68K_DEFAULT)
157 {
158 // Check if current style continue.
159 // If this case, we loop because there is nothing else to do
160 if (((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
161 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
162 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
163 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Arg of macro
164 || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
165 || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
166 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
167 // Label. ' ' and '\t' are needed to handle macro declarations
168 || ((sc.state == SCE_A68K_LABEL) && (sc.ch != ':') && (sc.ch != ' ') && (sc.ch != '\t'))
169 || ((sc.state == SCE_A68K_IDENTIFIER) && (sc.ch < 0x80) && IsIdentifierChar(sc.ch)) // Identifier
170 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && IsDoxygenChar(sc.ch)) // Doxygen keyword
171 || ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && isalpha(sc.ch))) // Comment current word
172 {
173 continue;
174 }
175
176 // Check if some states terminate at the current char:
177 // we must include this char in the current style context
178 else if (((sc.state == SCE_A68K_STRING1) && (sc.ch < 0x80) && (sc.ch == '\'')) // String single-quoted
179 || ((sc.state == SCE_A68K_STRING2) && (sc.ch < 0x80) && (sc.ch == '\"')) // String double-quoted
180 || ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && (sc.ch == ':'))) // Label
181 {
182 sc.ForwardSetState(SCE_A68K_DEFAULT);
183 }
184
185 // Check for special words or Doxygen keywords in comments
186 else if (sc.state == SCE_A68K_COMMENT)
187 {
188 if (sc.ch == '\\') {
189 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
190 }
191 else if ((sc.ch < 0x80) && isalpha(sc.ch)) {
192 sc.SetState(SCE_A68K_COMMENT_WORD);
193 }
194 continue;
195 }
196
197 // Check for special words in comment
198 else if ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && !isalpha(sc.ch))
199 {
200 sc.GetCurrent(Buffer, sizeof(Buffer));
201 if (commentSpecial.InList(Buffer)) {
202 sc.ChangeState(SCE_A68K_COMMENT_SPECIAL);
203 }
204 else {
205 sc.ChangeState(SCE_A68K_COMMENT);
206 }
207 sc.SetState(SCE_A68K_COMMENT);
208 continue;
209 }
210
211 // Check for Doxygen keywords
212 else if ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && !IsDoxygenChar(sc.ch))
213 {
214 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
215 if (!doxygenKeyword.InList(Buffer)) {
216 sc.ChangeState(SCE_A68K_COMMENT);
217 }
218 sc.SetState(SCE_A68K_COMMENT);
219 continue;
220 }
221
222 // Check if we are in the case of a label which terminates without ':'
223 // It should be a macro declaration, not a label
224 else if ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && ((sc.ch == ' ') || (sc.ch == '\t')))
225 {
226 sc.ChangeState(SCE_A68K_MACRO_DECLARATION);
227 }
228
229 // Check if we are at the end of an identifier
230 // In this case, colourise it if was a keyword.
231 else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch))
232 {
233 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
234 if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
235 sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
236 }
237 else if (extInstruction.InList(Buffer)) {
238 sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
239 }
240 else if (registers.InList(Buffer)) {
241 sc.ChangeState(SCE_A68K_REGISTER);
242 }
243 else if (directive.InList(Buffer)) {
244 sc.ChangeState(SCE_A68K_DIRECTIVE);
245 }
246 }
247
248 // All special contexts are now handled.Come back to default style
249 sc.SetState(SCE_A68K_DEFAULT);
250 }
251
252
253 /************************************************************
254 *
255 * Check if we must enter a new state
256 *
257 ************************************************************/
258
259 // Label and macro identifiers start at the beginning of a line
260 // We set both as a label, but if it wasn't one (no ':' at the end),
261 // it will be changed as a macro identifier.
262 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
263 sc.SetState(SCE_A68K_LABEL);
264 }
265 else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Comment
266 sc.SetState(SCE_A68K_COMMENT);
267 }
268 else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
269 sc.SetState(SCE_A68K_NUMBER_DEC);
270 }
271 else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
272 sc.SetState(SCE_A68K_NUMBER_BIN);
273 }
274 else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
275 sc.SetState(SCE_A68K_NUMBER_HEX);
276 }
277 else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
278 sc.SetState(SCE_A68K_STRING1);
279 }
280 else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
281 sc.SetState(SCE_A68K_STRING2);
282 }
283 else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro
284 sc.SetState(SCE_A68K_MACRO_ARG);
285 }
286 else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
287 sc.SetState(SCE_A68K_IDENTIFIER);
288 }
289 else {
290 if (sc.ch < 0x80) {
291 OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
292 if (OpType != NO_OPERATOR) {
293 sc.SetState(SCE_A68K_OPERATOR);
294 if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
295 sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
296 }
297 }
298 }
299 }
300 } // End of for()
301 sc.Complete();
302 }
303
304
// Names of the keyword lists.
// The order matches the indices read by ColouriseA68kDoc (keywordlists[0..5]);
// the list is terminated by a null entry.

static const char * const a68kWordListDesc[] =
{
    "CPU instructions",         // 0
    "Registers",                // 1
    "Directives",               // 2
    "Extended instructions",    // 3
    "Comment special words",    // 4
    "Doxygen keywords",         // 5
    0                           // End of list
};
317
// Lexer module object for this lexer: ties the SCLEX_A68K identifier and the
// "a68k" name to ColouriseA68kDoc and to the keyword list names above.
// The fourth argument is 0 (presumably "no folding function" -- confirm
// against the LexerModule constructor).
LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);