]>
Commit | Line | Data |
---|---|---|
1dcf666d RD |
1 | // Scintilla source code edit control |
2 | /** @file LexA68k.cxx | |
3 | ** Lexer for Assembler, just for the Motorola 68k syntax | |
4 | ** Written by Martial Demolins AKA Folco | |
5 | **/ | |
6 | // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com> | |
7 | // The License.txt file describes the conditions under which this software | |
8 | // may be distributed. | |
9 | ||
10 | ||
11 | #include <stdlib.h> | |
12 | #include <string.h> | |
13 | #include <stdio.h> | |
14 | #include <stdarg.h> | |
15 | #include <assert.h> | |
16 | #include <ctype.h> | |
17 | ||
18 | #include "ILexer.h" | |
19 | #include "Scintilla.h" | |
20 | #include "SciLexer.h" | |
21 | ||
22 | #include "WordList.h" | |
23 | #include "LexAccessor.h" | |
24 | #include "Accessor.h" | |
25 | #include "StyleContext.h" | |
26 | #include "CharacterSet.h" | |
27 | #include "LexerModule.h" | |
28 | ||
29 | #ifdef SCI_NAMESPACE | |
30 | using namespace Scintilla; | |
31 | #endif | |
32 | ||
33 | ||
34 | // Return values for GetOperatorType | |
35 | #define NO_OPERATOR 0 | |
36 | #define OPERATOR_1CHAR 1 | |
37 | #define OPERATOR_2CHAR 2 | |
38 | ||
39 | ||
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or '\'.
 *
 *  The <ctype.h> classifiers are only defined for values representable as
 *  unsigned char (or EOF), so any value outside the ASCII range is rejected
 *  before isalpha() is consulted.
 */

static inline bool IsIdentifierStart (const int ch)
{
    if ((ch == '_') || (ch == '\\'))
        return true;

    // Only hand plain ASCII to isalpha(): wider values are undefined behaviour
    return (ch >= 0) && (ch < 0x80) && (isalpha(ch) != 0);
}
50 | ||
51 | ||
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  The <ctype.h> classifiers are only defined for values representable as
 *  unsigned char (or EOF), so any value outside the ASCII range is rejected
 *  before isalnum() is consulted.
 */

static inline bool IsIdentifierChar (const int ch)
{
    if ((ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'))
        return true;

    // Only hand plain ASCII to isalnum(): wider values are undefined behaviour
    return (ch >= 0) && (ch < 0x80) && (isalnum(ch) != 0);
}
62 | ||
63 | ||
64 | /** | |
65 | * GetOperatorType | |
66 | * | |
67 | * Return: | |
68 | * NO_OPERATOR if char is not an operator | |
69 | * OPERATOR_1CHAR if the operator is one char long | |
70 | * OPERATOR_2CHAR if the operator is two chars long | |
71 | */ | |
72 | ||
73 | static inline int GetOperatorType (const int ch1, const int ch2) | |
74 | { | |
75 | int OpType = NO_OPERATOR; | |
76 | ||
77 | if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') || | |
78 | (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ',')) | |
79 | OpType = OPERATOR_1CHAR; | |
80 | ||
81 | else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>')) | |
82 | OpType = OPERATOR_2CHAR; | |
83 | ||
84 | return OpType; | |
85 | } | |
86 | ||
87 | ||
/**
 *  IsBin
 *
 *  Tell whether the given char is a binary digit.
 *  Return true for '0' and '1', false for anything else.
 */

static inline bool IsBin (const int ch)
{
    // '0' and '1' are adjacent, so a range test covers exactly both digits
    return (ch >= '0') && (ch <= '1');
}
98 | ||
99 | ||
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  The <ctype.h> classifiers are only defined for values representable as
 *  unsigned char (or EOF), so any value outside the ASCII range is rejected
 *  before isalpha() is consulted.
 */

static inline bool IsDoxygenChar (const int ch)
{
    if ((ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}'))
        return true;

    // Only hand plain ASCII to isalpha(): wider values are undefined behaviour
    return (ch >= 0) && (ch < 0x80) && (isalpha(ch) != 0);
}
110 | ||
111 | ||
112 | /** | |
113 | * ColouriseA68kDoc | |
114 | * | |
115 | * Main function, which colourises a 68k source | |
116 | */ | |
117 | ||
118 | static void ColouriseA68kDoc (unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) | |
119 | { | |
120 | ||
121 | // Get references to keywords lists | |
122 | WordList &cpuInstruction = *keywordlists[0]; | |
123 | WordList ®isters = *keywordlists[1]; | |
124 | WordList &directive = *keywordlists[2]; | |
125 | WordList &extInstruction = *keywordlists[3]; | |
126 | WordList &commentSpecial = *keywordlists[4]; | |
127 | WordList &doxygenKeyword = *keywordlists[5]; | |
128 | ||
129 | ||
130 | // Instanciate a context for our source | |
131 | StyleContext sc(startPos, length, initStyle, styler); | |
132 | ||
133 | ||
134 | /************************************************************ | |
135 | * | |
136 | * Parse the text | |
137 | * | |
138 | ************************************************************/ | |
139 | ||
140 | for ( ; sc.More(); sc.Forward()) | |
141 | { | |
142 | char Buffer[100]; | |
143 | int OpType; | |
144 | ||
145 | // Reset style at beginning of line | |
146 | if (sc.atLineStart) | |
147 | sc.SetState(SCE_A68K_DEFAULT); | |
148 | ||
149 | ||
150 | /************************************************************ | |
151 | * | |
152 | * Handle current state if we are not in the "default style" | |
153 | * | |
154 | ************************************************************/ | |
155 | ||
156 | if (sc.state != SCE_A68K_DEFAULT) | |
157 | { | |
158 | // Check if current style continue. | |
159 | // If this case, we loop because there is nothing else to do | |
160 | if (((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number | |
161 | || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number | |
162 | || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number | |
163 | || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Arg of macro | |
164 | || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted | |
165 | || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted | |
166 | || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument | |
167 | // Label. ' ' and '\t' are needed to handle macro declarations | |
168 | || ((sc.state == SCE_A68K_LABEL) && (sc.ch != ':') && (sc.ch != ' ') && (sc.ch != '\t')) | |
169 | || ((sc.state == SCE_A68K_IDENTIFIER) && (sc.ch < 0x80) && IsIdentifierChar(sc.ch)) // Identifier | |
170 | || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && IsDoxygenChar(sc.ch)) // Doxygen keyword | |
171 | || ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && isalpha(sc.ch))) // Comment current word | |
172 | { | |
173 | continue; | |
174 | } | |
175 | ||
176 | // Check if some states terminate at the current char: | |
177 | // we must include this char in the current style context | |
178 | else if (((sc.state == SCE_A68K_STRING1) && (sc.ch < 0x80) && (sc.ch == '\'')) // String single-quoted | |
179 | || ((sc.state == SCE_A68K_STRING2) && (sc.ch < 0x80) && (sc.ch == '\"')) // String double-quoted | |
180 | || ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && (sc.ch == ':'))) // Label | |
181 | { | |
182 | sc.ForwardSetState(SCE_A68K_DEFAULT); | |
183 | } | |
184 | ||
185 | // Check for special words or Doxygen keywords in comments | |
186 | else if (sc.state == SCE_A68K_COMMENT) | |
187 | { | |
188 | if (sc.ch == '\\') { | |
189 | sc.SetState(SCE_A68K_COMMENT_DOXYGEN); | |
190 | } | |
191 | else if ((sc.ch < 0x80) && isalpha(sc.ch)) { | |
192 | sc.SetState(SCE_A68K_COMMENT_WORD); | |
193 | } | |
194 | continue; | |
195 | } | |
196 | ||
197 | // Check for special words in comment | |
198 | else if ((sc.state == SCE_A68K_COMMENT_WORD) && (sc.ch < 0x80) && !isalpha(sc.ch)) | |
199 | { | |
200 | sc.GetCurrent(Buffer, sizeof(Buffer)); | |
201 | if (commentSpecial.InList(Buffer)) { | |
202 | sc.ChangeState(SCE_A68K_COMMENT_SPECIAL); | |
203 | } | |
204 | else { | |
205 | sc.ChangeState(SCE_A68K_COMMENT); | |
206 | } | |
207 | sc.SetState(SCE_A68K_COMMENT); | |
208 | continue; | |
209 | } | |
210 | ||
211 | // Check for Doxygen keywords | |
212 | else if ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && (sc.ch < 0x80) && !IsDoxygenChar(sc.ch)) | |
213 | { | |
214 | sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context | |
215 | if (!doxygenKeyword.InList(Buffer)) { | |
216 | sc.ChangeState(SCE_A68K_COMMENT); | |
217 | } | |
218 | sc.SetState(SCE_A68K_COMMENT); | |
219 | continue; | |
220 | } | |
221 | ||
222 | // Check if we are in the case of a label which terminates without ':' | |
223 | // It should be a macro declaration, not a label | |
224 | else if ((sc.state == SCE_A68K_LABEL) && (sc.ch < 0x80) && ((sc.ch == ' ') || (sc.ch == '\t'))) | |
225 | { | |
226 | sc.ChangeState(SCE_A68K_MACRO_DECLARATION); | |
227 | } | |
228 | ||
229 | // Check if we are at the end of an identifier | |
230 | // In this case, colourise it if was a keyword. | |
231 | else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) | |
232 | { | |
233 | sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context | |
234 | if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list | |
235 | sc.ChangeState(SCE_A68K_CPUINSTRUCTION); | |
236 | } | |
237 | else if (extInstruction.InList(Buffer)) { | |
238 | sc.ChangeState(SCE_A68K_EXTINSTRUCTION); | |
239 | } | |
240 | else if (registers.InList(Buffer)) { | |
241 | sc.ChangeState(SCE_A68K_REGISTER); | |
242 | } | |
243 | else if (directive.InList(Buffer)) { | |
244 | sc.ChangeState(SCE_A68K_DIRECTIVE); | |
245 | } | |
246 | } | |
247 | ||
248 | // All special contexts are now handled.Come back to default style | |
249 | sc.SetState(SCE_A68K_DEFAULT); | |
250 | } | |
251 | ||
252 | ||
253 | /************************************************************ | |
254 | * | |
255 | * Check if we must enter a new state | |
256 | * | |
257 | ************************************************************/ | |
258 | ||
259 | // Label and macro identifiers start at the beginning of a line | |
260 | // We set both as a label, but if it wasn't one (no ':' at the end), | |
261 | // it will be changed as a macro identifier. | |
262 | if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { | |
263 | sc.SetState(SCE_A68K_LABEL); | |
264 | } | |
265 | else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Comment | |
266 | sc.SetState(SCE_A68K_COMMENT); | |
267 | } | |
268 | else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix | |
269 | sc.SetState(SCE_A68K_NUMBER_DEC); | |
270 | } | |
271 | else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%' | |
272 | sc.SetState(SCE_A68K_NUMBER_BIN); | |
273 | } | |
274 | else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$' | |
275 | sc.SetState(SCE_A68K_NUMBER_HEX); | |
276 | } | |
277 | else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted) | |
278 | sc.SetState(SCE_A68K_STRING1); | |
279 | } | |
280 | else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted) | |
281 | sc.SetState(SCE_A68K_STRING2); | |
282 | } | |
283 | else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro | |
284 | sc.SetState(SCE_A68K_MACRO_ARG); | |
285 | } | |
286 | else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc... | |
287 | sc.SetState(SCE_A68K_IDENTIFIER); | |
288 | } | |
289 | else { | |
290 | if (sc.ch < 0x80) { | |
291 | OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator | |
292 | if (OpType != NO_OPERATOR) { | |
293 | sc.SetState(SCE_A68K_OPERATOR); | |
294 | if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long | |
295 | sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<) | |
296 | } | |
297 | } | |
298 | } | |
299 | } | |
300 | } // End of for() | |
301 | sc.Complete(); | |
302 | } | |
303 | ||
304 | ||
// Descriptions of the keyword lists, in the same order as ColouriseA68kDoc
// reads them from keywordlists[]. The trailing 0 terminates the array.

static const char * const a68kWordListDesc[] = {
    "CPU instructions",         // keywordlists[0]
    "Registers",                // keywordlists[1]
    "Directives",               // keywordlists[2]
    "Extended instructions",    // keywordlists[3]
    "Comment special words",    // keywordlists[4]
    "Doxygen keywords",         // keywordlists[5]
    0                           // End of the list
};
317 | ||
// Lexer module object for the "a68k" language: associates the SCLEX_A68K id
// with the ColouriseA68kDoc colouriser and the keyword lists descriptions.
// NOTE(review): the 0 argument is presumably the (absent) folding function,
// to be confirmed against the LexerModule constructor in LexerModule.h.
LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);