]>
Commit | Line | Data |
---|---|---|
9e96e16f RD |
1 | // Scintilla source code edit control |
2 | /** @file LexCOBOL.cxx | |
3 | ** Lexer for COBOL | |
4 | ** Based on LexPascal.cxx | |
5 | ** Written by Laurent le Tynevez | |
6 | ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002 | |
7 | ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments) | |
8 | ** Updated by Rod Falck, Aug 2006 Converted to COBOL | |
9 | **/ | |
10 | ||
11 | #include <stdlib.h> | |
12 | #include <string.h> | |
9e96e16f RD |
13 | #include <stdio.h> |
14 | #include <stdarg.h> | |
1dcf666d RD |
15 | #include <assert.h> |
16 | #include <ctype.h> | |
9e96e16f | 17 | |
1dcf666d | 18 | #include "ILexer.h" |
9e96e16f RD |
19 | #include "Scintilla.h" |
20 | #include "SciLexer.h" | |
1dcf666d RD |
21 | |
22 | #include "WordList.h" | |
23 | #include "LexAccessor.h" | |
24 | #include "Accessor.h" | |
9e96e16f | 25 | #include "StyleContext.h" |
1dcf666d RD |
26 | #include "CharacterSet.h" |
27 | #include "LexerModule.h" | |
9e96e16f RD |
28 | |
29 | #ifdef SCI_NAMESPACE | |
30 | using namespace Scintilla; | |
31 | #endif | |
32 | ||
33 | #define IN_DIVISION 0x01 | |
34 | #define IN_DECLARATIVES 0x02 | |
35 | #define IN_SECTION 0x04 | |
36 | #define IN_PARAGRAPH 0x08 | |
37 | #define IN_FLAGS 0xF | |
38 | #define NOT_HEADER 0x10 | |
39 | ||
40 | inline bool isCOBOLoperator(char ch) | |
41 | { | |
42 | return isoperator(ch); | |
43 | } | |
44 | ||
45 | inline bool isCOBOLwordchar(char ch) | |
46 | { | |
47 | return isascii(ch) && (isalnum(ch) || ch == '-'); | |
48 | ||
49 | } | |
50 | ||
51 | inline bool isCOBOLwordstart(char ch) | |
52 | { | |
53 | return isascii(ch) && isalnum(ch); | |
54 | } | |
55 | ||
56 | static int CountBits(int nBits) | |
57 | { | |
58 | int count = 0; | |
59 | for (int i = 0; i < 32; ++i) | |
60 | { | |
61 | count += nBits & 1; | |
62 | nBits >>= 1; | |
63 | } | |
64 | return count; | |
65 | } | |
66 | ||
67 | static void getRange(unsigned int start, | |
68 | unsigned int end, | |
69 | Accessor &styler, | |
70 | char *s, | |
71 | unsigned int len) { | |
72 | unsigned int i = 0; | |
73 | while ((i < end - start + 1) && (i < len-1)) { | |
74 | s[i] = static_cast<char>(tolower(styler[start + i])); | |
75 | i++; | |
76 | } | |
77 | s[i] = '\0'; | |
78 | } | |
79 | ||
80 | static void ColourTo(Accessor &styler, unsigned int end, unsigned int attr) { | |
81 | styler.ColourTo(end, attr); | |
82 | } | |
83 | ||
84 | ||
85 | static int classifyWordCOBOL(unsigned int start, unsigned int end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) { | |
86 | int ret = 0; | |
87 | ||
88 | WordList& a_keywords = *keywordlists[0]; | |
89 | WordList& b_keywords = *keywordlists[1]; | |
90 | WordList& c_keywords = *keywordlists[2]; | |
91 | ||
92 | char s[100]; | |
93 | getRange(start, end, styler, s, sizeof(s)); | |
94 | ||
95 | char chAttr = SCE_C_IDENTIFIER; | |
1dcf666d | 96 | if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) { |
9e96e16f RD |
97 | chAttr = SCE_C_NUMBER; |
98 | char *p = s + 1; | |
99 | while (*p) { | |
1dcf666d | 100 | if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) { |
9e96e16f RD |
101 | chAttr = SCE_C_IDENTIFIER; |
102 | break; | |
103 | } | |
104 | ++p; | |
105 | } | |
106 | } | |
107 | else { | |
108 | if (a_keywords.InList(s)) { | |
109 | chAttr = SCE_C_WORD; | |
110 | } | |
111 | else if (b_keywords.InList(s)) { | |
112 | chAttr = SCE_C_WORD2; | |
113 | } | |
114 | else if (c_keywords.InList(s)) { | |
115 | chAttr = SCE_C_UUID; | |
116 | } | |
117 | } | |
118 | if (*bAarea) { | |
119 | if (strcmp(s, "division") == 0) { | |
120 | ret = IN_DIVISION; | |
121 | // we've determined the containment, anything else is just ignored for those purposes | |
122 | *bAarea = false; | |
123 | } else if (strcmp(s, "declaratives") == 0) { | |
124 | ret = IN_DIVISION | IN_DECLARATIVES; | |
125 | if (nContainment & IN_DECLARATIVES) | |
126 | ret |= NOT_HEADER | IN_SECTION; | |
127 | // we've determined the containment, anything else is just ignored for those purposes | |
128 | *bAarea = false; | |
129 | } else if (strcmp(s, "section") == 0) { | |
130 | ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION; | |
131 | // we've determined the containment, anything else is just ignored for those purposes | |
132 | *bAarea = false; | |
133 | } else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) { | |
134 | ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER; | |
135 | } else { | |
136 | ret = nContainment | IN_PARAGRAPH; | |
137 | } | |
138 | } | |
139 | ColourTo(styler, end, chAttr); | |
140 | return ret; | |
141 | } | |
142 | ||
143 | static void ColouriseCOBOLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], | |
144 | Accessor &styler) { | |
145 | ||
146 | styler.StartAt(startPos); | |
147 | ||
148 | int state = initStyle; | |
149 | if (state == SCE_C_CHARACTER) // Does not leak onto next line | |
150 | state = SCE_C_DEFAULT; | |
151 | char chPrev = ' '; | |
152 | char chNext = styler[startPos]; | |
153 | unsigned int lengthDoc = startPos + length; | |
154 | ||
155 | int nContainment; | |
156 | ||
157 | int currentLine = styler.GetLine(startPos); | |
158 | if (currentLine > 0) { | |
159 | styler.SetLineState(currentLine, styler.GetLineState(currentLine-1)); | |
160 | nContainment = styler.GetLineState(currentLine); | |
161 | nContainment &= ~NOT_HEADER; | |
162 | } else { | |
163 | styler.SetLineState(currentLine, 0); | |
164 | nContainment = 0; | |
165 | } | |
166 | ||
167 | styler.StartSegment(startPos); | |
168 | bool bNewLine = true; | |
169 | bool bAarea = !isspacechar(chNext); | |
170 | int column = 0; | |
171 | for (unsigned int i = startPos; i < lengthDoc; i++) { | |
172 | char ch = chNext; | |
173 | ||
174 | chNext = styler.SafeGetCharAt(i + 1); | |
175 | ||
176 | ++column; | |
177 | ||
178 | if (bNewLine) { | |
179 | column = 0; | |
180 | } | |
181 | if (column <= 1 && !bAarea) { | |
182 | bAarea = !isspacechar(ch); | |
183 | } | |
184 | bool bSetNewLine = false; | |
185 | if ((ch == '\r' && chNext != '\n') || (ch == '\n')) { | |
186 | // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) | |
187 | // Avoid triggering two times on Dos/Win | |
188 | // End of line | |
189 | if (state == SCE_C_CHARACTER) { | |
190 | ColourTo(styler, i, state); | |
191 | state = SCE_C_DEFAULT; | |
192 | } | |
193 | styler.SetLineState(currentLine, nContainment); | |
194 | currentLine++; | |
195 | bSetNewLine = true; | |
196 | if (nContainment & NOT_HEADER) | |
197 | nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION); | |
198 | } | |
199 | ||
200 | if (styler.IsLeadByte(ch)) { | |
201 | chNext = styler.SafeGetCharAt(i + 2); | |
202 | chPrev = ' '; | |
203 | i += 1; | |
204 | continue; | |
205 | } | |
206 | ||
207 | if (state == SCE_C_DEFAULT) { | |
1dcf666d | 208 | if (isCOBOLwordstart(ch) || (ch == '$' && isascii(chNext) && isalpha(chNext))) { |
9e96e16f RD |
209 | ColourTo(styler, i-1, state); |
210 | state = SCE_C_IDENTIFIER; | |
1dcf666d RD |
211 | } else if (column == 6 && ch == '*') { |
212 | // Cobol comment line: asterisk in column 7. | |
213 | ColourTo(styler, i-1, state); | |
214 | state = SCE_C_COMMENTLINE; | |
215 | } else if (ch == '*' && chNext == '>') { | |
216 | // Cobol inline comment: asterisk, followed by greater than. | |
217 | ColourTo(styler, i-1, state); | |
218 | state = SCE_C_COMMENTLINE; | |
9e96e16f RD |
219 | } else if (column == 0 && ch == '*' && chNext != '*') { |
220 | ColourTo(styler, i-1, state); | |
221 | state = SCE_C_COMMENTLINE; | |
222 | } else if (column == 0 && ch == '/' && chNext != '*') { | |
223 | ColourTo(styler, i-1, state); | |
224 | state = SCE_C_COMMENTLINE; | |
225 | } else if (column == 0 && ch == '*' && chNext == '*') { | |
226 | ColourTo(styler, i-1, state); | |
227 | state = SCE_C_COMMENTDOC; | |
228 | } else if (column == 0 && ch == '/' && chNext == '*') { | |
229 | ColourTo(styler, i-1, state); | |
230 | state = SCE_C_COMMENTDOC; | |
231 | } else if (ch == '"') { | |
232 | ColourTo(styler, i-1, state); | |
233 | state = SCE_C_STRING; | |
234 | } else if (ch == '\'') { | |
235 | ColourTo(styler, i-1, state); | |
236 | state = SCE_C_CHARACTER; | |
237 | } else if (ch == '?' && column == 0) { | |
238 | ColourTo(styler, i-1, state); | |
239 | state = SCE_C_PREPROCESSOR; | |
240 | } else if (isCOBOLoperator(ch)) { | |
241 | ColourTo(styler, i-1, state); | |
242 | ColourTo(styler, i, SCE_C_OPERATOR); | |
243 | } | |
244 | } else if (state == SCE_C_IDENTIFIER) { | |
245 | if (!isCOBOLwordchar(ch)) { | |
246 | int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea); | |
247 | ||
248 | if(lStateChange != 0) { | |
249 | styler.SetLineState(currentLine, lStateChange); | |
250 | nContainment = lStateChange; | |
251 | } | |
252 | ||
253 | state = SCE_C_DEFAULT; | |
254 | chNext = styler.SafeGetCharAt(i + 1); | |
255 | if (ch == '"') { | |
256 | state = SCE_C_STRING; | |
257 | } else if (ch == '\'') { | |
258 | state = SCE_C_CHARACTER; | |
259 | } else if (isCOBOLoperator(ch)) { | |
260 | ColourTo(styler, i, SCE_C_OPERATOR); | |
261 | } | |
262 | } | |
263 | } else { | |
264 | if (state == SCE_C_PREPROCESSOR) { | |
265 | if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) { | |
266 | ColourTo(styler, i-1, state); | |
267 | state = SCE_C_DEFAULT; | |
268 | } | |
269 | } else if (state == SCE_C_COMMENT) { | |
270 | if (ch == '\r' || ch == '\n') { | |
271 | ColourTo(styler, i, state); | |
272 | state = SCE_C_DEFAULT; | |
273 | } | |
274 | } else if (state == SCE_C_COMMENTDOC) { | |
275 | if (ch == '\r' || ch == '\n') { | |
276 | if (((i > styler.GetStartSegment() + 2) || ( | |
277 | (initStyle == SCE_C_COMMENTDOC) && | |
278 | (styler.GetStartSegment() == static_cast<unsigned int>(startPos))))) { | |
279 | ColourTo(styler, i, state); | |
280 | state = SCE_C_DEFAULT; | |
281 | } | |
282 | } | |
283 | } else if (state == SCE_C_COMMENTLINE) { | |
284 | if (ch == '\r' || ch == '\n') { | |
285 | ColourTo(styler, i-1, state); | |
286 | state = SCE_C_DEFAULT; | |
287 | } | |
288 | } else if (state == SCE_C_STRING) { | |
289 | if (ch == '"') { | |
290 | ColourTo(styler, i, state); | |
291 | state = SCE_C_DEFAULT; | |
292 | } | |
293 | } else if (state == SCE_C_CHARACTER) { | |
294 | if (ch == '\'') { | |
295 | ColourTo(styler, i, state); | |
296 | state = SCE_C_DEFAULT; | |
297 | } | |
298 | } | |
299 | } | |
300 | chPrev = ch; | |
301 | bNewLine = bSetNewLine; | |
302 | if (bNewLine) | |
303 | { | |
304 | bAarea = false; | |
305 | } | |
306 | } | |
307 | ColourTo(styler, lengthDoc - 1, state); | |
308 | } | |
309 | ||
310 | static void FoldCOBOLDoc(unsigned int startPos, int length, int, WordList *[], | |
311 | Accessor &styler) { | |
312 | bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; | |
313 | unsigned int endPos = startPos + length; | |
314 | int visibleChars = 0; | |
315 | int lineCurrent = styler.GetLine(startPos); | |
316 | int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF; | |
317 | char chNext = styler[startPos]; | |
318 | ||
319 | bool bNewLine = true; | |
320 | bool bAarea = !isspacechar(chNext); | |
321 | int column = 0; | |
322 | bool bComment = false; | |
323 | for (unsigned int i = startPos; i < endPos; i++) { | |
324 | char ch = chNext; | |
325 | chNext = styler.SafeGetCharAt(i + 1); | |
326 | ++column; | |
327 | ||
328 | if (bNewLine) { | |
329 | column = 0; | |
330 | bComment = (ch == '*' || ch == '/' || ch == '?'); | |
331 | } | |
332 | if (column <= 1 && !bAarea) { | |
333 | bAarea = !isspacechar(ch); | |
334 | } | |
335 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); | |
336 | if (atEOL) { | |
337 | int nContainment = styler.GetLineState(lineCurrent); | |
338 | int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE; | |
339 | if (bAarea && !bComment) | |
340 | --lev; | |
341 | if (visibleChars == 0 && foldCompact) | |
342 | lev |= SC_FOLDLEVELWHITEFLAG; | |
343 | if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment) | |
344 | lev |= SC_FOLDLEVELHEADERFLAG; | |
345 | if (lev != styler.LevelAt(lineCurrent)) { | |
346 | styler.SetLevel(lineCurrent, lev); | |
347 | } | |
348 | if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) { | |
349 | // this level is at the same level or less than the previous line | |
350 | // therefore these is nothing for the previous header to collapse, so remove the header | |
351 | styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG); | |
352 | } | |
353 | levelPrev = lev; | |
354 | visibleChars = 0; | |
355 | bAarea = false; | |
356 | bNewLine = true; | |
357 | lineCurrent++; | |
358 | } else { | |
359 | bNewLine = false; | |
360 | } | |
361 | ||
362 | ||
363 | if (!isspacechar(ch)) | |
364 | visibleChars++; | |
365 | } | |
366 | ||
367 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
368 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; | |
369 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); | |
370 | } | |
371 | ||
372 | static const char * const COBOLWordListDesc[] = { | |
373 | "A Keywords", | |
374 | "B Keywords", | |
375 | "Extended Keywords", | |
376 | 0 | |
377 | }; | |
378 | ||
379 | LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc); |