git.saurik.com Git - wxWidgets.git/blame - src/stc/scintilla/lexers/LexCOBOL.cxx

Commit	Line	Data
9e96e16f RD	1	// Scintilla source code edit control
	2	/** @file LexCOBOL.cxx
	3	** Lexer for COBOL
	4	** Based on LexPascal.cxx
	5	** Written by Laurent le Tynevez
	6	** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
	7	** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
	8	** Updated by Rod Falck, Aug 2006 Converted to COBOL
	9	**/
	10
	11	#include <stdlib.h>
	12	#include <string.h>
9e96e16f RD	13	#include <stdio.h>
9e96e16f RD	14	#include <stdarg.h>
1dcf666d RD	15	#include <assert.h>
1dcf666d RD	16	#include <ctype.h>
9e96e16f	17
1dcf666d	18	#include "ILexer.h"
9e96e16f RD	19	#include "Scintilla.h"
9e96e16f RD	20	#include "SciLexer.h"
1dcf666d RD	21
	22	#include "WordList.h"
	23	#include "LexAccessor.h"
	24	#include "Accessor.h"
9e96e16f	25	#include "StyleContext.h"
1dcf666d RD	26	#include "CharacterSet.h"
1dcf666d RD	27	#include "LexerModule.h"
9e96e16f RD	28
	29	#ifdef SCI_NAMESPACE
	30	using namespace Scintilla;
	31	#endif
	32
	33	#define IN_DIVISION 0x01
	34	#define IN_DECLARATIVES 0x02
	35	#define IN_SECTION 0x04
	36	#define IN_PARAGRAPH 0x08
	37	#define IN_FLAGS 0xF
	38	#define NOT_HEADER 0x10
	39
	40	inline bool isCOBOLoperator(char ch)
	41	{
	42	return isoperator(ch);
	43	}
	44
	45	inline bool isCOBOLwordchar(char ch)
	46	{
	47	return isascii(ch) && (isalnum(ch) \|\| ch == '-');
	48
	49	}
	50
	51	inline bool isCOBOLwordstart(char ch)
	52	{
	53	return isascii(ch) && isalnum(ch);
	54	}
	55
	56	static int CountBits(int nBits)
	57	{
	58	int count = 0;
	59	for (int i = 0; i < 32; ++i)
	60	{
	61	count += nBits & 1;
	62	nBits >>= 1;
	63	}
	64	return count;
	65	}
	66
	67	static void getRange(unsigned int start,
	68	unsigned int end,
	69	Accessor &styler,
	70	char *s,
	71	unsigned int len) {
	72	unsigned int i = 0;
	73	while ((i < end - start + 1) && (i < len-1)) {
	74	s[i] = static_cast<char>(tolower(styler[start + i]));
	75	i++;
	76	}
	77	s[i] = '\0';
	78	}
	79
	80	static void ColourTo(Accessor &styler, unsigned int end, unsigned int attr) {
	81	styler.ColourTo(end, attr);
	82	}
	83
	84
	85	static int classifyWordCOBOL(unsigned int start, unsigned int end, /WordList &keywords/WordList keywordlists[], Accessor &styler, int nContainment, bool bAarea) {
	86	int ret = 0;
	87
	88	WordList& a_keywords = *keywordlists[0];
	89	WordList& b_keywords = *keywordlists[1];
	90	WordList& c_keywords = *keywordlists[2];
	91
92	char s[100];
93	getRange(start, end, styler, s, sizeof(s));
94
95	char chAttr = SCE_C_IDENTIFIER;
1dcf666d	96	if (isdigit(s[0]) \|\| (s[0] == '.') \|\| (s[0] == 'v')) {
9e96e16f RD	97	chAttr = SCE_C_NUMBER;
	98	char *p = s + 1;
	99	while (*p) {
1dcf666d	100	if ((!isdigit(p) && (p) != 'v') && isCOBOLwordchar(*p)) {
9e96e16f RD	101	chAttr = SCE_C_IDENTIFIER;
	102	break;
	103	}
	104	++p;
	105	}
	106	}
	107	else {
	108	if (a_keywords.InList(s)) {
	109	chAttr = SCE_C_WORD;
	110	}
	111	else if (b_keywords.InList(s)) {
	112	chAttr = SCE_C_WORD2;
	113	}
	114	else if (c_keywords.InList(s)) {
	115	chAttr = SCE_C_UUID;
	116	}
	117	}
	118	if (*bAarea) {
	119	if (strcmp(s, "division") == 0) {
	120	ret = IN_DIVISION;
	121	// we've determined the containment, anything else is just ignored for those purposes
	122	*bAarea = false;
	123	} else if (strcmp(s, "declaratives") == 0) {
	124	ret = IN_DIVISION \| IN_DECLARATIVES;
	125	if (nContainment & IN_DECLARATIVES)
	126	ret \|= NOT_HEADER \| IN_SECTION;
	127	// we've determined the containment, anything else is just ignored for those purposes
	128	*bAarea = false;
	129	} else if (strcmp(s, "section") == 0) {
	130	ret = (nContainment &~ IN_PARAGRAPH) \| IN_SECTION;
	131	// we've determined the containment, anything else is just ignored for those purposes
	132	*bAarea = false;
	133	} else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
	134	ret = IN_DIVISION \| IN_DECLARATIVES \| IN_SECTION \| NOT_HEADER;
	135	} else {
	136	ret = nContainment \| IN_PARAGRAPH;
	137	}
	138	}
	139	ColourTo(styler, end, chAttr);
	140	return ret;
	141	}
	142
	143	static void ColouriseCOBOLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
	144	Accessor &styler) {
	145
	146	styler.StartAt(startPos);
	147
	148	int state = initStyle;
	149	if (state == SCE_C_CHARACTER) // Does not leak onto next line
	150	state = SCE_C_DEFAULT;
	151	char chPrev = ' ';
	152	char chNext = styler[startPos];
	153	unsigned int lengthDoc = startPos + length;
	154
	155	int nContainment;
	156
	157	int currentLine = styler.GetLine(startPos);
	158	if (currentLine > 0) {
	159	styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
	160	nContainment = styler.GetLineState(currentLine);
	161	nContainment &= ~NOT_HEADER;
	162	} else {
	163	styler.SetLineState(currentLine, 0);
	164	nContainment = 0;
165	}
166
167	styler.StartSegment(startPos);
168	bool bNewLine = true;
169	bool bAarea = !isspacechar(chNext);
170	int column = 0;
171	for (unsigned int i = startPos; i < lengthDoc; i++) {
172	char ch = chNext;
173
174	chNext = styler.SafeGetCharAt(i + 1);
175
176	++column;
177
178	if (bNewLine) {
179	column = 0;
180	}
181	if (column <= 1 && !bAarea) {
182	bAarea = !isspacechar(ch);
183	}
184	bool bSetNewLine = false;
185	if ((ch == '\r' && chNext != '\n') \|\| (ch == '\n')) {
186	// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
187	// Avoid triggering two times on Dos/Win
188	// End of line
189	if (state == SCE_C_CHARACTER) {
190	ColourTo(styler, i, state);
191	state = SCE_C_DEFAULT;
192	}
193	styler.SetLineState(currentLine, nContainment);
194	currentLine++;
195	bSetNewLine = true;
196	if (nContainment & NOT_HEADER)
197	nContainment &= ~(NOT_HEADER \| IN_DECLARATIVES \| IN_SECTION);
198	}
199
200	if (styler.IsLeadByte(ch)) {
201	chNext = styler.SafeGetCharAt(i + 2);
202	chPrev = ' ';
203	i += 1;
204	continue;
205	}
206
207	if (state == SCE_C_DEFAULT) {
1dcf666d	208	if (isCOBOLwordstart(ch) \|\| (ch == '$' && isascii(chNext) && isalpha(chNext))) {
9e96e16f RD	209	ColourTo(styler, i-1, state);
9e96e16f RD	210	state = SCE_C_IDENTIFIER;
1dcf666d RD	211	} else if (column == 6 && ch == '*') {
	212	// Cobol comment line: asterisk in column 7.
	213	ColourTo(styler, i-1, state);
	214	state = SCE_C_COMMENTLINE;
	215	} else if (ch == '*' && chNext == '>') {
	216	// Cobol inline comment: asterisk, followed by greater than.
	217	ColourTo(styler, i-1, state);
	218	state = SCE_C_COMMENTLINE;
9e96e16f RD	219	} else if (column == 0 && ch == '' && chNext != '') {
	220	ColourTo(styler, i-1, state);
	221	state = SCE_C_COMMENTLINE;
	222	} else if (column == 0 && ch == '/' && chNext != '*') {
	223	ColourTo(styler, i-1, state);
	224	state = SCE_C_COMMENTLINE;
	225	} else if (column == 0 && ch == '' && chNext == '') {
	226	ColourTo(styler, i-1, state);
	227	state = SCE_C_COMMENTDOC;
	228	} else if (column == 0 && ch == '/' && chNext == '*') {
	229	ColourTo(styler, i-1, state);
	230	state = SCE_C_COMMENTDOC;
	231	} else if (ch == '"') {
	232	ColourTo(styler, i-1, state);
	233	state = SCE_C_STRING;
	234	} else if (ch == '\'') {
	235	ColourTo(styler, i-1, state);
	236	state = SCE_C_CHARACTER;
	237	} else if (ch == '?' && column == 0) {
	238	ColourTo(styler, i-1, state);
	239	state = SCE_C_PREPROCESSOR;
	240	} else if (isCOBOLoperator(ch)) {
	241	ColourTo(styler, i-1, state);
	242	ColourTo(styler, i, SCE_C_OPERATOR);
	243	}
	244	} else if (state == SCE_C_IDENTIFIER) {
	245	if (!isCOBOLwordchar(ch)) {
	246	int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
	247
	248	if(lStateChange != 0) {
	249	styler.SetLineState(currentLine, lStateChange);
	250	nContainment = lStateChange;
	251	}
	252
	253	state = SCE_C_DEFAULT;
	254	chNext = styler.SafeGetCharAt(i + 1);
	255	if (ch == '"') {
	256	state = SCE_C_STRING;
	257	} else if (ch == '\'') {
	258	state = SCE_C_CHARACTER;
	259	} else if (isCOBOLoperator(ch)) {
	260	ColourTo(styler, i, SCE_C_OPERATOR);
	261	}
	262	}
	263	} else {
	264	if (state == SCE_C_PREPROCESSOR) {
	265	if ((ch == '\r' \|\| ch == '\n') && !(chPrev == '\\' \|\| chPrev == '\r')) {
	266	ColourTo(styler, i-1, state);
	267	state = SCE_C_DEFAULT;
	268	}
	269	} else if (state == SCE_C_COMMENT) {
	270	if (ch == '\r' \|\| ch == '\n') {
	271	ColourTo(styler, i, state);
	272	state = SCE_C_DEFAULT;
	273	}
	274	} else if (state == SCE_C_COMMENTDOC) {
	275	if (ch == '\r' \|\| ch == '\n') {
	276	if (((i > styler.GetStartSegment() + 2) \|\| (
	277	(initStyle == SCE_C_COMMENTDOC) &&
	278	(styler.GetStartSegment() == static_cast<unsigned int>(startPos))))) {
	279	ColourTo(styler, i, state);
	280	state = SCE_C_DEFAULT;
	281	}
	282	}
283	} else if (state == SCE_C_COMMENTLINE) {
284	if (ch == '\r' \|\| ch == '\n') {
285	ColourTo(styler, i-1, state);
286	state = SCE_C_DEFAULT;
287	}
288	} else if (state == SCE_C_STRING) {
289	if (ch == '"') {
290	ColourTo(styler, i, state);
291	state = SCE_C_DEFAULT;
292	}
293	} else if (state == SCE_C_CHARACTER) {
294	if (ch == '\'') {
295	ColourTo(styler, i, state);
296	state = SCE_C_DEFAULT;
297	}
298	}
299	}
300	chPrev = ch;
301	bNewLine = bSetNewLine;
302	if (bNewLine)
303	{
304	bAarea = false;
305	}
306	}
307	ColourTo(styler, lengthDoc - 1, state);
308	}
309
310	static void FoldCOBOLDoc(unsigned int startPos, int length, int, WordList *[],
311	Accessor &styler) {
312	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
313	unsigned int endPos = startPos + length;
314	int visibleChars = 0;
315	int lineCurrent = styler.GetLine(startPos);
316	int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
317	char chNext = styler[startPos];
318
319	bool bNewLine = true;
320	bool bAarea = !isspacechar(chNext);
321	int column = 0;
322	bool bComment = false;
323	for (unsigned int i = startPos; i < endPos; i++) {
324	char ch = chNext;
325	chNext = styler.SafeGetCharAt(i + 1);
326	++column;
327
328	if (bNewLine) {
329	column = 0;
330	bComment = (ch == '*' \|\| ch == '/' \|\| ch == '?');
331	}
332	if (column <= 1 && !bAarea) {
333	bAarea = !isspacechar(ch);
334	}
335	bool atEOL = (ch == '\r' && chNext != '\n') \|\| (ch == '\n');
336	if (atEOL) {
337	int nContainment = styler.GetLineState(lineCurrent);
338	int lev = CountBits(nContainment & IN_FLAGS) \| SC_FOLDLEVELBASE;
339	if (bAarea && !bComment)
340	--lev;
341	if (visibleChars == 0 && foldCompact)
342	lev \|= SC_FOLDLEVELWHITEFLAG;
343	if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
344	lev \|= SC_FOLDLEVELHEADERFLAG;
345	if (lev != styler.LevelAt(lineCurrent)) {
346	styler.SetLevel(lineCurrent, lev);
347	}
348	if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
349	// this level is at the same level or less than the previous line
350	// therefore these is nothing for the previous header to collapse, so remove the header
351	styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
352	}
353	levelPrev = lev;
354	visibleChars = 0;
355	bAarea = false;
356	bNewLine = true;
357	lineCurrent++;
358	} else {
359	bNewLine = false;
360	}
361
362
363	if (!isspacechar(ch))
364	visibleChars++;
365	}
366
367	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
368	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
369	styler.SetLevel(lineCurrent, levelPrev \| flagsNext);
370	}
371
372	static const char * const COBOLWordListDesc[] = {
373	"A Keywords",
374	"B Keywords",
375	"Extended Keywords",
376	0
377	};
378
379	LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);