[wxWidgets.git] / src / stc / scintilla / lexlib / WordList.cxx

// Scintilla source code edit control
/** @file WordList.cxx
 ** Hold a list of words and test whether strings belong to it.
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>

#include <algorithm>

#include "WordList.h"

#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif

/**
 * Split a string in place into words and build an array of pointers to them.
 *
 * Every separator character in wordlist is overwritten with \0 so that each
 * array entry is a NUL terminated word. Separators are \r and \n, plus space
 * and tab unless onlyLineEnds is true.
 *
 * @param wordlist     Writable NUL terminated buffer; it is modified in place.
 * @param len          Receives the number of words found.
 * @param onlyLineEnds When true, only line end characters separate words.
 * @return Array allocated with new[] holding *len word pointers followed by
 *         one extra entry that points at the terminating \0 of wordlist,
 *         which reads as an empty string.
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// Table indexed by unsigned character value: true for separator characters.
	bool isSeparator[256] = {};
	isSeparator[static_cast<unsigned char>('\r')] = true;
	isSeparator[static_cast<unsigned char>('\n')] = true;
	if (!onlyLineEnds) {
		isSeparator[static_cast<unsigned char>(' ')] = true;
		isSeparator[static_cast<unsigned char>('\t')] = true;
	}

	// First pass: count the words so the pointer array can be sized.
	// The start of the buffer counts as following a separator.
	int count = 0;
	bool afterSeparator = true;
	for (const char *p = wordlist; *p; p++) {
		const bool sep = isSeparator[static_cast<unsigned char>(*p)];
		if (afterSeparator && !sep)
			count++;
		afterSeparator = sep;
	}

	// One extra slot for the trailing empty-string entry.
	char **keywords = new char *[count + 1];
	if (!keywords) {
		*len = 0;
		return keywords;
	}

	// Second pass: terminate each word and record where it begins.
	// The length is taken before any separator is overwritten.
	const size_t total = strlen(wordlist);
	int filled = 0;
	bool atWordStart = true;
	for (size_t pos = 0; pos < total; pos++) {
		if (isSeparator[static_cast<unsigned char>(wordlist[pos])]) {
			wordlist[pos] = '\0';
			atWordStart = true;
		} else {
			if (atWordStart) {
				keywords[filled] = wordlist + pos;
				filled++;
			}
			atWordStart = false;
		}
	}
	keywords[filled] = wordlist + total;
	*len = filled;
	return keywords;
}

/**
 * Report whether two word lists differ.
 * Lists differ when their word counts differ or when the words at any
 * common index are not equal as strings.
 */
bool WordList::operator!=(const WordList &other) const {
	if (len == other.len) {
		// Advance past the matching words; stopping short of len means a mismatch.
		int idx = 0;
		while (idx < len && strcmp(words[idx], other.words[idx]) == 0)
			idx++;
		return idx < len;
	}
	return true;
}

/**
 * Release the storage held by the list and reset it to the empty state.
 */
void WordList::Clear() {
	// The pointer array and the text buffer are freed together, and only
	// when the pointer array has been set.
	if (words != 0) {
		delete []words;
		delete []list;
	}
	len = 0;
	list = 0;
	words = 0;
}

#ifdef _MSC_VER

/**
 * Ordering predicate for std::sort: true when string a sorts before string b.
 * strcmp only guarantees the sign of its result, so the test is for any
 * negative value rather than for exactly -1.
 */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}

#else

/**
 * qsort comparator for an array of C strings.
 * Each argument is a pointer to an array element, that is a pointer to a
 * pointer to char; the strings are ordered with strcmp.
 */
static int cmpWords(const void *a, const void *b) {
	const char *lhs = *static_cast<const char * const *>(a);
	const char *rhs = *static_cast<const char * const *>(b);
	return strcmp(lhs, rhs);
}

/**
 * Sort an array of len C string pointers in place using qsort with cmpWords.
 */
static void SortWordList(char **words, unsigned int len) {
	void *base = reinterpret_cast<void *>(words);
	qsort(base, len, sizeof(char *), cmpWords);
}

#endif

/**
 * Replace the contents of the list with the words in s.
 * The string is copied, split into words, sorted, and then indexed so that
 * starts[c] is the position of the first word beginning with character c,
 * or -1 when no word begins with c.
 */
void WordList::Set(const char *s) {
	Clear();
	// Work on a private copy because splitting overwrites separators.
	const size_t bytes = strlen(s) + 1;
	list = new char[bytes];
	strcpy(list, s);
	words = ArrayFromWordList(list, &len, onlyLineEnds);
#ifdef _MSC_VER
	std::sort(words, words + len, cmpWords);
#else
	SortWordList(words, len);
#endif
	// Start with every first-character slot marked as unused.
	const unsigned int slots = sizeof(starts) / sizeof(starts[0]);
	for (unsigned int slot = 0; slot < slots; slot++)
		starts[slot] = -1;
	// Record the lowest index for each first character.
	for (int pos = 0; pos < len; pos++) {
		const unsigned char lead = words[pos][0];
		if (starts[lead] < 0)
			starts[lead] = pos;
	}
}

/** Test whether a string is matched by the list.
 * An element matches either exactly or, when it begins with '^', as a prefix:
 * the text after the '^' must be the start of the string. For example
 * '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
 * Returns false when no words have been set.
 */
bool WordList::InList(const char *s) const {
	if (!words)
		return false;
	const unsigned char lead = s[0];
	// Exact matches: walk the run of words sharing the first character of s.
	for (int idx = starts[lead];
		idx >= 0 && static_cast<unsigned char>(words[idx][0]) == lead;
		idx++) {
		const char *entry = words[idx];
		// Cheap rejection on the second character before the full compare.
		if (s[1] != entry[1])
			continue;
		const char *w = entry + 1;
		const char *t = s + 1;
		while (*w && *w == *t) {
			w++;
			t++;
		}
		if (!*w && !*t)
			return true;
	}
	// Prefix matches: walk the run of words that begin with '^'.
	for (int idx = starts['^']; idx >= 0 && words[idx][0] == '^'; idx++) {
		const char *w = words[idx] + 1;
		const char *t = s;
		while (*w && *w == *t) {
			w++;
			t++;
		}
		// The whole prefix was consumed, so s starts with it.
		if (!*w)
			return true;
	}
	return false;
}

/** Like InList, but a string may also match as an abbreviation of a keyword.
 * The marker character inside a keyword shows where abbreviation may begin:
 * with marker '~' the keyword def~ine is matched by def, defi, defin and
 * define. Elements beginning with '^' are treated as prefixes, as in InList.
 * Returns false when no words have been set.
 */
bool WordList::InListAbbreviated(const char *s, const char marker) const {
	if (!words)
		return false;
	const unsigned char lead = s[0];
	// Walk the run of words sharing the first character of s.
	for (int idx = starts[lead];
		idx >= 0 && static_cast<unsigned char>(words[idx][0]) == lead;
		idx++) {
		const char *entry = words[idx];
		// A marker right after the first character is skipped and already
		// allows the string to stop early.
		bool pastMarker = (entry[1] == marker);
		const int offset = pastMarker ? 2 : 1;
		if (s[1] != entry[offset])
			continue;
		const char *w = entry + offset;
		const char *t = s + 1;
		while (*w && *w == *t) {
			w++;
			// Step over the marker; from here on s may end at any point.
			if (*w == marker) {
				pastMarker = true;
				w++;
			}
			t++;
		}
		// s must be fully consumed, and the keyword either fully consumed
		// or already past its marker.
		if ((!*w || pastMarker) && !*t)
			return true;
	}
	// Prefix matches: walk the run of words that begin with '^'.
	for (int idx = starts['^']; idx >= 0 && words[idx][0] == '^'; idx++) {
		const char *w = words[idx] + 1;
		const char *t = s;
		while (*w && *w == *t) {
			w++;
			t++;
		}
		// The whole prefix was consumed, so s starts with it.
		if (!*w)
			return true;
	}
	return false;
}
Commit	Line	Data
1dcf666d RD	1	// Scintilla source code edit control
	2	/** @file KeyWords.cxx
	3	** Colourise for particular languages.
	4	**/
	5	// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
	6	// The License.txt file describes the conditions under which this software may be distributed.
	7
	8	#include <stdlib.h>
	9	#include <string.h>
	10	#include <ctype.h>
	11	#include <stdio.h>
	12	#include <stdarg.h>
	13
	14	#include <algorithm>
	15
	16	#include "WordList.h"
	17
	18	#ifdef SCI_NAMESPACE
	19	using namespace Scintilla;
	20	#endif
	21
	22	/**
	23	* Creates an array that points into each word in the string and puts \0 terminators
	24	* after each word.
	25	*/
	26	static char *ArrayFromWordList(char wordlist, int *len, bool onlyLineEnds = false) {
	27	int prev = '\n';
	28	int words = 0;
	29	// For rapid determination of whether a character is a separator, build
	30	// a look up table.
	31	bool wordSeparator[256];
	32	for (int i=0; i<256; i++) {
	33	wordSeparator[i] = false;
	34	}
	35	wordSeparator['\r'] = true;
	36	wordSeparator['\n'] = true;
	37	if (!onlyLineEnds) {
	38	wordSeparator[' '] = true;
	39	wordSeparator['\t'] = true;
	40	}
	41	for (int j = 0; wordlist[j]; j++) {
	42	int curr = static_cast<unsigned char>(wordlist[j]);
	43	if (!wordSeparator[curr] && wordSeparator[prev])
	44	words++;
	45	prev = curr;
	46	}
	47	char *keywords = new char [words + 1];
	48	if (keywords) {
	49	words = 0;
	50	prev = '\0';
	51	size_t slen = strlen(wordlist);
	52	for (size_t k = 0; k < slen; k++) {
	53	if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
	54	if (!prev) {
	55	keywords[words] = &wordlist[k];
	56	words++;
	57	}
	58	} else {
	59	wordlist[k] = '\0';
	60	}
	61	prev = wordlist[k];
	62	}
	63	keywords[words] = &wordlist[slen];
	64	*len = words;
65	} else {
66	*len = 0;
67	}
68	return keywords;
69	}
70
71	bool WordList::operator!=(const WordList &other) const {
72	if (len != other.len)
73	return true;
74	for (int i=0; i<len; i++) {
75	if (strcmp(words[i], other.words[i]) != 0)
76	return true;
77	}
78	return false;
79	}
80
81	void WordList::Clear() {
82	if (words) {
83	delete []list;
84	delete []words;
85	}
86	words = 0;
87	list = 0;
88	len = 0;
89	}
90
91	#ifdef _MSC_VER
92
93	static bool cmpWords(const char a, const char b) {
94	return strcmp(a, b) == -1;
95	}
96
97	#else
98
99	static int cmpWords(const void a, const void b) {
100	return strcmp(static_cast<const char const >(a), static_cast<const char * const *>(b));
101	}
102
103	static void SortWordList(char **words, unsigned int len) {
104	qsort(reinterpret_cast<void >(words), len, sizeof(words), cmpWords);
105	}
106
107	#endif
108
109	void WordList::Set(const char *s) {
110	Clear();
111	list = new char[strlen(s) + 1];
112	strcpy(list, s);
113	words = ArrayFromWordList(list, &len, onlyLineEnds);
114	#ifdef _MSC_VER
115	std::sort(words, words + len, cmpWords);
116	#else
117	SortWordList(words, len);
118	#endif
119	for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
120	starts[k] = -1;
121	for (int l = len - 1; l >= 0; l--) {
122	unsigned char indexChar = words[l][0];
123	starts[indexChar] = l;
124	}
125	}
126
127	/** Check whether a string is in the list.
128	* List elements are either exact matches or prefixes.
129	* Prefix elements start with '^' and match all strings that start with the rest of the element
130	* so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
131	*/
132	bool WordList::InList(const char *s) const {
133	if (0 == words)
134	return false;
135	unsigned char firstChar = s[0];
136	int j = starts[firstChar];
137	if (j >= 0) {
138	while (static_cast<unsigned char>(words[j][0]) == firstChar) {
139	if (s[1] == words[j][1]) {
140	const char *a = words[j] + 1;
141	const char *b = s + 1;
142	while (a && a == *b) {
143	a++;
144	b++;
145	}
146	if (!a && !b)
147	return true;
148	}
149	j++;
150	}
151	}
152	j = starts['^'];
153	if (j >= 0) {
154	while (words[j][0] == '^') {
155	const char *a = words[j] + 1;
156	const char *b = s;
157	while (a && a == *b) {
158	a++;
159	b++;
160	}
161	if (!*a)
162	return true;
163	j++;
164	}
165	}
166	return false;
167	}
168
169	/** similar to InList, but word s can be a substring of keyword.
170	* eg. the keyword define is defined as def~ine. This means the word must start
171	* with def to be a keyword, but also defi, defin and define are valid.
172	* The marker is ~ in this case.
173	*/
174	bool WordList::InListAbbreviated(const char *s, const char marker) const {
175	if (0 == words)
176	return false;
177	unsigned char firstChar = s[0];
178	int j = starts[firstChar];
179	if (j >= 0) {
180	while (static_cast<unsigned char>(words[j][0]) == firstChar) {
181	bool isSubword = false;
182	int start = 1;
183	if (words[j][1] == marker) {
184	isSubword = true;
185	start++;
186	}
187	if (s[1] == words[j][start]) {
188	const char *a = words[j] + start;
189	const char *b = s + 1;
190	while (a && a == *b) {
191	a++;
192	if (*a == marker) {
193	isSubword = true;
194	a++;
195	}
196	b++;
197	}
198	if ((!a \|\| isSubword) && !b)
199	return true;
200	}
201	j++;
202	}
203	}
204	j = starts['^'];
205	if (j >= 0) {
206	while (words[j][0] == '^') {
207	const char *a = words[j] + 1;
208	const char *b = s;
209	while (a && a == *b) {
210	a++;
211	b++;
212	}
213	if (!*a)
214	return true;
215	j++;
216	}
217	}
218	return false;
219	}