]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/lexlib/WordList.cxx
Initial copy of Scintilla 3.21 code
[wxWidgets.git] / src / stc / scintilla / lexlib / WordList.cxx
1 // Scintilla source code edit control
2 /** @file WordList.cxx
3 ** Hold a list of words and test whether a string is in the list.
4 **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include <algorithm>
15
16 #include "WordList.h"
17
18 #ifdef SCI_NAMESPACE
19 using namespace Scintilla;
20 #endif
21
/**
 * Split a writable, NUL-terminated buffer into words in place.
 *
 * Every separator character in @a wordlist is overwritten with '\0', and an
 * array of pointers to the first character of each word is returned.
 * Carriage return and line feed always separate words; space and tab do as
 * well unless @a onlyLineEnds is true.
 *
 * @param wordlist     Buffer to split; it is modified and the returned
 *                     pointers refer into it.
 * @param len          Receives the number of words found.
 * @param onlyLineEnds When true, only '\r' and '\n' separate words.
 * @return Array allocated with new[] holding *len word pointers followed by
 *         one extra pointer to the terminating '\0' of @a wordlist (an empty
 *         string that acts as an end marker).
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// Lookup table so that classifying a byte is a single array access.
	bool isSeparator[256] = { false };
	isSeparator[static_cast<unsigned char>('\r')] = true;
	isSeparator[static_cast<unsigned char>('\n')] = true;
	if (!onlyLineEnds) {
		isSeparator[static_cast<unsigned char>(' ')] = true;
		isSeparator[static_cast<unsigned char>('\t')] = true;
	}

	// First pass: count separator -> non-separator transitions. The start of
	// the buffer is treated as if it followed a line end.
	int wordCount = 0;
	bool afterSeparator = true;
	for (const char *cursor = wordlist; *cursor; cursor++) {
		const bool separatorHere = isSeparator[static_cast<unsigned char>(*cursor)];
		if (afterSeparator && !separatorHere)
			wordCount++;
		afterSeparator = separatorHere;
	}

	// One extra slot for the trailing empty-string marker.
	char **keywords = new char *[wordCount + 1];
	if (!keywords) {
		*len = 0;
		return keywords;
	}

	// Second pass: terminate each word and record where it begins. The length
	// is taken before any byte is overwritten.
	const size_t total = strlen(wordlist);
	int stored = 0;
	bool expectWordStart = true;
	for (size_t pos = 0; pos < total; pos++) {
		char *here = &wordlist[pos];
		if (isSeparator[static_cast<unsigned char>(*here)]) {
			*here = '\0';
			expectWordStart = true;
		} else {
			if (expectWordStart) {
				keywords[stored] = here;
				stored++;
			}
			expectWordStart = false;
		}
	}
	keywords[stored] = &wordlist[total];
	*len = stored;
	return keywords;
}
70
71 bool WordList::operator!=(const WordList &other) const {
72 if (len != other.len)
73 return true;
74 for (int i=0; i<len; i++) {
75 if (strcmp(words[i], other.words[i]) != 0)
76 return true;
77 }
78 return false;
79 }
80
81 void WordList::Clear() {
82 if (words) {
83 delete []list;
84 delete []words;
85 }
86 words = 0;
87 list = 0;
88 len = 0;
89 }
90
91 #ifdef _MSC_VER
92
/**
 * Ordering predicate for std::sort: true when word @a a sorts strictly
 * before word @a b in strcmp order.
 *
 * strcmp only promises the sign of its result, not its magnitude, so the
 * result must be tested against zero. Comparing with -1 rejects other
 * negative values and yields an inconsistent ordering.
 */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}
96
97 #else
98
/**
 * qsort callback: each argument is the address of an array element, that is
 * a pointer to a `char *` word. Returns the strcmp result for the two words.
 */
static int cmpWords(const void *a, const void *b) {
	const char *const lhs = *static_cast<const char * const *>(a);
	const char *const rhs = *static_cast<const char * const *>(b);
	return strcmp(lhs, rhs);
}
102
103 static void SortWordList(char **words, unsigned int len) {
104 qsort(reinterpret_cast<void *>(words), len, sizeof(*words), cmpWords);
105 }
106
107 #endif
108
109 void WordList::Set(const char *s) {
110 Clear();
111 list = new char[strlen(s) + 1];
112 strcpy(list, s);
113 words = ArrayFromWordList(list, &len, onlyLineEnds);
114 #ifdef _MSC_VER
115 std::sort(words, words + len, cmpWords);
116 #else
117 SortWordList(words, len);
118 #endif
119 for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
120 starts[k] = -1;
121 for (int l = len - 1; l >= 0; l--) {
122 unsigned char indexChar = words[l][0];
123 starts[indexChar] = l;
124 }
125 }
126
127 /** Check whether a string is in the list.
128 * List elements are either exact matches or prefixes.
129 * Prefix elements start with '^' and match all strings that start with the rest of the element
130 * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
131 */
132 bool WordList::InList(const char *s) const {
133 if (0 == words)
134 return false;
135 unsigned char firstChar = s[0];
136 int j = starts[firstChar];
137 if (j >= 0) {
138 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
139 if (s[1] == words[j][1]) {
140 const char *a = words[j] + 1;
141 const char *b = s + 1;
142 while (*a && *a == *b) {
143 a++;
144 b++;
145 }
146 if (!*a && !*b)
147 return true;
148 }
149 j++;
150 }
151 }
152 j = starts['^'];
153 if (j >= 0) {
154 while (words[j][0] == '^') {
155 const char *a = words[j] + 1;
156 const char *b = s;
157 while (*a && *a == *b) {
158 a++;
159 b++;
160 }
161 if (!*a)
162 return true;
163 j++;
164 }
165 }
166 return false;
167 }
168
169 /** similar to InList, but word s can be a substring of keyword.
170 * eg. the keyword define is defined as def~ine. This means the word must start
171 * with def to be a keyword, but also defi, defin and define are valid.
172 * The marker is ~ in this case.
173 */
174 bool WordList::InListAbbreviated(const char *s, const char marker) const {
175 if (0 == words)
176 return false;
177 unsigned char firstChar = s[0];
178 int j = starts[firstChar];
179 if (j >= 0) {
180 while (static_cast<unsigned char>(words[j][0]) == firstChar) {
181 bool isSubword = false;
182 int start = 1;
183 if (words[j][1] == marker) {
184 isSubword = true;
185 start++;
186 }
187 if (s[1] == words[j][start]) {
188 const char *a = words[j] + start;
189 const char *b = s + 1;
190 while (*a && *a == *b) {
191 a++;
192 if (*a == marker) {
193 isSubword = true;
194 a++;
195 }
196 b++;
197 }
198 if ((!*a || isSubword) && !*b)
199 return true;
200 }
201 j++;
202 }
203 }
204 j = starts['^'];
205 if (j >= 0) {
206 while (words[j][0] == '^') {
207 const char *a = words[j] + 1;
208 const char *b = s;
209 while (*a && *a == *b) {
210 a++;
211 b++;
212 }
213 if (!*a)
214 return true;
215 j++;
216 }
217 }
218 return false;
219 }