]>
Commit | Line | Data |
---|---|---|
1dcf666d RD |
1 | // Scintilla source code edit control |
2 | /** @file KeyWords.cxx | |
3 | ** Colourise for particular languages. | |
4 | **/ | |
5 | // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org> | |
6 | // The License.txt file describes the conditions under which this software may be distributed. | |
7 | ||
8 | #include <stdlib.h> | |
9 | #include <string.h> | |
10 | #include <ctype.h> | |
11 | #include <stdio.h> | |
12 | #include <stdarg.h> | |
13 | ||
14 | #include <algorithm> | |
15 | ||
16 | #include "WordList.h" | |
17 | ||
18 | #ifdef SCI_NAMESPACE | |
19 | using namespace Scintilla; | |
20 | #endif | |
21 | ||
/**
 * Splits a writable, NUL-terminated buffer into words in place.
 *
 * Every separator character in @a wordlist is overwritten with '\0' and a
 * freshly allocated array of pointers to the first character of each word is
 * returned. Carriage return and line feed always separate words; space and tab
 * do too unless @a onlyLineEnds is true.
 *
 * The returned array has one extra trailing entry that points at the buffer's
 * terminating '\0', so indexing one past the last word yields an empty string.
 *
 * @param wordlist      buffer to split; modified in place and must outlive the result.
 * @param len           receives the number of words found (0 if allocation yields null).
 * @param onlyLineEnds  when true only '\r' and '\n' act as separators.
 * @return array allocated with new[]; the caller releases it with delete[].
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// Lookup table indexed by unsigned character value: true for separators.
	bool isSeparator[256] = { false };
	isSeparator['\r'] = true;
	isSeparator['\n'] = true;
	if (!onlyLineEnds) {
		isSeparator[' '] = true;
		isSeparator['\t'] = true;
	}

	// Measure the text before any separator is replaced by a terminator.
	const size_t textLength = strlen(wordlist);

	// Pass 1: a word starts wherever a non-separator follows a separator
	// (the start of the buffer counts as following a separator).
	int wordCount = 0;
	bool afterSeparator = true;
	for (size_t pos = 0; pos < textLength; pos++) {
		const bool separator = isSeparator[static_cast<unsigned char>(wordlist[pos])];
		if (afterSeparator && !separator)
			wordCount++;
		afterSeparator = separator;
	}

	char **keywords = new char *[wordCount + 1];
	if (!keywords) {
		*len = 0;
		return keywords;
	}

	// Pass 2: terminate each word and record where it begins.
	int stored = 0;
	bool atWordStart = true;
	for (size_t pos = 0; pos < textLength; pos++) {
		if (isSeparator[static_cast<unsigned char>(wordlist[pos])]) {
			wordlist[pos] = '\0';
			atWordStart = true;
		} else {
			if (atWordStart) {
				keywords[stored] = &wordlist[pos];
				stored++;
			}
			atWordStart = false;
		}
	}
	// Sentinel entry: an empty string at the end of the buffer.
	keywords[stored] = &wordlist[textLength];
	*len = stored;
	return keywords;
}
70 | ||
71 | bool WordList::operator!=(const WordList &other) const { | |
72 | if (len != other.len) | |
73 | return true; | |
74 | for (int i=0; i<len; i++) { | |
75 | if (strcmp(words[i], other.words[i]) != 0) | |
76 | return true; | |
77 | } | |
78 | return false; | |
79 | } | |
80 | ||
81 | void WordList::Clear() { | |
82 | if (words) { | |
83 | delete []list; | |
84 | delete []words; | |
85 | } | |
86 | words = 0; | |
87 | list = 0; | |
88 | len = 0; | |
89 | } | |
90 | ||
#ifdef _MSC_VER

/**
 * Ordering predicate for std::sort: true when @a a sorts strictly before @a b
 * in strcmp order.
 * strcmp only guarantees the sign of its result, so the test is "negative",
 * not "equal to -1".
 */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}

#else

/**
 * qsort comparator: @a a and @a b each point at a `char *` element of the
 * array being sorted; the pointed-to strings are compared with strcmp.
 */
static int cmpWords(const void *a, const void *b) {
	return strcmp(*static_cast<const char * const *>(a), *static_cast<const char * const *>(b));
}

/**
 * Sorts the @a len string pointers in @a words into strcmp order using qsort.
 */
static void SortWordList(char **words, unsigned int len) {
	qsort(words, len, sizeof(*words), cmpWords);
}

#endif
108 | ||
109 | void WordList::Set(const char *s) { | |
110 | Clear(); | |
111 | list = new char[strlen(s) + 1]; | |
112 | strcpy(list, s); | |
113 | words = ArrayFromWordList(list, &len, onlyLineEnds); | |
114 | #ifdef _MSC_VER | |
115 | std::sort(words, words + len, cmpWords); | |
116 | #else | |
117 | SortWordList(words, len); | |
118 | #endif | |
119 | for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++) | |
120 | starts[k] = -1; | |
121 | for (int l = len - 1; l >= 0; l--) { | |
122 | unsigned char indexChar = words[l][0]; | |
123 | starts[indexChar] = l; | |
124 | } | |
125 | } | |
126 | ||
127 | /** Check whether a string is in the list. | |
128 | * List elements are either exact matches or prefixes. | |
129 | * Prefix elements start with '^' and match all strings that start with the rest of the element | |
130 | * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'. | |
131 | */ | |
132 | bool WordList::InList(const char *s) const { | |
133 | if (0 == words) | |
134 | return false; | |
135 | unsigned char firstChar = s[0]; | |
136 | int j = starts[firstChar]; | |
137 | if (j >= 0) { | |
138 | while (static_cast<unsigned char>(words[j][0]) == firstChar) { | |
139 | if (s[1] == words[j][1]) { | |
140 | const char *a = words[j] + 1; | |
141 | const char *b = s + 1; | |
142 | while (*a && *a == *b) { | |
143 | a++; | |
144 | b++; | |
145 | } | |
146 | if (!*a && !*b) | |
147 | return true; | |
148 | } | |
149 | j++; | |
150 | } | |
151 | } | |
152 | j = starts['^']; | |
153 | if (j >= 0) { | |
154 | while (words[j][0] == '^') { | |
155 | const char *a = words[j] + 1; | |
156 | const char *b = s; | |
157 | while (*a && *a == *b) { | |
158 | a++; | |
159 | b++; | |
160 | } | |
161 | if (!*a) | |
162 | return true; | |
163 | j++; | |
164 | } | |
165 | } | |
166 | return false; | |
167 | } | |
168 | ||
169 | /** similar to InList, but word s can be a substring of keyword. | |
170 | * eg. the keyword define is defined as def~ine. This means the word must start | |
171 | * with def to be a keyword, but also defi, defin and define are valid. | |
172 | * The marker is ~ in this case. | |
173 | */ | |
174 | bool WordList::InListAbbreviated(const char *s, const char marker) const { | |
175 | if (0 == words) | |
176 | return false; | |
177 | unsigned char firstChar = s[0]; | |
178 | int j = starts[firstChar]; | |
179 | if (j >= 0) { | |
180 | while (static_cast<unsigned char>(words[j][0]) == firstChar) { | |
181 | bool isSubword = false; | |
182 | int start = 1; | |
183 | if (words[j][1] == marker) { | |
184 | isSubword = true; | |
185 | start++; | |
186 | } | |
187 | if (s[1] == words[j][start]) { | |
188 | const char *a = words[j] + start; | |
189 | const char *b = s + 1; | |
190 | while (*a && *a == *b) { | |
191 | a++; | |
192 | if (*a == marker) { | |
193 | isSubword = true; | |
194 | a++; | |
195 | } | |
196 | b++; | |
197 | } | |
198 | if ((!*a || isSubword) && !*b) | |
199 | return true; | |
200 | } | |
201 | j++; | |
202 | } | |
203 | } | |
204 | j = starts['^']; | |
205 | if (j >= 0) { | |
206 | while (words[j][0] == '^') { | |
207 | const char *a = words[j] + 1; | |
208 | const char *b = s; | |
209 | while (*a && *a == *b) { | |
210 | a++; | |
211 | b++; | |
212 | } | |
213 | if (!*a) | |
214 | return true; | |
215 | j++; | |
216 | } | |
217 | } | |
218 | return false; | |
219 | } |