]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/PropSet.cxx
guard code for mac / quickdraw
[wxWidgets.git] / src / stc / scintilla / src / PropSet.cxx
1 // SciTE - Scintilla based Text Editor
2 /** @file PropSet.cxx
3 ** A Java style properties file module.
4 **/
5 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 // Maintain a dictionary of properties
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17
18 // The comparison and case changing functions here assume ASCII
19 // or extended ASCII such as the normal Windows code page.
20
21 static inline char MakeUpperCase(char ch) {
22 if (ch < 'a' || ch > 'z')
23 return ch;
24 else
25 return static_cast<char>(ch - 'a' + 'A');
26 }
27
28 static inline bool IsLetter(char ch) {
29 return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
30 }
31
32 inline bool IsASpace(unsigned int ch) {
33 return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
34 }
35
36 int CompareCaseInsensitive(const char *a, const char *b) {
37 while (*a && *b) {
38 if (*a != *b) {
39 char upperA = MakeUpperCase(*a);
40 char upperB = MakeUpperCase(*b);
41 if (upperA != upperB)
42 return upperA - upperB;
43 }
44 a++;
45 b++;
46 }
47 // Either *a or *b is nul
48 return *a - *b;
49 }
50
51 int CompareNCaseInsensitive(const char *a, const char *b, size_t len) {
52 while (*a && *b && len) {
53 if (*a != *b) {
54 char upperA = MakeUpperCase(*a);
55 char upperB = MakeUpperCase(*b);
56 if (upperA != upperB)
57 return upperA - upperB;
58 }
59 a++;
60 b++;
61 len--;
62 }
63 if (len == 0)
64 return 0;
65 else
66 // Either *a or *b is nul
67 return *a - *b;
68 }
69
70 bool EqualCaseInsensitive(const char *a, const char *b) {
71 return 0 == CompareCaseInsensitive(a, b);
72 }
73
74 // Since the CaseInsensitive functions declared in SString
75 // are implemented here, I will for now put the non-inline
76 // implementations of the SString members here as well, so
77 // that I can quickly see what effect this has.
78
79 SString::SString(int i) : sizeGrowth(sizeGrowthDefault) {
80 char number[32];
81 sprintf(number, "%0d", i);
82 s = StringAllocate(number);
83 sSize = sLen = (s) ? strlen(s) : 0;
84 }
85
86 SString::SString(double d, int precision) : sizeGrowth(sizeGrowthDefault) {
87 char number[32];
88 sprintf(number, "%.*f", precision, d);
89 s = StringAllocate(number);
90 sSize = sLen = (s) ? strlen(s) : 0;
91 }
92
93 bool SString::grow(lenpos_t lenNew) {
94 while (sizeGrowth * 6 < lenNew) {
95 sizeGrowth *= 2;
96 }
97 char *sNew = new char[lenNew + sizeGrowth + 1];
98 if (sNew) {
99 if (s) {
100 memcpy(sNew, s, sLen);
101 delete []s;
102 }
103 s = sNew;
104 s[sLen] = '\0';
105 sSize = lenNew + sizeGrowth;
106 }
107 return sNew != 0;
108 }
109
110 SString &SString::assign(const char *sOther, lenpos_t sSize_) {
111 if (!sOther) {
112 sSize_ = 0;
113 } else if (sSize_ == measure_length) {
114 sSize_ = strlen(sOther);
115 }
116 if (sSize > 0 && sSize_ <= sSize) { // Does not allocate new buffer if the current is big enough
117 if (s && sSize_) {
118 memcpy(s, sOther, sSize_);
119 }
120 s[sSize_] = '\0';
121 sLen = sSize_;
122 } else {
123 delete []s;
124 s = StringAllocate(sOther, sSize_);
125 if (s) {
126 sSize = sSize_; // Allow buffer bigger than real string, thus providing space to grow
127 sLen = sSize_;
128 } else {
129 sSize = sLen = 0;
130 }
131 }
132 return *this;
133 }
134
135 bool SString::operator==(const SString &sOther) const {
136 if ((s == 0) && (sOther.s == 0))
137 return true;
138 if ((s == 0) || (sOther.s == 0))
139 return false;
140 return strcmp(s, sOther.s) == 0;
141 }
142
143 bool SString::operator==(const char *sOther) const {
144 if ((s == 0) && (sOther == 0))
145 return true;
146 if ((s == 0) || (sOther == 0))
147 return false;
148 return strcmp(s, sOther) == 0;
149 }
150
151 SString SString::substr(lenpos_t subPos, lenpos_t subLen) const {
152 if (subPos >= sLen) {
153 return SString(); // return a null string if start index is out of bounds
154 }
155 if ((subLen == measure_length) || (subPos + subLen > sLen)) {
156 subLen = sLen - subPos; // can't substr past end of source string
157 }
158 return SString(s, subPos, subPos + subLen);
159 }
160
161 SString &SString::lowercase(lenpos_t subPos, lenpos_t subLen) {
162 if ((subLen == measure_length) || (subPos + subLen > sLen)) {
163 subLen = sLen - subPos; // don't apply past end of string
164 }
165 for (lenpos_t i = subPos; i < subPos + subLen; i++) {
166 if (s[i] < 'A' || s[i] > 'Z')
167 continue;
168 else
169 s[i] = static_cast<char>(s[i] - 'A' + 'a');
170 }
171 return *this;
172 }
173
174 SString &SString::uppercase(lenpos_t subPos, lenpos_t subLen) {
175 if ((subLen == measure_length) || (subPos + subLen > sLen)) {
176 subLen = sLen - subPos; // don't apply past end of string
177 }
178 for (lenpos_t i = subPos; i < subPos + subLen; i++) {
179 if (s[i] < 'a' || s[i] > 'z')
180 continue;
181 else
182 s[i] = static_cast<char>(s[i] - 'a' + 'A');
183 }
184 return *this;
185 }
186
187 SString &SString::append(const char *sOther, lenpos_t sLenOther, char sep) {
188 if (!sOther) {
189 return *this;
190 }
191 if (sLenOther == measure_length) {
192 sLenOther = strlen(sOther);
193 }
194 int lenSep = 0;
195 if (sLen && sep) { // Only add a separator if not empty
196 lenSep = 1;
197 }
198 lenpos_t lenNew = sLen + sLenOther + lenSep;
199 // Conservative about growing the buffer: don't do it, unless really needed
200 if ((lenNew < sSize) || (grow(lenNew))) {
201 if (lenSep) {
202 s[sLen] = sep;
203 sLen++;
204 }
205 memcpy(&s[sLen], sOther, sLenOther);
206 sLen += sLenOther;
207 s[sLen] = '\0';
208 }
209 return *this;
210 }
211
212 SString &SString::insert(lenpos_t pos, const char *sOther, lenpos_t sLenOther) {
213 if (!sOther || pos > sLen) {
214 return *this;
215 }
216 if (sLenOther == measure_length) {
217 sLenOther = strlen(sOther);
218 }
219 lenpos_t lenNew = sLen + sLenOther;
220 // Conservative about growing the buffer: don't do it, unless really needed
221 if ((lenNew < sSize) || grow(lenNew)) {
222 lenpos_t moveChars = sLen - pos + 1;
223 for (lenpos_t i = moveChars; i > 0; i--) {
224 s[pos + sLenOther + i - 1] = s[pos + i - 1];
225 }
226 memcpy(s + pos, sOther, sLenOther);
227 sLen = lenNew;
228 }
229 return *this;
230 }
231
232 /**
233 * Remove @a len characters from the @a pos position, included.
234 * Characters at pos + len and beyond replace characters at pos.
235 * If @a len is 0, or greater than the length of the string
236 * starting at @a pos, the string is just truncated at @a pos.
237 */
238 void SString::remove(lenpos_t pos, lenpos_t len) {
239 if (pos >= sLen) {
240 return;
241 }
242 if (len < 1 || pos + len >= sLen) {
243 s[pos] = '\0';
244 sLen = pos;
245 } else {
246 for (lenpos_t i = pos; i < sLen - len + 1; i++) {
247 s[i] = s[i+len];
248 }
249 sLen -= len;
250 }
251 }
252
253 bool SString::startswith(const char *prefix) {
254 lenpos_t lenPrefix = strlen(prefix);
255 if (lenPrefix > sLen) {
256 return false;
257 }
258 return strncmp(s, prefix, lenPrefix) == 0;
259 }
260
261 bool SString::endswith(const char *suffix) {
262 lenpos_t lenSuffix = strlen(suffix);
263 if (lenSuffix > sLen) {
264 return false;
265 }
266 return strncmp(s + sLen - lenSuffix, suffix, lenSuffix) == 0;
267 }
268
269 int SString::search(const char *sFind, lenpos_t start) const {
270 if (start < sLen) {
271 const char *sFound = strstr(s + start, sFind);
272 if (sFound) {
273 return sFound - s;
274 }
275 }
276 return -1;
277 }
278
279 int SString::substitute(char chFind, char chReplace) {
280 int c = 0;
281 char *t = s;
282 while (t) {
283 t = strchr(t, chFind);
284 if (t) {
285 *t = chReplace;
286 t++;
287 c++;
288 }
289 }
290 return c;
291 }
292
293 int SString::substitute(const char *sFind, const char *sReplace) {
294 int c = 0;
295 lenpos_t lenFind = strlen(sFind);
296 lenpos_t lenReplace = strlen(sReplace);
297 int posFound = search(sFind);
298 while (posFound >= 0) {
299 remove(posFound, lenFind);
300 insert(posFound, sReplace, lenReplace);
301 posFound = search(sFind, posFound + lenReplace);
302 c++;
303 }
304 return c;
305 }
306
307 char *SContainer::StringAllocate(lenpos_t len) {
308 if (len != measure_length) {
309 return new char[len + 1];
310 } else {
311 return 0;
312 }
313 }
314
315 char *SContainer::StringAllocate(const char *s, lenpos_t len) {
316 if (s == 0) {
317 return 0;
318 }
319 if (len == measure_length) {
320 len = strlen(s);
321 }
322 char *sNew = new char[len + 1];
323 if (sNew) {
324 memcpy(sNew, s, len);
325 sNew[len] = '\0';
326 }
327 return sNew;
328 }
329
330 // End SString functions
331
332 bool PropSet::caseSensitiveFilenames = false;
333
334 PropSet::PropSet() {
335 superPS = 0;
336 for (int root = 0; root < hashRoots; root++)
337 props[root] = 0;
338 }
339
340 PropSet::~PropSet() {
341 superPS = 0;
342 Clear();
343 }
344
345 void PropSet::Set(const char *key, const char *val, int lenKey, int lenVal) {
346 if (!*key) // Empty keys are not supported
347 return;
348 if (lenKey == -1)
349 lenKey = static_cast<int>(strlen(key));
350 if (lenVal == -1)
351 lenVal = static_cast<int>(strlen(val));
352 unsigned int hash = HashString(key, lenKey);
353 for (Property *p = props[hash % hashRoots]; p; p = p->next) {
354 if ((hash == p->hash) &&
355 ((strlen(p->key) == static_cast<unsigned int>(lenKey)) &&
356 (0 == strncmp(p->key, key, lenKey)))) {
357 // Replace current value
358 delete [](p->val);
359 p->val = StringDup(val, lenVal);
360 return;
361 }
362 }
363 // Not found
364 Property *pNew = new Property;
365 if (pNew) {
366 pNew->hash = hash;
367 pNew->key = StringDup(key, lenKey);
368 pNew->val = StringDup(val, lenVal);
369 pNew->next = props[hash % hashRoots];
370 props[hash % hashRoots] = pNew;
371 }
372 }
373
374 void PropSet::Set(const char *keyVal) {
375 while (IsASpace(*keyVal))
376 keyVal++;
377 const char *endVal = keyVal;
378 while (*endVal && (*endVal != '\n'))
379 endVal++;
380 const char *eqAt = strchr(keyVal, '=');
381 if (eqAt) {
382 Set(keyVal, eqAt + 1, eqAt-keyVal, endVal - eqAt - 1);
383 } else if (*keyVal) { // No '=' so assume '=1'
384 Set(keyVal, "1", endVal-keyVal, 1);
385 }
386 }
387
388 void PropSet::Unset(const char *key, int lenKey) {
389 if (!*key) // Empty keys are not supported
390 return;
391 if (lenKey == -1)
392 lenKey = static_cast<int>(strlen(key));
393 unsigned int hash = HashString(key, lenKey);
394 Property *pPrev = NULL;
395 for (Property *p = props[hash % hashRoots]; p; p = p->next) {
396 if ((hash == p->hash) &&
397 ((strlen(p->key) == static_cast<unsigned int>(lenKey)) &&
398 (0 == strncmp(p->key, key, lenKey)))) {
399 if (pPrev)
400 pPrev->next = p->next;
401 else
402 props[hash % hashRoots] = p->next;
403 if (p == enumnext)
404 enumnext = p->next; // Not that anyone should mix enum and Set / Unset.
405 delete [](p->key);
406 delete [](p->val);
407 delete p;
408 return;
409 } else {
410 pPrev = p;
411 }
412 }
413 }
414
415 void PropSet::SetMultiple(const char *s) {
416 const char *eol = strchr(s, '\n');
417 while (eol) {
418 Set(s);
419 s = eol + 1;
420 eol = strchr(s, '\n');
421 }
422 Set(s);
423 }
424
425 SString PropSet::Get(const char *key) {
426 unsigned int hash = HashString(key, strlen(key));
427 for (Property *p = props[hash % hashRoots]; p; p = p->next) {
428 if ((hash == p->hash) && (0 == strcmp(p->key, key))) {
429 return p->val;
430 }
431 }
432 if (superPS) {
433 // Failed here, so try in base property set
434 return superPS->Get(key);
435 } else {
436 return "";
437 }
438 }
439
440 bool PropSet::IncludesVar(const char *value, const char *key) {
441 const char *var = strstr(value, "$(");
442 while (var) {
443 if (isprefix(var + 2, key) && (var[2 + strlen(key)] == ')')) {
444 // Found $(key) which would lead to an infinite loop so exit
445 return true;
446 }
447 var = strstr(var + 2, ")");
448 if (var)
449 var = strstr(var + 1, "$(");
450 }
451 return false;
452 }
453
454
455 // There is some inconsistency between GetExpanded("foo") and Expand("$(foo)").
456 // A solution is to keep a stack of variables that have been expanded, so that
457 // recursive expansions can be skipped. For now I'll just use the C++ stack
458 // for that, through a recursive function and a simple chain of pointers.
459
460 struct VarChain {
461 VarChain(const char*var_=NULL, const VarChain *link_=NULL): var(var_), link(link_) {}
462
463 bool contains(const char *testVar) const {
464 return (var && (0 == strcmp(var, testVar)))
465 || (link && link->contains(testVar));
466 }
467
468 const char *var;
469 const VarChain *link;
470 };
471
472 static int ExpandAllInPlace(PropSet &props, SString &withVars, int maxExpands, const VarChain &blankVars = VarChain()) {
473 int varStart = withVars.search("$(");
474 while ((varStart >= 0) && (maxExpands > 0)) {
475 int varEnd = withVars.search(")", varStart+2);
476 if (varEnd < 0) {
477 break;
478 }
479
480 // For consistency, when we see '$(ab$(cde))', expand the inner variable first,
481 // regardless whether there is actually a degenerate variable named 'ab$(cde'.
482 int innerVarStart = withVars.search("$(", varStart+2);
483 while ((innerVarStart > varStart) && (innerVarStart < varEnd)) {
484 varStart = innerVarStart;
485 innerVarStart = withVars.search("$(", varStart+2);
486 }
487
488 SString var(withVars.c_str(), varStart + 2, varEnd);
489 SString val = props.Get(var.c_str());
490
491 if (blankVars.contains(var.c_str())) {
492 val.clear(); // treat blankVar as an empty string (e.g. to block self-reference)
493 }
494
495 if (--maxExpands >= 0) {
496 maxExpands = ExpandAllInPlace(props, val, maxExpands, VarChain(var.c_str(), &blankVars));
497 }
498
499 withVars.remove(varStart, varEnd-varStart+1);
500 withVars.insert(varStart, val.c_str(), val.length());
501
502 varStart = withVars.search("$(");
503 }
504
505 return maxExpands;
506 }
507
508
509 SString PropSet::GetExpanded(const char *key) {
510 SString val = Get(key);
511 ExpandAllInPlace(*this, val, 100, VarChain(key));
512 return val;
513 }
514
515 SString PropSet::Expand(const char *withVars, int maxExpands) {
516 SString val = withVars;
517 ExpandAllInPlace(*this, val, maxExpands);
518 return val;
519 }
520
521 int PropSet::GetInt(const char *key, int defaultValue) {
522 SString val = GetExpanded(key);
523 if (val.length())
524 return val.value();
525 return defaultValue;
526 }
527
528 bool isprefix(const char *target, const char *prefix) {
529 while (*target && *prefix) {
530 if (*target != *prefix)
531 return false;
532 target++;
533 prefix++;
534 }
535 if (*prefix)
536 return false;
537 else
538 return true;
539 }
540
541 static bool IsSuffix(const char *target, const char *suffix, bool caseSensitive) {
542 size_t lentarget = strlen(target);
543 size_t lensuffix = strlen(suffix);
544 if (lensuffix > lentarget)
545 return false;
546 if (caseSensitive) {
547 for (int i = static_cast<int>(lensuffix) - 1; i >= 0; i--) {
548 if (target[i + lentarget - lensuffix] != suffix[i])
549 return false;
550 }
551 } else {
552 for (int i = static_cast<int>(lensuffix) - 1; i >= 0; i--) {
553 if (MakeUpperCase(target[i + lentarget - lensuffix]) !=
554 MakeUpperCase(suffix[i]))
555 return false;
556 }
557 }
558 return true;
559 }
560
561 SString PropSet::GetWild(const char *keybase, const char *filename) {
562 for (int root = 0; root < hashRoots; root++) {
563 for (Property *p = props[root]; p; p = p->next) {
564 if (isprefix(p->key, keybase)) {
565 char * orgkeyfile = p->key + strlen(keybase);
566 char *keyfile = NULL;
567
568 if (strstr(orgkeyfile, "$(") == orgkeyfile) {
569 char *cpendvar = strchr(orgkeyfile, ')');
570 if (cpendvar) {
571 *cpendvar = '\0';
572 SString s = GetExpanded(orgkeyfile + 2);
573 *cpendvar = ')';
574 keyfile = StringDup(s.c_str());
575 }
576 }
577 char *keyptr = keyfile;
578
579 if (keyfile == NULL)
580 keyfile = orgkeyfile;
581
582 for (;;) {
583 char *del = strchr(keyfile, ';');
584 if (del == NULL)
585 del = keyfile + strlen(keyfile);
586 char delchr = *del;
587 *del = '\0';
588 if (*keyfile == '*') {
589 if (IsSuffix(filename, keyfile + 1, caseSensitiveFilenames)) {
590 *del = delchr;
591 delete []keyptr;
592 return p->val;
593 }
594 } else if (0 == strcmp(keyfile, filename)) {
595 *del = delchr;
596 delete []keyptr;
597 return p->val;
598 }
599 if (delchr == '\0')
600 break;
601 *del = delchr;
602 keyfile = del + 1;
603 }
604 delete []keyptr;
605
606 if (0 == strcmp(p->key, keybase)) {
607 return p->val;
608 }
609 }
610 }
611 }
612 if (superPS) {
613 // Failed here, so try in base property set
614 return superPS->GetWild(keybase, filename);
615 } else {
616 return "";
617 }
618 }
619
620
621
622 // GetNewExpand does not use Expand as it has to use GetWild with the filename for each
623 // variable reference found.
624 SString PropSet::GetNewExpand(const char *keybase, const char *filename) {
625 char *base = StringDup(GetWild(keybase, filename).c_str());
626 char *cpvar = strstr(base, "$(");
627 int maxExpands = 1000; // Avoid infinite expansion of recursive definitions
628 while (cpvar && (maxExpands > 0)) {
629 char *cpendvar = strchr(cpvar, ')');
630 if (cpendvar) {
631 int lenvar = cpendvar - cpvar - 2; // Subtract the $()
632 char *var = StringDup(cpvar + 2, lenvar);
633 SString val = GetWild(var, filename);
634 if (0 == strcmp(var, keybase))
635 val.clear(); // Self-references evaluate to empty string
636 size_t newlenbase = strlen(base) + val.length() - lenvar;
637 char *newbase = new char[newlenbase];
638 strncpy(newbase, base, cpvar - base);
639 strcpy(newbase + (cpvar - base), val.c_str());
640 strcpy(newbase + (cpvar - base) + val.length(), cpendvar + 1);
641 delete []var;
642 delete []base;
643 base = newbase;
644 }
645 cpvar = strstr(base, "$(");
646 maxExpands--;
647 }
648 SString sret = base;
649 delete []base;
650 return sret;
651 }
652
653 void PropSet::Clear() {
654 for (int root = 0; root < hashRoots; root++) {
655 Property *p = props[root];
656 while (p) {
657 Property *pNext = p->next;
658 p->hash = 0;
659 delete []p->key;
660 p->key = 0;
661 delete []p->val;
662 p->val = 0;
663 delete p;
664 p = pNext;
665 }
666 props[root] = 0;
667 }
668 }
669
670 char *PropSet::ToString() {
671 size_t len=0;
672 for (int r = 0; r < hashRoots; r++) {
673 for (Property *p = props[r]; p; p = p->next) {
674 len += strlen(p->key) + 1;
675 len += strlen(p->val) + 1;
676 }
677 }
678 if (len == 0)
679 len = 1; // Return as empty string
680 char *ret = new char [len];
681 if (ret) {
682 char *w = ret;
683 for (int root = 0; root < hashRoots; root++) {
684 for (Property *p = props[root]; p; p = p->next) {
685 strcpy(w, p->key);
686 w += strlen(p->key);
687 *w++ = '=';
688 strcpy(w, p->val);
689 w += strlen(p->val);
690 *w++ = '\n';
691 }
692 }
693 ret[len-1] = '\0';
694 }
695 return ret;
696 }
697
698 /**
699 * Initiate enumeration.
700 */
701 bool PropSet::GetFirst(char **key, char **val) {
702 for (int i = 0; i < hashRoots; i++) {
703 for (Property *p = props[i]; p; p = p->next) {
704 if (p) {
705 *key = p->key;
706 *val = p->val;
707 enumnext = p->next; // GetNext will begin here ...
708 enumhash = i; // ... in this block
709 return true;
710 }
711 }
712 }
713 return false;
714 }
715
716 /**
717 * Continue enumeration.
718 */
719 bool PropSet::GetNext(char ** key, char ** val) {
720 bool firstloop = true;
721
722 // search begins where we left it : in enumhash block
723 for (int i = enumhash; i < hashRoots; i++) {
724 if (!firstloop)
725 enumnext = props[i]; // Begin with first property in block
726 // else : begin where we left
727 firstloop = false;
728
729 for (Property *p = enumnext; p; p = p->next) {
730 if (p) {
731 *key = p->key;
732 *val = p->val;
733 enumnext = p->next; // for GetNext
734 enumhash = i;
735 return true;
736 }
737 }
738 }
739 return false;
740 }
741
742 /**
743 * Creates an array that points into each word in the string and puts \0 terminators
744 * after each word.
745 */
746 static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
747 int prev = '\n';
748 int words = 0;
749 // For rapid determination of whether a character is a separator, build
750 // a look up table.
751 bool wordSeparator[256];
752 for (int i=0;i<256; i++) {
753 wordSeparator[i] = false;
754 }
755 wordSeparator['\r'] = true;
756 wordSeparator['\n'] = true;
757 if (!onlyLineEnds) {
758 wordSeparator[' '] = true;
759 wordSeparator['\t'] = true;
760 }
761 for (int j = 0; wordlist[j]; j++) {
762 int curr = static_cast<unsigned char>(wordlist[j]);
763 if (!wordSeparator[curr] && wordSeparator[prev])
764 words++;
765 prev = curr;
766 }
767 char **keywords = new char *[words + 1];
768 if (keywords) {
769 words = 0;
770 prev = '\0';
771 size_t slen = strlen(wordlist);
772 for (size_t k = 0; k < slen; k++) {
773 if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
774 if (!prev) {
775 keywords[words] = &wordlist[k];
776 words++;
777 }
778 } else {
779 wordlist[k] = '\0';
780 }
781 prev = wordlist[k];
782 }
783 keywords[words] = &wordlist[slen];
784 *len = words;
785 } else {
786 *len = 0;
787 }
788 return keywords;
789 }
790
791 void WordList::Clear() {
792 if (words) {
793 delete []list;
794 delete []words;
795 delete []wordsNoCase;
796 }
797 words = 0;
798 wordsNoCase = 0;
799 list = 0;
800 len = 0;
801 sorted = false;
802 sortedNoCase = false;
803 }
804
805 void WordList::Set(const char *s) {
806 list = StringDup(s);
807 sorted = false;
808 sortedNoCase = false;
809 words = ArrayFromWordList(list, &len, onlyLineEnds);
810 wordsNoCase = new char * [len + 1];
811 memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
812 }
813
814 char *WordList::Allocate(int size) {
815 list = new char[size + 1];
816 list[size] = '\0';
817 return list;
818 }
819
820 void WordList::SetFromAllocated() {
821 sorted = false;
822 sortedNoCase = false;
823 words = ArrayFromWordList(list, &len, onlyLineEnds);
824 wordsNoCase = new char * [len + 1];
825 memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
826 }
827
828 int cmpString(const void *a1, const void *a2) {
829 // Can't work out the correct incantation to use modern casts here
830 return strcmp(*(char**)(a1), *(char**)(a2));
831 }
832
833 int cmpStringNoCase(const void *a1, const void *a2) {
834 // Can't work out the correct incantation to use modern casts here
835 return CompareCaseInsensitive(*(char**)(a1), *(char**)(a2));
836 }
837
838 static void SortWordList(char **words, unsigned int len) {
839 qsort(reinterpret_cast<void*>(words), len, sizeof(*words),
840 cmpString);
841 }
842
843 static void SortWordListNoCase(char **wordsNoCase, unsigned int len) {
844 qsort(reinterpret_cast<void*>(wordsNoCase), len, sizeof(*wordsNoCase),
845 cmpStringNoCase);
846 }
847
848 bool WordList::InList(const char *s) {
849 if (0 == words)
850 return false;
851 if (!sorted) {
852 sorted = true;
853 SortWordList(words, len);
854 for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
855 starts[k] = -1;
856 for (int l = len - 1; l >= 0; l--) {
857 unsigned char indexChar = words[l][0];
858 starts[indexChar] = l;
859 }
860 }
861 unsigned char firstChar = s[0];
862 int j = starts[firstChar];
863 if (j >= 0) {
864 while (words[j][0] == firstChar) {
865 if (s[1] == words[j][1]) {
866 const char *a = words[j] + 1;
867 const char *b = s + 1;
868 while (*a && *a == *b) {
869 a++;
870 b++;
871 }
872 if (!*a && !*b)
873 return true;
874 }
875 j++;
876 }
877 }
878 j = starts['^'];
879 if (j >= 0) {
880 while (words[j][0] == '^') {
881 const char *a = words[j] + 1;
882 const char *b = s;
883 while (*a && *a == *b) {
884 a++;
885 b++;
886 }
887 if (!*a)
888 return true;
889 j++;
890 }
891 }
892 return false;
893 }
894
895 /** similar to InList, but word s can be a substring of keyword.
896 * eg. the keyword define is defined as def~ine. This means the word must start
897 * with def to be a keyword, but also defi, defin and define are valid.
898 * The marker is ~ in this case.
899 */
900 bool WordList::InListAbbreviated(const char *s, const char marker) {
901 if (0 == words)
902 return false;
903 if (!sorted) {
904 sorted = true;
905 SortWordList(words, len);
906 for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
907 starts[k] = -1;
908 for (int l = len - 1; l >= 0; l--) {
909 unsigned char indexChar = words[l][0];
910 starts[indexChar] = l;
911 }
912 }
913 unsigned char firstChar = s[0];
914 int j = starts[firstChar];
915 if (j >= 0) {
916 while (words[j][0] == firstChar) {
917 bool isSubword = false;
918 int start = 1;
919 if (words[j][1] == marker) {
920 isSubword = true;
921 start++;
922 }
923 if (s[1] == words[j][start]) {
924 const char *a = words[j] + start;
925 const char *b = s + 1;
926 while (*a && *a == *b) {
927 a++;
928 if (*a == marker) {
929 isSubword = true;
930 a++;
931 }
932 b++;
933 }
934 if ((!*a || isSubword) && !*b)
935 return true;
936 }
937 j++;
938 }
939 }
940 j = starts['^'];
941 if (j >= 0) {
942 while (words[j][0] == '^') {
943 const char *a = words[j] + 1;
944 const char *b = s;
945 while (*a && *a == *b) {
946 a++;
947 b++;
948 }
949 if (!*a)
950 return true;
951 j++;
952 }
953 }
954 return false;
955 }
956
957 /**
958 * Returns an element (complete) of the wordlist array which has
959 * the same beginning as the passed string.
960 * The length of the word to compare is passed too.
961 * Letter case can be ignored or preserved (default).
962 */
963 const char *WordList::GetNearestWord(const char *wordStart, int searchLen, bool ignoreCase /*= false*/, SString wordCharacters /*='/0' */, int wordIndex /*= -1 */) {
964 int start = 0; // lower bound of the api array block to search
965 int end = len - 1; // upper bound of the api array block to search
966 int pivot; // index of api array element just being compared
967 int cond; // comparison result (in the sense of strcmp() result)
968 const char *word; // api array element just being compared
969
970 if (0 == words)
971 return NULL;
972 if (ignoreCase) {
973 if (!sortedNoCase) {
974 sortedNoCase = true;
975 SortWordListNoCase(wordsNoCase, len);
976 }
977 while (start <= end) { // binary searching loop
978 pivot = (start + end) >> 1;
979 word = wordsNoCase[pivot];
980 cond = CompareNCaseInsensitive(wordStart, word, searchLen);
981 if (!cond) {
982 // find first word
983 start = pivot;
984 while (start > 0 && !CompareNCaseInsensitive(wordStart, wordsNoCase[start-1], searchLen)) {
985 start--;
986 }
987 // find last word
988 end = pivot;
989 while (end < len-1 && !CompareNCaseInsensitive(wordStart, wordsNoCase[end+1], searchLen)) {
990 end++;
991 }
992
993 // Finds first word in a series of equal words
994 for (pivot = start; pivot <= end; pivot++) {
995 word = wordsNoCase[pivot];
996 if (!wordCharacters.contains(word[searchLen])) {
997 if (wordIndex <= 0) // Checks if a specific index was requested
998 return word; // result must not be freed with free()
999 wordIndex--;
1000 }
1001 }
1002 return NULL;
1003 }
1004 else if (cond > 0)
1005 start = pivot + 1;
1006 else if (cond < 0)
1007 end = pivot - 1;
1008 }
1009 } else { // preserve the letter case
1010 if (!sorted) {
1011 sorted = true;
1012 SortWordList(words, len);
1013 }
1014 while (start <= end) { // binary searching loop
1015 pivot = (start + end) >> 1;
1016 word = words[pivot];
1017 cond = strncmp(wordStart, word, searchLen);
1018 if (!cond) {
1019 // find first word
1020 start = pivot;
1021 while (start > 0 && !strncmp(wordStart, words[start-1], searchLen)) {
1022 start--;
1023 }
1024 // find last word
1025 end = pivot;
1026 while (end < len-1 && !strncmp(wordStart, words[end+1], searchLen)) {
1027 end++;
1028 }
1029
1030 // Finds first word in a series of equal words
1031 pivot = start;
1032 while (pivot <= end) {
1033 word = words[pivot];
1034 if (!wordCharacters.contains(word[searchLen])) {
1035 if (wordIndex <= 0) // Checks if a specific index was requested
1036 return word; // result must not be freed with free()
1037 wordIndex--;
1038 }
1039 pivot++;
1040 }
1041 return NULL;
1042 }
1043 else if (cond > 0)
1044 start = pivot + 1;
1045 else if (cond < 0)
1046 end = pivot - 1;
1047 }
1048 }
1049 return NULL;
1050 }
1051
1052 /**
1053 * Find the length of a 'word' which is actually an identifier in a string
1054 * which looks like "identifier(..." or "identifier" and where
1055 * there may be extra spaces after the identifier that should not be
1056 * counted in the length.
1057 */
1058 static unsigned int LengthWord(const char *word, char otherSeparator) {
1059 // Find a '('. If that fails go to the end of the string.
1060 const char *endWord = strchr(word, '(');
1061 if (!endWord && otherSeparator)
1062 endWord = strchr(word, otherSeparator);
1063 if (!endWord)
1064 endWord = word + strlen(word);
1065 // Last case always succeeds so endWord != 0
1066
1067 // Drop any space characters.
1068 if (endWord > word) {
1069 endWord--; // Back from the '(', otherSeparator, or '\0'
1070 // Move backwards over any spaces
1071 while ((endWord > word) && (IsASpace(*endWord))) {
1072 endWord--;
1073 }
1074 }
1075 return endWord - word;
1076 }
1077
1078 /**
1079 * Returns elements (first words of them) of the wordlist array which have
1080 * the same beginning as the passed string.
1081 * The length of the word to compare is passed too.
1082 * Letter case can be ignored or preserved (default).
1083 * If there are more words meeting the condition they are returned all of
1084 * them in the ascending order separated with spaces.
1085 *
1086 * NOTE: returned buffer has to be freed with delete[].
1087 */
1088 char *WordList::GetNearestWords(
1089 const char *wordStart,
1090 int searchLen,
1091 bool ignoreCase /*= false*/,
1092 char otherSeparator /*= '\0'*/,
1093 bool exactLen /*=false*/) {
1094 unsigned int wordlen; // length of the word part (before the '(' brace) of the api array element
1095 SString wordsNear;
1096 wordsNear.setsizegrowth(1000);
1097 int start = 0; // lower bound of the api array block to search
1098 int end = len - 1; // upper bound of the api array block to search
1099 int pivot; // index of api array element just being compared
1100 int cond; // comparison result (in the sense of strcmp() result)
1101
1102 if (0 == words)
1103 return NULL;
1104 if (ignoreCase) {
1105 if (!sortedNoCase) {
1106 sortedNoCase = true;
1107 SortWordListNoCase(wordsNoCase, len);
1108 }
1109 while (start <= end) { // Binary searching loop
1110 pivot = (start + end) / 2;
1111 cond = CompareNCaseInsensitive(wordStart, wordsNoCase[pivot], searchLen);
1112 if (!cond) {
1113 // Find first match
1114 while ((pivot > start) &&
1115 (0 == CompareNCaseInsensitive(wordStart,
1116 wordsNoCase[pivot-1], searchLen))) {
1117 --pivot;
1118 }
1119 // Grab each match
1120 while ((pivot <= end) &&
1121 (0 == CompareNCaseInsensitive(wordStart,
1122 wordsNoCase[pivot], searchLen))) {
1123 wordlen = LengthWord(wordsNoCase[pivot], otherSeparator) + 1;
1124 ++pivot;
1125 if (exactLen && wordlen != LengthWord(wordStart, otherSeparator) + 1)
1126 continue;
1127 wordsNear.append(wordsNoCase[pivot-1], wordlen, ' ');
1128 }
1129 return wordsNear.detach();
1130 } else if (cond < 0) {
1131 end = pivot - 1;
1132 } else if (cond > 0) {
1133 start = pivot + 1;
1134 }
1135 }
1136 } else { // Preserve the letter case
1137 if (!sorted) {
1138 sorted = true;
1139 SortWordList(words, len);
1140 }
1141 while (start <= end) { // Binary searching loop
1142 pivot = (start + end) / 2;
1143 cond = strncmp(wordStart, words[pivot], searchLen);
1144 if (!cond) {
1145 // Find first match
1146 while ((pivot > start) &&
1147 (0 == strncmp(wordStart,
1148 words[pivot-1], searchLen))) {
1149 --pivot;
1150 }
1151 // Grab each match
1152 while ((pivot <= end) &&
1153 (0 == strncmp(wordStart,
1154 words[pivot], searchLen))) {
1155 wordlen = LengthWord(words[pivot], otherSeparator) + 1;
1156 ++pivot;
1157 if (exactLen && wordlen != LengthWord(wordStart, otherSeparator) + 1)
1158 continue;
1159 wordsNear.append(words[pivot-1], wordlen, ' ');
1160 }
1161 return wordsNear.detach();
1162 } else if (cond < 0) {
1163 end = pivot - 1;
1164 } else if (cond > 0) {
1165 start = pivot + 1;
1166 }
1167 }
1168 }
1169 return NULL;
1170 }