]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/PropSet.cxx
A fix for Scintilla's case insensitive sort routine
[wxWidgets.git] / contrib / src / stc / scintilla / src / PropSet.cxx
1 // SciTE - Scintilla based Text Editor
2 /** @file PropSet.cxx
3 ** A Java style properties file module.
4 **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 // Maintain a dictionary of properties
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stdio.h>
14
15 #include "Platform.h"
16
17 #include "PropSet.h"
18
19 // The comparison and case changing functions here assume ASCII
20 // or extended ASCII such as the normal Windows code page.
21
22 static inline char MakeUpperCase(char ch) {
23 if (ch < 'a' || ch > 'z')
24 return ch;
25 else
26 return static_cast<char>(ch - 'a' + 'A');
27 }
28
29 static inline bool IsLetter(char ch) {
30 return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
31 }
32
33
34 int CompareCaseInsensitive(const char *a, const char *b) {
35 while (*a && *b) {
36 if (*a != *b) {
37 if (IsLetter(*a) && IsLetter(*b)) {
38 char upperA = MakeUpperCase(*a);
39 char upperB = MakeUpperCase(*b);
40 if (upperA != upperB)
41 return upperA - upperB;
42 }
43 else {
44 return *a - *b;
45 }
46 }
47 a++;
48 b++;
49 }
50 // Either *a or *b is nul
51 return *a - *b;
52 }
53
54 int CompareNCaseInsensitive(const char *a, const char *b, int len) {
55 while (*a && *b && len) {
56 if (*a != *b) {
57 if (IsLetter(*a) && IsLetter(*b)) {
58 char upperA = MakeUpperCase(*a);
59 char upperB = MakeUpperCase(*b);
60 if (upperA != upperB)
61 return upperA - upperB;
62 }
63 else {
64 return *a - *b;
65 }
66 }
67 a++;
68 b++;
69 len--;
70 }
71 if (len == 0)
72 return 0;
73 else
74 // Either *a or *b is nul
75 return *a - *b;
76 }
77
78 bool EqualCaseInsensitive(const char *a, const char *b) {
79 return 0 == CompareCaseInsensitive(a, b);
80 }
81
82 inline unsigned int HashString(const char *s, int len) {
83 unsigned int ret = 0;
84 while (len--) {
85 ret <<= 4;
86 ret ^= *s;
87 s++;
88 }
89 return ret;
90 }
91
92 PropSet::PropSet() {
93 superPS = 0;
94 for (int root = 0; root < hashRoots; root++)
95 props[root] = 0;
96 }
97
98 PropSet::~PropSet() {
99 superPS = 0;
100 Clear();
101 }
102
103 void PropSet::Set(const char *key, const char *val, int lenKey, int lenVal) {
104 if (!*key) // Empty keys are not supported
105 return;
106 if (lenKey == -1)
107 lenKey = strlen(key);
108 if (lenVal == -1)
109 lenVal = strlen(val);
110 unsigned int hash = HashString(key, lenKey);
111 for (Property *p = props[hash % hashRoots]; p; p = p->next) {
112 if ((hash == p->hash) &&
113 ((strlen(p->key) == static_cast<unsigned int>(lenKey)) &&
114 (0 == strncmp(p->key, key, lenKey)))) {
115 // Replace current value
116 delete [](p->val);
117 p->val = StringDup(val, lenVal);
118 return ;
119 }
120 }
121 // Not found
122 Property *pNew = new Property;
123 if (pNew) {
124 pNew->hash = hash;
125 pNew->key = StringDup(key, lenKey);
126 pNew->val = StringDup(val, lenVal);
127 pNew->next = props[hash % hashRoots];
128 props[hash % hashRoots] = pNew;
129 }
130 }
131
132 void PropSet::Set(const char *keyVal) {
133 while (isspace(*keyVal))
134 keyVal++;
135 const char *endVal = keyVal;
136 while (*endVal && (*endVal != '\n'))
137 endVal++;
138 const char *eqAt = strchr(keyVal, '=');
139 if (eqAt) {
140 Set(keyVal, eqAt + 1, eqAt-keyVal, endVal - eqAt - 1);
141 } else if (*keyVal) { // No '=' so assume '=1'
142 Set(keyVal, "1", endVal-keyVal, 1);
143 }
144 }
145
146 void PropSet::SetMultiple(const char *s) {
147 const char *eol = strchr(s, '\n');
148 while (eol) {
149 Set(s);
150 s = eol + 1;
151 eol = strchr(s, '\n');
152 }
153 Set(s);
154 }
155
156 SString PropSet::Get(const char *key) {
157 unsigned int hash = HashString(key, strlen(key));
158 for (Property *p = props[hash % hashRoots]; p; p = p->next) {
159 if ((hash == p->hash) && (0 == strcmp(p->key, key))) {
160 return p->val;
161 }
162 }
163 if (superPS) {
164 // Failed here, so try in base property set
165 return superPS->Get(key);
166 } else {
167 return "";
168 }
169 }
170
171 static bool IncludesVar(const char *value, const char *key) {
172 const char *var = strstr(value, "$(");
173 while (var) {
174 if (isprefix(var + 2, key) && (var[2 + strlen(key)] == ')')) {
175 // Found $(key) which would lead to an infinite loop so exit
176 return true;
177 }
178 var = strstr(var + 2, ")");
179 if (var)
180 var = strstr(var + 1, "$(");
181 }
182 return false;
183 }
184
185 SString PropSet::GetExpanded(const char *key) {
186 SString val = Get(key);
187 if (IncludesVar(val.c_str(), key))
188 return val;
189 return Expand(val.c_str());
190 }
191
192 SString PropSet::Expand(const char *withVars) {
193 char *base = StringDup(withVars);
194 char *cpvar = strstr(base, "$(");
195 while (cpvar) {
196 char *cpendvar = strchr(cpvar, ')');
197 if (cpendvar) {
198 int lenvar = cpendvar - cpvar - 2; // Subtract the $()
199 char *var = StringDup(cpvar + 2, lenvar);
200 SString val = GetExpanded(var);
201 int newlenbase = strlen(base) + val.length() - lenvar;
202 char *newbase = new char[newlenbase];
203 strncpy(newbase, base, cpvar - base);
204 strcpy(newbase + (cpvar - base), val.c_str());
205 strcpy(newbase + (cpvar - base) + val.length(), cpendvar + 1);
206 delete []var;
207 delete []base;
208 base = newbase;
209 }
210 cpvar = strstr(base, "$(");
211 }
212 SString sret = base;
213 delete []base;
214 return sret;
215 }
216
217 int PropSet::GetInt(const char *key, int defaultValue) {
218 SString val = GetExpanded(key);
219 if (val.length())
220 return val.value();
221 return defaultValue;
222 }
223
224 bool isprefix(const char *target, const char *prefix) {
225 while (*target && *prefix) {
226 if (*target != *prefix)
227 return false;
228 target++;
229 prefix++;
230 }
231 if (*prefix)
232 return false;
233 else
234 return true;
235 }
236
237 static bool IsSuffixCaseInsensitive(const char *target, const char *suffix) {
238 int lentarget = strlen(target);
239 int lensuffix = strlen(suffix);
240 if (lensuffix > lentarget)
241 return false;
242 for (int i = lensuffix - 1; i >= 0; i--) {
243 if (MakeUpperCase(target[i + lentarget - lensuffix]) !=
244 MakeUpperCase(suffix[i]))
245 return false;
246 }
247 return true;
248 }
249
250 SString PropSet::GetWild(const char *keybase, const char *filename) {
251 for (int root = 0; root < hashRoots; root++) {
252 for (Property *p = props[root]; p; p = p->next) {
253 if (isprefix(p->key, keybase)) {
254 char * orgkeyfile = p->key + strlen(keybase);
255 char *keyfile = NULL;
256
257 if (strstr(orgkeyfile, "$(") == orgkeyfile) {
258 char *cpendvar = strchr(orgkeyfile, ')');
259 if (cpendvar) {
260 *cpendvar = '\0';
261 SString s = GetExpanded(orgkeyfile + 2);
262 *cpendvar = ')';
263 keyfile = StringDup(s.c_str());
264 }
265 }
266 char *keyptr = keyfile;
267
268 if (keyfile == NULL)
269 keyfile = orgkeyfile;
270
271 for (; ; ) {
272 char *del = strchr(keyfile, ';');
273 if (del == NULL)
274 del = keyfile + strlen(keyfile);
275 char delchr = *del;
276 *del = '\0';
277 if (*keyfile == '*') {
278 if (IsSuffixCaseInsensitive(filename, keyfile + 1)) {
279 *del = delchr;
280 delete []keyptr;
281 return p->val;
282 }
283 } else if (0 == strcmp(keyfile, filename)) {
284 *del = delchr;
285 delete []keyptr;
286 return p->val;
287 }
288 if (delchr == '\0')
289 break;
290 *del = delchr;
291 keyfile = del + 1;
292 }
293 delete []keyptr;
294
295 if (0 == strcmp(p->key, keybase)) {
296 return p->val;
297 }
298 }
299 }
300 }
301 if (superPS) {
302 // Failed here, so try in base property set
303 return superPS->GetWild(keybase, filename);
304 } else {
305 return "";
306 }
307 }
308
309 // GetNewExpand does not use Expand as it has to use GetWild with the filename for each
310 // variable reference found.
311 SString PropSet::GetNewExpand(const char *keybase, const char *filename) {
312 char *base = StringDup(GetWild(keybase, filename).c_str());
313 char *cpvar = strstr(base, "$(");
314 while (cpvar) {
315 char *cpendvar = strchr(cpvar, ')');
316 if (cpendvar) {
317 int lenvar = cpendvar - cpvar - 2; // Subtract the $()
318 char *var = StringDup(cpvar + 2, lenvar);
319 SString val = GetWild(var, filename);
320 int newlenbase = strlen(base) + val.length() - lenvar;
321 char *newbase = new char[newlenbase];
322 strncpy(newbase, base, cpvar - base);
323 strcpy(newbase + (cpvar - base), val.c_str());
324 strcpy(newbase + (cpvar - base) + val.length(), cpendvar + 1);
325 delete []var;
326 delete []base;
327 base = newbase;
328 }
329 cpvar = strstr(base, "$(");
330 }
331 SString sret = base;
332 delete []base;
333 return sret;
334 }
335
336 void PropSet::Clear() {
337 for (int root = 0; root < hashRoots; root++) {
338 Property *p = props[root];
339 while (p) {
340 Property *pNext = p->next;
341 p->hash = 0;
342 delete p->key;
343 p->key = 0;
344 delete p->val;
345 p->val = 0;
346 delete p;
347 p = pNext;
348 }
349 props[root] = 0;
350 }
351 }
352
353 char *PropSet::ToString() {
354 unsigned int len=0;
355 for (int r = 0; r < hashRoots; r++) {
356 for (Property *p = props[r]; p; p = p->next) {
357 len += strlen(p->key) + 1;
358 len += strlen(p->val) + 1;
359 }
360 }
361 if (len == 0)
362 len = 1; // Return as empty string
363 char *ret = new char [len];
364 if (ret) {
365 char *w = ret;
366 for (int root = 0; root < hashRoots; root++) {
367 for (Property *p = props[root]; p; p = p->next) {
368 strcpy(w, p->key);
369 w += strlen(p->key);
370 *w++ = '=';
371 strcpy(w, p->val);
372 w += strlen(p->val);
373 *w++ = '\n';
374 }
375 }
376 ret[len-1] = '\0';
377 }
378 return ret;
379 }
380
381 /**
382 * Initiate enumeration.
383 */
384 bool PropSet::GetFirst(char **key, char **val) {
385 for (int i = 0; i < hashRoots; i++) {
386 for (Property *p = props[i]; p; p = p->next) {
387 if (p) {
388 *key = p->key;
389 *val = p->val;
390 enumnext = p->next; // GetNext will begin here ...
391 enumhash = i; // ... in this block
392 return true;
393 }
394 }
395 }
396 return false;
397 }
398
399 /**
400 * Continue enumeration.
401 */
402 bool PropSet::GetNext(char ** key, char ** val) {
403 bool firstloop = true;
404
405 // search begins where we left it : in enumhash block
406 for (int i = enumhash; i < hashRoots; i++) {
407 if (!firstloop)
408 enumnext = props[i]; // Begin with first property in block
409 // else : begin where we left
410 firstloop = false;
411
412 for (Property *p = enumnext; p; p = p->next) {
413 if (p) {
414 *key = p->key;
415 *val = p->val;
416 enumnext = p->next; // for GetNext
417 enumhash = i;
418 return true;
419 }
420 }
421 }
422 return false;
423 }
424
425 static bool iswordsep(char ch, bool onlyLineEnds) {
426 if (!isspace(ch))
427 return false;
428 if (!onlyLineEnds)
429 return true;
430 return ch == '\r' || ch == '\n';
431 }
432
433 /**
434 * Creates an array that points into each word in the string and puts \0 terminators
435 * after each word.
436 */
437 static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
438 char prev = '\n';
439 int words = 0;
440 for (int j = 0; wordlist[j]; j++) {
441 if (!iswordsep(wordlist[j], onlyLineEnds) && iswordsep(prev, onlyLineEnds))
442 words++;
443 prev = wordlist[j];
444 }
445 char **keywords = new char * [words + 1];
446 if (keywords) {
447 words = 0;
448 prev = '\0';
449 int slen = strlen(wordlist);
450 for (int k = 0; k < slen; k++) {
451 if (!iswordsep(wordlist[k], onlyLineEnds)) {
452 if (!prev) {
453 keywords[words] = &wordlist[k];
454 words++;
455 }
456 } else {
457 wordlist[k] = '\0';
458 }
459 prev = wordlist[k];
460 }
461 keywords[words] = &wordlist[slen];
462 *len = words;
463 } else {
464 *len = 0;
465 }
466 return keywords;
467 }
468
469 void WordList::Clear() {
470 if (words) {
471 delete []list;
472 delete []words;
473 delete []wordsNoCase;
474 }
475 words = 0;
476 wordsNoCase = 0;
477 list = 0;
478 len = 0;
479 sorted = false;
480 }
481
482 void WordList::Set(const char *s) {
483 list = StringDup(s);
484 sorted = false;
485 words = ArrayFromWordList(list, &len, onlyLineEnds);
486 wordsNoCase = new char * [len + 1];
487 memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
488 }
489
490 char *WordList::Allocate(int size) {
491 list = new char[size + 1];
492 list[size] = '\0';
493 return list;
494 }
495
496 void WordList::SetFromAllocated() {
497 sorted = false;
498 words = ArrayFromWordList(list, &len, onlyLineEnds);
499 wordsNoCase = new char * [len + 1];
500 memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
501 }
502
503 int cmpString(const void *a1, const void *a2) {
504 // Can't work out the correct incantation to use modern casts here
505 return strcmp(*(char**)(a1), *(char**)(a2));
506 }
507
508 int cmpStringNoCase(const void *a1, const void *a2) {
509 // Can't work out the correct incantation to use modern casts here
510 return CompareCaseInsensitive(*(char**)(a1), *(char**)(a2));
511 }
512
513 static void SortWordList(char **words, char **wordsNoCase, unsigned int len) {
514 qsort(reinterpret_cast<void*>(words), len, sizeof(*words),
515 cmpString);
516 qsort(reinterpret_cast<void*>(wordsNoCase), len, sizeof(*wordsNoCase),
517 cmpStringNoCase);
518 }
519
520 bool WordList::InList(const char *s) {
521 if (0 == words)
522 return false;
523 if (!sorted) {
524 sorted = true;
525 SortWordList(words, wordsNoCase, len);
526 for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
527 starts[k] = -1;
528 for (int l = len - 1; l >= 0; l--) {
529 unsigned char indexChar = words[l][0];
530 starts[indexChar] = l;
531 }
532 }
533 unsigned char firstChar = s[0];
534 int j = starts[firstChar];
535 if (j >= 0) {
536 while (words[j][0] == firstChar) {
537 if (s[1] == words[j][1]) {
538 const char *a = words[j] + 1;
539 const char *b = s + 1;
540 while (*a && *a == *b) {
541 a++;
542 b++;
543 }
544 if (!*a && !*b)
545 return true;
546 }
547 j++;
548 }
549 }
550 j = starts['^'];
551 if (j >= 0) {
552 while (words[j][0] == '^') {
553 const char *a = words[j] + 1;
554 const char *b = s;
555 while (*a && *a == *b) {
556 a++;
557 b++;
558 }
559 if (!*a)
560 return true;
561 j++;
562 }
563 }
564 return false;
565 }
566
567 /**
568 * Returns an element (complete) of the wordlist array which has
569 * the same beginning as the passed string.
570 * The length of the word to compare is passed too.
571 * Letter case can be ignored or preserved (default).
572 */
573 const char *WordList::GetNearestWord(const char *wordStart, int searchLen /*= -1*/, bool ignoreCase /*= false*/) {
574 int start = 0; // lower bound of the api array block to search
575 int end = len - 1; // upper bound of the api array block to search
576 int pivot; // index of api array element just being compared
577 int cond; // comparison result (in the sense of strcmp() result)
578 const char *word; // api array element just being compared
579
580 if (0 == words)
581 return NULL;
582 if (!sorted) {
583 sorted = true;
584 SortWordList(words, wordsNoCase, len);
585 }
586 if (ignoreCase) {
587 while (start <= end) { // binary searching loop
588 pivot = (start + end) >> 1;
589 word = wordsNoCase[pivot];
590 cond = CompareNCaseInsensitive(wordStart, word, searchLen);
591 if (!cond && nonFuncChar(word[searchLen])) // maybe there should be a "non-word character" test here?
592 return word; // result must not be freed with free()
593 else if (cond > 0)
594 start = pivot + 1;
595 else if (cond <= 0)
596 end = pivot - 1;
597 }
598 } else { // preserve the letter case
599 while (start <= end) { // binary searching loop
600 pivot = (start + end) >> 1;
601 word = words[pivot];
602 cond = strncmp(wordStart, word, searchLen);
603 if (!cond && nonFuncChar(word[searchLen])) // maybe there should be a "non-word character" test here?
604 return word; // result must not be freed with free()
605 else if (cond > 0)
606 start = pivot + 1;
607 else if (cond <= 0)
608 end = pivot - 1;
609 }
610 }
611 return NULL;
612 }
613
614 /**
615 * Find the length of a 'word' which is actually an identifier in a string
616 * which looks like "identifier(..." or "identifier:" or "identifier" and where
617 * there may be extra spaces after the identifier that should not be
618 * counted in the length.
619 */
620 static unsigned int LengthWord(const char *word, char otherSeparator) {
621 // Find a '(', or ':'. If that fails go to the end of the string.
622 const char *endWord = strchr(word, '(');
623 if (!endWord)
624 endWord = strchr(word, ':');
625 if (!endWord && otherSeparator)
626 endWord = strchr(word, otherSeparator);
627 if (!endWord)
628 endWord = word + strlen(word);
629 // Last case always succeeds so endWord != 0
630
631 // Drop any space characters.
632 if (endWord > word) {
633 endWord--; // Back from the '(', ':', or '\0'
634 // Move backwards over any spaces
635 while ((endWord > word) && (isspace(*endWord))) {
636 endWord--;
637 }
638 }
639 return endWord - word;
640 }
641
642 /**
643 * Returns elements (first words of them) of the wordlist array which have
644 * the same beginning as the passed string.
645 * The length of the word to compare is passed too.
646 * Letter case can be ignored or preserved (default).
647 * If there are more words meeting the condition they are returned all of
648 * them in the ascending order separated with spaces.
649 *
650 * NOTE: returned buffer has to be freed with delete[].
651 */
652 char *WordList::GetNearestWords(
653 const char *wordStart,
654 int searchLen /*= -1*/,
655 bool ignoreCase /*= false*/,
656 char otherSeparator /*= '\0'*/) {
657 int wordlen; // length of the word part (before the '(' brace) of the api array element
658 SString wordsNear;
659 wordsNear.setsizegrowth(1000);
660 int start = 0; // lower bound of the api array block to search
661 int end = len - 1; // upper bound of the api array block to search
662 int pivot; // index of api array element just being compared
663 int cond; // comparison result (in the sense of strcmp() result)
664
665 if (0 == words)
666 return NULL;
667 if (!sorted) {
668 sorted = true;
669 SortWordList(words, wordsNoCase, len);
670 }
671 if (ignoreCase) {
672 while (start <= end) { // Binary searching loop
673 pivot = (start + end) / 2;
674 cond = CompareNCaseInsensitive(wordStart, wordsNoCase[pivot], searchLen);
675 if (!cond) {
676 // Find first match
677 while ((pivot > start) &&
678 (0 == CompareNCaseInsensitive(wordStart,
679 wordsNoCase[pivot-1], searchLen))) {
680 --pivot;
681 }
682 // Grab each match
683 while ((pivot <= end) &&
684 (0 == CompareNCaseInsensitive(wordStart,
685 wordsNoCase[pivot], searchLen))) {
686 wordlen = LengthWord(wordsNoCase[pivot], otherSeparator) + 1;
687 wordsNear.append(wordsNoCase[pivot], wordlen, ' ');
688 ++pivot;
689 }
690 return wordsNear.detach();
691 } else if (cond < 0) {
692 end = pivot - 1;
693 } else if (cond > 0) {
694 start = pivot + 1;
695 }
696 }
697 } else { // Preserve the letter case
698 while (start <= end) { // Binary searching loop
699 pivot = (start + end) / 2;
700 cond = strncmp(wordStart, words[pivot], searchLen);
701 if (!cond) {
702 // Find first match
703 while ((pivot > start) &&
704 (0 == strncmp(wordStart,
705 words[pivot-1], searchLen))) {
706 --pivot;
707 }
708 // Grab each match
709 while ((pivot <= end) &&
710 (0 == strncmp(wordStart,
711 words[pivot], searchLen))) {
712 wordlen = LengthWord(words[pivot], otherSeparator) + 1;
713 wordsNear.append(words[pivot], wordlen, ' ');
714 ++pivot;
715 }
716 return wordsNear.detach();
717 } else if (cond < 0) {
718 end = pivot - 1;
719 } else if (cond > 0) {
720 start = pivot + 1;
721 }
722 }
723 }
724 return NULL;
725 }