]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * regc_locale.c -- | |
3 | * | |
4 | * This file contains locale-specific regexp routines. | |
5 | * This file is #included by regcomp.c. | |
6 | * | |
7 | * Copyright (c) 1998 by Scriptics Corporation. | |
8 | * | |
9 | * This software is copyrighted by the Regents of the University of | |
10 | * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState | |
11 | * Corporation and other parties. The following terms apply to all files | |
12 | * associated with the software unless explicitly disclaimed in | |
13 | * individual files. | |
14 | * | |
15 | * The authors hereby grant permission to use, copy, modify, distribute, | |
16 | * and license this software and its documentation for any purpose, provided | |
17 | * that existing copyright notices are retained in all copies and that this | |
18 | * notice is included verbatim in any distributions. No written agreement, | |
19 | * license, or royalty fee is required for any of the authorized uses. | |
20 | * Modifications to this software may be copyrighted by their authors | |
21 | * and need not follow the licensing terms described here, provided that | |
22 | * the new terms are clearly indicated on the first page of each file where | |
23 | * they apply. | |
24 | * | |
25 | * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY | |
26 | * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
27 | * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY | |
28 | * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | * | |
31 | * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, | |
32 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, | |
33 | * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE | |
34 | * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE | |
35 | * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR | |
36 | * MODIFICATIONS. | |
37 | * | |
38 | * GOVERNMENT USE: If you are acquiring this software on behalf of the | |
39 | * U.S. government, the Government shall have only "Restricted Rights" | |
40 | * in the software and related documentation as defined in the Federal | |
41 | * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you | |
42 | * are acquiring the software on behalf of the Department of Defense, the | |
43 | * software shall be classified as "Commercial Computer Software" and the | |
44 | * Government shall have only "Restricted Rights" as defined in Clause | |
45 | * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the | |
46 | * authors grant the U.S. Government and others acting in its behalf | |
47 | * permission to use and distribute the software in accordance with the | |
48 | * terms specified in this license. | |
49 | * | |
50 | * $Header$ | |
51 | */ | |
52 | ||
53 | int char_and_wchar_strncmp (const char* cp, const wx_wchar* wp, size_t nNum) | |
54 | { | |
55 | while(*cp++ == (const char)*wp++ && --nNum){} | |
56 | ||
57 | return nNum; | |
58 | } | |
59 | ||
/*
 * ASCII character-name table
 *
 * Maps symbolic collating-element names to the ASCII character they stand
 * for.  Several characters have more than one name.  The table is terminated
 * by an entry whose name is NULL.
 */

static struct cname
{
    const char *name;           /* symbolic name (a string literal) */
    char        code;           /* the character it denotes */
}   cnames[] =

{
    /* control characters */
    {"NUL", '\0'},
    {"SOH", '\001'},
    {"STX", '\002'},
    {"ETX", '\003'},
    {"EOT", '\004'},
    {"ENQ", '\005'},
    {"ACK", '\006'},
    {"BEL", '\007'},
    {"alert", '\007'},
    {"BS", '\010'},
    {"backspace", '\b'},
    {"HT", '\011'},
    {"tab", '\t'},
    {"LF", '\012'},
    {"newline", '\n'},
    {"VT", '\013'},
    {"vertical-tab", '\v'},
    {"FF", '\014'},
    {"form-feed", '\f'},
    {"CR", '\015'},
    {"carriage-return", '\r'},
    {"SO", '\016'},
    {"SI", '\017'},
    {"DLE", '\020'},
    {"DC1", '\021'},
    {"DC2", '\022'},
    {"DC3", '\023'},
    {"DC4", '\024'},
    {"NAK", '\025'},
    {"SYN", '\026'},
    {"ETB", '\027'},
    {"CAN", '\030'},
    {"EM", '\031'},
    {"SUB", '\032'},
    {"ESC", '\033'},
    {"IS4", '\034'},
    {"FS", '\034'},
    {"IS3", '\035'},
    {"GS", '\035'},
    {"IS2", '\036'},
    {"RS", '\036'},
    {"IS1", '\037'},
    {"US", '\037'},

    /* space and punctuation up to the digits */
    {"space", ' '},
    {"exclamation-mark", '!'},
    {"quotation-mark", '"'},
    {"number-sign", '#'},
    {"dollar-sign", '$'},
    {"percent-sign", '%'},
    {"ampersand", '&'},
    {"apostrophe", '\''},
    {"left-parenthesis", '('},
    {"right-parenthesis", ')'},
    {"asterisk", '*'},
    {"plus-sign", '+'},
    {"comma", ','},
    {"hyphen", '-'},
    {"hyphen-minus", '-'},
    {"period", '.'},
    {"full-stop", '.'},
    {"slash", '/'},
    {"solidus", '/'},

    /* digits */
    {"zero", '0'},
    {"one", '1'},
    {"two", '2'},
    {"three", '3'},
    {"four", '4'},
    {"five", '5'},
    {"six", '6'},
    {"seven", '7'},
    {"eight", '8'},
    {"nine", '9'},

    /* remaining punctuation */
    {"colon", ':'},
    {"semicolon", ';'},
    {"less-than-sign", '<'},
    {"equals-sign", '='},
    {"greater-than-sign", '>'},
    {"question-mark", '?'},
    {"commercial-at", '@'},
    {"left-square-bracket", '['},
    {"backslash", '\\'},
    {"reverse-solidus", '\\'},
    {"right-square-bracket", ']'},
    {"circumflex", '^'},
    {"circumflex-accent", '^'},
    {"underscore", '_'},
    {"low-line", '_'},
    {"grave-accent", '`'},
    {"left-brace", '{'},
    {"left-curly-bracket", '{'},
    {"vertical-line", '|'},
    {"right-brace", '}'},
    {"right-curly-bracket", '}'},
    {"tilde", '~'},
    {"DEL", '\177'},

    /* terminator */
    {NULL, 0}
};
358 | ||
359 | /* | |
360 | * some ctype functions with non-ascii-char guard | |
361 | */ | |
362 | static int | |
363 | wx_isdigit(wx_wchar c) | |
364 | { | |
365 | return (c >= 0 && c <= UCHAR_MAX && isdigit((unsigned char) c)); | |
366 | } | |
367 | ||
368 | static int | |
369 | wx_isalpha(wx_wchar c) | |
370 | { | |
371 | return (c >= 0 && c <= UCHAR_MAX && isalpha((unsigned char) c)); | |
372 | } | |
373 | ||
374 | static int | |
375 | wx_isalnum(wx_wchar c) | |
376 | { | |
377 | return (c >= 0 && c <= UCHAR_MAX && isalnum((unsigned char) c)); | |
378 | } | |
379 | ||
380 | static int | |
381 | wx_isupper(wx_wchar c) | |
382 | { | |
383 | return (c >= 0 && c <= UCHAR_MAX && isupper((unsigned char) c)); | |
384 | } | |
385 | ||
386 | static int | |
387 | wx_islower(wx_wchar c) | |
388 | { | |
389 | return (c >= 0 && c <= UCHAR_MAX && islower((unsigned char) c)); | |
390 | } | |
391 | ||
392 | static int | |
393 | wx_isgraph(wx_wchar c) | |
394 | { | |
395 | return (c >= 0 && c <= UCHAR_MAX && isgraph((unsigned char) c)); | |
396 | } | |
397 | ||
398 | static int | |
399 | wx_ispunct(wx_wchar c) | |
400 | { | |
401 | return (c >= 0 && c <= UCHAR_MAX && ispunct((unsigned char) c)); | |
402 | } | |
403 | ||
404 | static int | |
405 | wx_isspace(wx_wchar c) | |
406 | { | |
407 | return (c >= 0 && c <= UCHAR_MAX && isspace((unsigned char) c)); | |
408 | } | |
409 | ||
410 | static wx_wchar | |
411 | wx_toupper(wx_wchar c) | |
412 | { | |
413 | if (c >= 0 && c <= UCHAR_MAX) | |
414 | return toupper((unsigned char) c); | |
415 | return c; | |
416 | } | |
417 | ||
418 | static wx_wchar | |
419 | wx_tolower(wx_wchar c) | |
420 | { | |
421 | if (c >= 0 && c <= UCHAR_MAX) | |
422 | return tolower((unsigned char) c); | |
423 | return c; | |
424 | } | |
425 | ||
426 | ||
/*
 * nmcces - report how many distinct MCCEs exist
 *
 * No multi-character collating elements are defined at the moment, so the
 * context argument is not consulted and the answer is always zero.
 */
static int
nmcces(struct vars * v)
{
    (void) v;                   /* unused */

    return 0;
}
438 | ||
/*
 * nleaders - report how many chrs can be the first chr of an MCCE
 *
 * The context argument is not consulted; the answer is always zero.
 */
static int
nleaders(struct vars * v)
{
    (void) v;                   /* unused */

    return 0;
}
447 | ||
/*
 * allmcces - return a cvec with all the MCCEs of the locale
 *
 * The supplied cvec (which is supposed to have enough room) is passed to
 * clearcvec() and the result of that call is returned.  The context
 * argument is not consulted.
 */
static struct cvec *
allmcces(struct vars * v,       /* context */
         struct cvec * cv)      /* cvec to hand back, after clearing */
{
    struct cvec *cleared;

    (void) v;                   /* unused */

    cleared = clearcvec(cv);
    return cleared;
}
457 | ||
458 | /* | |
459 | * element - map collating-element name to celt | |
460 | */ | |
461 | static celt | |
462 | element(struct vars * v, /* context */ | |
463 | chr *startp, /* points to start of name */ | |
464 | chr *endp) /* points just past end of name */ | |
465 | { | |
466 | struct cname *cn; | |
467 | size_t len; | |
468 | ||
469 | /* generic: one-chr names stand for themselves */ | |
470 | assert(startp < endp); | |
471 | len = endp - startp; | |
472 | if (len == 1) | |
473 | return *startp; | |
474 | ||
475 | NOTE(REG_ULOCALE); | |
476 | ||
477 | /* search table */ | |
478 | for (cn = cnames; cn->name != NULL; cn++) | |
479 | { | |
480 | if (strlen(cn->name) == len && | |
481 | char_and_wchar_strncmp(cn->name, startp, len) == 0) | |
482 | { | |
483 | break; /* NOTE BREAK OUT */ | |
484 | } | |
485 | } | |
486 | if (cn->name != NULL) | |
487 | return CHR(cn->code); | |
488 | ||
489 | /* couldn't find it */ | |
490 | ERR(REG_ECOLLATE); | |
491 | return 0; | |
492 | } | |
493 | ||
494 | /* | |
495 | * range - supply cvec for a range, including legality check | |
496 | */ | |
497 | static struct cvec * | |
498 | range(struct vars * v, /* context */ | |
499 | celt a, /* range start */ | |
500 | celt b, /* range end, might equal a */ | |
501 | int cases) /* case-independent? */ | |
502 | { | |
503 | int nchrs; | |
504 | struct cvec *cv; | |
505 | celt c, | |
506 | lc, | |
507 | uc; | |
508 | ||
509 | if (a != b && !before(a, b)) | |
510 | { | |
511 | ERR(REG_ERANGE); | |
512 | return NULL; | |
513 | } | |
514 | ||
515 | if (!cases) | |
516 | { /* easy version */ | |
517 | cv = getcvec(v, 0, 1, 0); | |
518 | NOERRN(); | |
519 | addrange(cv, a, b); | |
520 | return cv; | |
521 | } | |
522 | ||
523 | /* | |
524 | * When case-independent, it's hard to decide when cvec ranges are | |
525 | * usable, so for now at least, we won't try. We allocate enough | |
526 | * space for two case variants plus a little extra for the two title | |
527 | * case variants. | |
528 | */ | |
529 | ||
530 | nchrs = (b - a + 1) * 2 + 4; | |
531 | ||
532 | cv = getcvec(v, nchrs, 0, 0); | |
533 | NOERRN(); | |
534 | ||
535 | for (c = a; c <= b; c++) | |
536 | { | |
537 | addchr(cv, c); | |
538 | lc = wx_tolower((chr) c); | |
539 | if (c != lc) | |
540 | addchr(cv, lc); | |
541 | uc = wx_toupper((chr) c); | |
542 | if (c != uc) | |
543 | addchr(cv, uc); | |
544 | } | |
545 | ||
546 | return cv; | |
547 | } | |
548 | ||
549 | /* | |
550 | * before - is celt x before celt y, for purposes of range legality? | |
551 | */ | |
552 | static int /* predicate */ | |
553 | before(celt x, celt y) | |
554 | { | |
555 | /* trivial because no MCCEs */ | |
556 | if (x < y) | |
557 | return 1; | |
558 | return 0; | |
559 | } | |
560 | ||
561 | /* | |
562 | * eclass - supply cvec for an equivalence class | |
563 | * Must include case counterparts on request. | |
564 | */ | |
565 | static struct cvec * | |
566 | eclass(struct vars * v, /* context */ | |
567 | celt c, /* Collating element representing the | |
568 | * equivalence class. */ | |
569 | int cases) /* all cases? */ | |
570 | { | |
571 | struct cvec *cv; | |
572 | ||
573 | /* crude fake equivalence class for testing */ | |
574 | if ((v->cflags & REG_FAKE) && c == 'x') | |
575 | { | |
576 | cv = getcvec(v, 4, 0, 0); | |
577 | addchr(cv, (chr) 'x'); | |
578 | addchr(cv, (chr) 'y'); | |
579 | if (cases) | |
580 | { | |
581 | addchr(cv, (chr) 'X'); | |
582 | addchr(cv, (chr) 'Y'); | |
583 | } | |
584 | return cv; | |
585 | } | |
586 | ||
587 | /* otherwise, none */ | |
588 | if (cases) | |
589 | return allcases(v, c); | |
590 | cv = getcvec(v, 1, 0, 0); | |
591 | assert(cv != NULL); | |
592 | addchr(cv, (chr) c); | |
593 | return cv; | |
594 | } | |
595 | ||
596 | /* | |
597 | * cclass - supply cvec for a character class | |
598 | * | |
599 | * Must include case counterparts on request. | |
600 | */ | |
601 | static struct cvec * | |
602 | cclass(struct vars * v, /* context */ | |
603 | chr *startp, /* where the name starts */ | |
604 | chr *endp, /* just past the end of the name */ | |
605 | int cases) /* case-independent? */ | |
606 | { | |
607 | size_t len; | |
608 | struct cvec *cv = NULL; | |
609 | char **namePtr; | |
610 | int i, | |
611 | index; | |
612 | ||
613 | /* | |
614 | * The following arrays define the valid character class names. | |
615 | */ | |
616 | ||
617 | static char *classNames[] = { | |
618 | "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", | |
619 | "lower", "print", "punct", "space", "upper", "xdigit", NULL | |
620 | }; | |
621 | ||
622 | enum classes | |
623 | { | |
624 | CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, | |
625 | CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT | |
626 | }; | |
627 | ||
628 | /* | |
629 | * Map the name to the corresponding enumerated value. | |
630 | */ | |
631 | len = endp - startp; | |
632 | index = -1; | |
633 | for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) | |
634 | { | |
635 | if (strlen(*namePtr) == len && | |
636 | char_and_wchar_strncmp(*namePtr, startp, len) == 0) | |
637 | { | |
638 | index = i; | |
639 | break; | |
640 | } | |
641 | } | |
642 | if (index == -1) | |
643 | { | |
644 | ERR(REG_ECTYPE); | |
645 | return NULL; | |
646 | } | |
647 | ||
648 | /* | |
649 | * Remap lower and upper to alpha if the match is case insensitive. | |
650 | */ | |
651 | ||
652 | if (cases && | |
653 | ((enum classes) index == CC_LOWER || | |
654 | (enum classes) index == CC_UPPER)) | |
655 | index = (int) CC_ALPHA; | |
656 | ||
657 | /* | |
658 | * Now compute the character class contents. | |
659 | * | |
660 | * For the moment, assume that only char codes < 256 can be in these | |
661 | * classes. | |
662 | */ | |
663 | ||
664 | switch ((enum classes) index) | |
665 | { | |
666 | case CC_PRINT: | |
667 | case CC_ALNUM: | |
668 | cv = getcvec(v, UCHAR_MAX, 1, 0); | |
669 | if (cv) | |
670 | { | |
671 | for (i = 0; i <= UCHAR_MAX; i++) | |
672 | { | |
673 | if (wx_isalpha((chr) i)) | |
674 | addchr(cv, (chr) i); | |
675 | } | |
676 | addrange(cv, (chr) '0', (chr) '9'); | |
677 | } | |
678 | break; | |
679 | case CC_ALPHA: | |
680 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
681 | if (cv) | |
682 | { | |
683 | for (i = 0; i <= UCHAR_MAX; i++) | |
684 | { | |
685 | if (wx_isalpha((chr) i)) | |
686 | addchr(cv, (chr) i); | |
687 | } | |
688 | } | |
689 | break; | |
690 | case CC_ASCII: | |
691 | cv = getcvec(v, 0, 1, 0); | |
692 | if (cv) | |
693 | addrange(cv, 0, 0x7f); | |
694 | break; | |
695 | case CC_BLANK: | |
696 | cv = getcvec(v, 2, 0, 0); | |
697 | addchr(cv, '\t'); | |
698 | addchr(cv, ' '); | |
699 | break; | |
700 | case CC_CNTRL: | |
701 | cv = getcvec(v, 0, 2, 0); | |
702 | addrange(cv, 0x0, 0x1f); | |
703 | addrange(cv, 0x7f, 0x9f); | |
704 | break; | |
705 | case CC_DIGIT: | |
706 | cv = getcvec(v, 0, 1, 0); | |
707 | if (cv) | |
708 | addrange(cv, (chr) '0', (chr) '9'); | |
709 | break; | |
710 | case CC_PUNCT: | |
711 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
712 | if (cv) | |
713 | { | |
714 | for (i = 0; i <= UCHAR_MAX; i++) | |
715 | { | |
716 | if (wx_ispunct((chr) i)) | |
717 | addchr(cv, (chr) i); | |
718 | } | |
719 | } | |
720 | break; | |
721 | case CC_XDIGIT: | |
722 | cv = getcvec(v, 0, 3, 0); | |
723 | if (cv) | |
724 | { | |
725 | addrange(cv, '0', '9'); | |
726 | addrange(cv, 'a', 'f'); | |
727 | addrange(cv, 'A', 'F'); | |
728 | } | |
729 | break; | |
730 | case CC_SPACE: | |
731 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
732 | if (cv) | |
733 | { | |
734 | for (i = 0; i <= UCHAR_MAX; i++) | |
735 | { | |
736 | if (wx_isspace((chr) i)) | |
737 | addchr(cv, (chr) i); | |
738 | } | |
739 | } | |
740 | break; | |
741 | case CC_LOWER: | |
742 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
743 | if (cv) | |
744 | { | |
745 | for (i = 0; i <= UCHAR_MAX; i++) | |
746 | { | |
747 | if (wx_islower((chr) i)) | |
748 | addchr(cv, (chr) i); | |
749 | } | |
750 | } | |
751 | break; | |
752 | case CC_UPPER: | |
753 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
754 | if (cv) | |
755 | { | |
756 | for (i = 0; i <= UCHAR_MAX; i++) | |
757 | { | |
758 | if (wx_isupper((chr) i)) | |
759 | addchr(cv, (chr) i); | |
760 | } | |
761 | } | |
762 | break; | |
763 | case CC_GRAPH: | |
764 | cv = getcvec(v, UCHAR_MAX, 0, 0); | |
765 | if (cv) | |
766 | { | |
767 | for (i = 0; i <= UCHAR_MAX; i++) | |
768 | { | |
769 | if (wx_isgraph((chr) i)) | |
770 | addchr(cv, (chr) i); | |
771 | } | |
772 | } | |
773 | break; | |
774 | } | |
775 | if (cv == NULL) | |
776 | ERR(REG_ESPACE); | |
777 | return cv; | |
778 | } | |
779 | ||
780 | /* | |
781 | * allcases - supply cvec for all case counterparts of a chr (including itself) | |
782 | * | |
783 | * This is a shortcut, preferably an efficient one, for simple characters; | |
784 | * messy cases are done via range(). | |
785 | */ | |
786 | static struct cvec * | |
787 | allcases(struct vars * v, /* context */ | |
788 | chr pc) /* character to get case equivs of */ | |
789 | { | |
790 | struct cvec *cv; | |
791 | chr c = (chr) pc; | |
792 | chr lc, | |
793 | uc; | |
794 | ||
795 | lc = wx_tolower((chr) c); | |
796 | uc = wx_toupper((chr) c); | |
797 | ||
798 | cv = getcvec(v, 2, 0, 0); | |
799 | addchr(cv, lc); | |
800 | if (lc != uc) | |
801 | addchr(cv, uc); | |
802 | return cv; | |
803 | } | |
804 | ||
805 | /* | |
806 | * cmp - chr-substring compare | |
807 | * | |
808 | * Backrefs need this. It should preferably be efficient. | |
809 | * Note that it does not need to report anything except equal/unequal. | |
810 | * Note also that the length is exact, and the comparison should not | |
811 | * stop at embedded NULs! | |
812 | */ | |
813 | static int /* 0 for equal, nonzero for unequal */ | |
814 | cmp(const chr *x, const chr *y, /* strings to compare */ | |
815 | size_t len) /* exact length of comparison */ | |
816 | { | |
817 | return memcmp(VS(x), VS(y), len * sizeof(chr)); | |
818 | } | |
819 | ||
820 | /* | |
821 | * casecmp - case-independent chr-substring compare | |
822 | * | |
823 | * REG_ICASE backrefs need this. It should preferably be efficient. | |
824 | * Note that it does not need to report anything except equal/unequal. | |
825 | * Note also that the length is exact, and the comparison should not | |
826 | * stop at embedded NULs! | |
827 | */ | |
828 | static int /* 0 for equal, nonzero for unequal */ | |
829 | casecmp(const chr *x, const chr *y, /* strings to compare */ | |
830 | size_t len) /* exact length of comparison */ | |
831 | { | |
832 | for (; len > 0; len--, x++, y++) | |
833 | { | |
834 | if ((*x != *y) && (wx_tolower(*x) != wx_tolower(*y))) | |
835 | return 1; | |
836 | } | |
837 | return 0; | |
838 | } |