]>
Commit | Line | Data |
---|---|---|
3ca4086b | 1 | /* |
830efc9b RN |
2 | * regc_locale.c -- |
3 | * | |
3ca4086b | 4 | * This file contains the Unicode locale specific regexp routines. |
830efc9b RN |
5 | * This file is #included by regcomp.c. |
6 | * | |
7 | * Copyright (c) 1998 by Scriptics Corporation. | |
8 | * | |
3ca4086b VS |
9 | * See the file "license.terms" for information on usage and redistribution |
10 | * of this file, and for a DISCLAIMER OF ALL WARRANTIES. | |
830efc9b | 11 | * |
3ca4086b | 12 | * RCS: @(#) $Id$ |
1701c4d4 | 13 | * |
77ffb593 | 14 | * wxWidgets: |
1701c4d4 | 15 | * The Scriptics license can be found in the file COPYRIGHT. Modifications |
77ffb593 | 16 | * for wxWidgets are under the wxWidgets licence, see README for details. |
830efc9b RN |
17 | */ |
18 | ||
1701c4d4 VS |
19 | #define CONST const |
20 | ||
3ca4086b | 21 | /* ASCII character-name table */ |
830efc9b | 22 | |
3ca4086b | 23 | static struct cname { |
1701c4d4 | 24 | chr *name; |
3ca4086b VS |
25 | char code; |
26 | } cnames[] = { | |
1701c4d4 VS |
27 | {_T("NUL"), '\0'}, |
28 | {_T("SOH"), '\001'}, | |
29 | {_T("STX"), '\002'}, | |
30 | {_T("ETX"), '\003'}, | |
31 | {_T("EOT"), '\004'}, | |
32 | {_T("ENQ"), '\005'}, | |
33 | {_T("ACK"), '\006'}, | |
34 | {_T("BEL"), '\007'}, | |
35 | {_T("alert"), '\007'}, | |
36 | {_T("BS"), '\010'}, | |
37 | {_T("backspace"), '\b'}, | |
38 | {_T("HT"), '\011'}, | |
39 | {_T("tab"), '\t'}, | |
40 | {_T("LF"), '\012'}, | |
41 | {_T("newline"), '\n'}, | |
42 | {_T("VT"), '\013'}, | |
43 | {_T("vertical-tab"), '\v'}, | |
44 | {_T("FF"), '\014'}, | |
45 | {_T("form-feed"), '\f'}, | |
46 | {_T("CR"), '\015'}, | |
47 | {_T("carriage-return"), '\r'}, | |
48 | {_T("SO"), '\016'}, | |
49 | {_T("SI"), '\017'}, | |
50 | {_T("DLE"), '\020'}, | |
51 | {_T("DC1"), '\021'}, | |
52 | {_T("DC2"), '\022'}, | |
53 | {_T("DC3"), '\023'}, | |
54 | {_T("DC4"), '\024'}, | |
55 | {_T("NAK"), '\025'}, | |
56 | {_T("SYN"), '\026'}, | |
57 | {_T("ETB"), '\027'}, | |
58 | {_T("CAN"), '\030'}, | |
59 | {_T("EM"), '\031'}, | |
60 | {_T("SUB"), '\032'}, | |
61 | {_T("ESC"), '\033'}, | |
62 | {_T("IS4"), '\034'}, | |
63 | {_T("FS"), '\034'}, | |
64 | {_T("IS3"), '\035'}, | |
65 | {_T("GS"), '\035'}, | |
66 | {_T("IS2"), '\036'}, | |
67 | {_T("RS"), '\036'}, | |
68 | {_T("IS1"), '\037'}, | |
69 | {_T("US"), '\037'}, | |
70 | {_T("space"), ' '}, | |
71 | {_T("exclamation-mark"), '!'}, | |
72 | {_T("quotation-mark"), '"'}, | |
73 | {_T("number-sign"), '#'}, | |
74 | {_T("dollar-sign"), '$'}, | |
75 | {_T("percent-sign"), '%'}, | |
76 | {_T("ampersand"), '&'}, | |
77 | {_T("apostrophe"), '\''}, | |
78 | {_T("left-parenthesis"), '('}, | |
79 | {_T("right-parenthesis"), ')'}, | |
80 | {_T("asterisk"), '*'}, | |
81 | {_T("plus-sign"), '+'}, | |
82 | {_T("comma"), ','}, | |
83 | {_T("hyphen"), '-'}, | |
84 | {_T("hyphen-minus"), '-'}, | |
85 | {_T("period"), '.'}, | |
86 | {_T("full-stop"), '.'}, | |
87 | {_T("slash"), '/'}, | |
88 | {_T("solidus"), '/'}, | |
89 | {_T("zero"), '0'}, | |
90 | {_T("one"), '1'}, | |
91 | {_T("two"), '2'}, | |
92 | {_T("three"), '3'}, | |
93 | {_T("four"), '4'}, | |
94 | {_T("five"), '5'}, | |
95 | {_T("six"), '6'}, | |
96 | {_T("seven"), '7'}, | |
97 | {_T("eight"), '8'}, | |
98 | {_T("nine"), '9'}, | |
99 | {_T("colon"), ':'}, | |
100 | {_T("semicolon"), ';'}, | |
101 | {_T("less-than-sign"), '<'}, | |
102 | {_T("equals-sign"), '='}, | |
103 | {_T("greater-than-sign"), '>'}, | |
104 | {_T("question-mark"), '?'}, | |
105 | {_T("commercial-at"), '@'}, | |
106 | {_T("left-square-bracket"), '['}, | |
107 | {_T("backslash"), '\\'}, | |
108 | {_T("reverse-solidus"), '\\'}, | |
109 | {_T("right-square-bracket"), ']'}, | |
110 | {_T("circumflex"), '^'}, | |
111 | {_T("circumflex-accent"), '^'}, | |
112 | {_T("underscore"), '_'}, | |
113 | {_T("low-line"), '_'}, | |
114 | {_T("grave-accent"), '`'}, | |
115 | {_T("left-brace"), '{'}, | |
116 | {_T("left-curly-bracket"), '{'}, | |
117 | {_T("vertical-line"), '|'}, | |
118 | {_T("right-brace"), '}'}, | |
119 | {_T("right-curly-bracket"), '}'}, | |
120 | {_T("tilde"), '~'}, | |
121 | {_T("DEL"), '\177'}, | |
122 | {NULL, 0} | |
3ca4086b | 123 | }; |
c5feba0e | 124 | |
1701c4d4 VS |
125 | #if wxUSE_UNICODE |
126 | ||
3ca4086b | 127 | /* Unicode character-class tables */ |
c5feba0e | 128 | |
3ca4086b VS |
/*
 * crange -- one contiguous run of character codes, described by its first
 * (start) and last (end) members.  NOTE(review): the range tables in this
 * file list e.g. {0x0041, 0x005a} for 'A'..'Z', so both ends look
 * inclusive -- confirm against the code that consumes the tables.
 */
129 | typedef struct crange {
130 | chr start; | |
131 | chr end; | |
132 | } crange; | |
c5feba0e | 133 | |
3ca4086b VS |
134 | /* |
135 | * Declarations of Unicode character ranges. This code | |
136 | * is automatically generated by the tools/uniClass.tcl script | |
137 | * and used in generic/regc_locale.c. Do not modify by hand. | |
138 | */ | |
830efc9b | 139 | |
3ca4086b VS |
140 | /* Unicode: alphabetic characters */ |
141 | ||
142 | static crange alphaRangeTable[] = { | |
143 | {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, | |
144 | {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8}, | |
145 | {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1}, | |
146 | {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481}, | |
147 | {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587}, | |
148 | {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a}, | |
149 | {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5}, | |
150 | {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8}, | |
151 | {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a}, | |
152 | {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74}, | |
153 | {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, | |
154 | {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, | |
155 | {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, | |
156 | {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9}, | |
157 | {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, | |
158 | {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, | |
159 | {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, | |
160 | {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, | |
161 | {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46}, | |
162 | {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0}, | |
163 | {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b}, | |
164 | {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, | |
165 | {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, | |
166 | {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, | |
167 | {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, | |
168 | {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, | |
169 | {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, | |
170 | {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4}, | |
171 | {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea}, | |
172 | {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b}, | |
173 | {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, | |
174 | {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, | |
175 | {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, | |
176 | {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, | |
177 | {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131}, | |
178 | {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, | |
179 | {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, | |
180 | {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, | |
181 | {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, | |
182 | {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, | |
183 | {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72}, | |
184 | {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, | |
185 | {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} | |
186 | }; | |
830efc9b | 187 | |
3ca4086b VS |
188 | #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) |
189 | ||
190 | static chr alphaCharTable[] = { | |
191 | 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, | |
192 | 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, | |
193 | 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, | |
194 | 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, | |
195 | 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f, | |
196 | 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c, | |
197 | 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1, | |
198 | 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87, | |
199 | 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, | |
200 | 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258, | |
201 | 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f, | |
202 | 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d, | |
203 | 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe | |
204 | }; | |
205 | ||
206 | #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) | |
207 | ||
208 | /* Unicode: decimal digit characters */ | |
209 | ||
210 | static crange digitRangeTable[] = { | |
211 | {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f}, | |
212 | {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f}, | |
213 | {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f}, | |
214 | {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049}, | |
215 | {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19} | |
216 | }; | |
217 | ||
218 | #define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange)) | |
219 | ||
220 | /* no singletons of digit characters */ | |
221 | ||
222 | /* Unicode: punctuation characters */ | |
223 | ||
224 | static crange punctRangeTable[] = { | |
225 | {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d}, | |
226 | {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12}, | |
227 | {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed}, | |
228 | {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043}, | |
229 | {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, | |
230 | {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, | |
231 | {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65} | |
232 | }; | |
233 | ||
234 | #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) | |
235 | ||
236 | static chr punctCharTable[] = { | |
237 | 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, | |
238 | 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, | |
239 | 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, | |
240 | 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d, | |
241 | 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d, | |
242 | 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, | |
243 | 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d | |
244 | }; | |
245 | ||
246 | #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) | |
247 | ||
248 | /* Unicode: white space characters */ | |
249 | ||
250 | static crange spaceRangeTable[] = { | |
251 | {0x0009, 0x000d}, {0x2000, 0x200b} | |
252 | }; | |
253 | ||
254 | #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) | |
255 | ||
256 | static chr spaceCharTable[] = { | |
257 | 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000 | |
258 | }; | |
259 | ||
260 | #define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) | |
261 | ||
262 | /* Unicode: lowercase characters */ | |
263 | ||
264 | static crange lowerRangeTable[] = { | |
265 | {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180}, | |
266 | {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce}, | |
267 | {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587}, | |
268 | {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, | |
269 | {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, | |
270 | {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, | |
271 | {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, | |
272 | {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} | |
273 | }; | |
274 | ||
275 | #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) | |
276 | ||
277 | static chr lowerCharTable[] = { | |
278 | 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, | |
279 | 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, | |
280 | 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, | |
281 | 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140, | |
282 | 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151, | |
283 | 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163, | |
284 | 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175, | |
285 | 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192, | |
286 | 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad, | |
287 | 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce, | |
288 | 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df, | |
289 | 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0, | |
290 | 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205, | |
291 | 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, | |
292 | 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, | |
293 | 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd, | |
294 | 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5, | |
295 | 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471, | |
296 | 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d, | |
297 | 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, | |
298 | 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, | |
299 | 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, | |
300 | 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, | |
301 | 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef, | |
302 | 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09, | |
303 | 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, | |
304 | 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d, | |
305 | 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f, | |
306 | 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, | |
307 | 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, | |
308 | 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, | |
309 | 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87, | |
310 | 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5, | |
311 | 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7, | |
312 | 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, | |
313 | 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, | |
314 | 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, | |
315 | 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe, | |
316 | 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e, | |
317 | 0x210f, 0x2113, 0x212f, 0x2134, 0x2139 | |
318 | }; | |
319 | ||
320 | #define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) | |
321 | ||
322 | /* Unicode: uppercase characters */ | |
323 | ||
324 | static crange upperRangeTable[] = { | |
325 | {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b}, | |
326 | {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8}, | |
327 | {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4}, | |
328 | {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f}, | |
329 | {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, | |
330 | {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, | |
331 | {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, | |
332 | {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a} | |
333 | }; | |
334 | ||
335 | #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) | |
336 | ||
337 | static chr upperCharTable[] = { | |
338 | 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, | |
339 | 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, | |
340 | 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, | |
341 | 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147, | |
342 | 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a, | |
343 | 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, | |
344 | 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, | |
345 | 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d, | |
346 | 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae, | |
347 | 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, | |
348 | 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0, | |
349 | 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4, | |
350 | 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, | |
351 | 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, | |
352 | 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, | |
353 | 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0, | |
354 | 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460, | |
355 | 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, | |
356 | 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e, | |
357 | 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, | |
358 | 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, | |
359 | 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3, | |
360 | 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc, | |
361 | 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee, | |
362 | 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, | |
363 | 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, | |
364 | 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, | |
365 | 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, | |
366 | 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, | |
367 | 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, | |
368 | 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, | |
369 | 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, | |
370 | 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2, | |
371 | 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, | |
372 | 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, | |
373 | 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8, | |
374 | 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, | |
375 | 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b, | |
376 | 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130, | |
377 | 0x2131, 0x2133 | |
830efc9b RN |
378 | }; |
379 | ||
3ca4086b VS |
380 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) |
381 | ||
382 | /* Unicode: unicode print characters excluding space */ | |
383 | ||
384 | static crange graphRangeTable[] = { | |
385 | {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233}, | |
386 | {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e}, | |
387 | {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce}, | |
388 | {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486}, | |
389 | {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f}, | |
390 | {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4}, | |
391 | {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655}, | |
392 | {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d}, | |
393 | {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0}, | |
394 | {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d}, | |
395 | {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c}, | |
396 | {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4}, | |
397 | {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a}, | |
398 | {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, | |
399 | {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83}, | |
400 | {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, | |
401 | {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, | |
402 | {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f}, | |
403 | {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43}, | |
404 | {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a}, | |
405 | {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, | |
406 | {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, | |
407 | {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, | |
408 | {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, | |
409 | {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f}, | |
410 | {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, | |
411 | {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, | |
412 | {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, | |
413 | {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48}, | |
414 | {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, | |
415 | {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, | |
416 | {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, | |
417 | {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, | |
418 | {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, | |
419 | {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b}, | |
420 | {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f}, | |
421 | {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059}, | |
422 | {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159}, | |
423 | {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f}, | |
424 | {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, | |
425 | {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5}, | |
426 | {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, | |
427 | {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e}, | |
428 | {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4}, | |
429 | {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676}, | |
430 | {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9}, | |
431 | {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9}, | |
432 | {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, | |
433 | {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, | |
434 | {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, | |
435 | {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, | |
436 | {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046}, | |
437 | {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3}, | |
438 | {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3}, | |
439 | {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b}, | |
440 | {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a}, | |
441 | {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7}, | |
442 | {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704}, | |
443 | {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b}, | |
444 | {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794}, | |
445 | {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff}, | |
446 | {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, | |
447 | {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094}, | |
448 | {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c}, | |
449 | {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243}, | |
450 | {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe}, | |
451 | {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe}, | |
452 | {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f}, | |
453 | {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f}, | |
454 | {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f}, | |
455 | {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f}, | |
456 | {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f}, | |
457 | {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f}, | |
458 | {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f}, | |
459 | {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f}, | |
460 | {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f}, | |
461 | {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f}, | |
462 | {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f}, | |
463 | {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f}, | |
464 | {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f}, | |
465 | {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f}, | |
466 | {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f}, | |
467 | {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f}, | |
468 | {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f}, | |
469 | {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f}, | |
470 | {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f}, | |
471 | {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f}, | |
472 | {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f}, | |
473 | {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f}, | |
474 | {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f}, | |
475 | {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f}, | |
476 | {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f}, | |
477 | {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f}, | |
478 | {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f}, | |
479 | {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f}, | |
480 | {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c}, | |
481 | {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4}, | |
482 | {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f}, | |
483 | {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f}, | |
484 | {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f}, | |
485 | {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f}, | |
486 | {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f}, | |
487 | {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f}, | |
488 | {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f}, | |
489 | {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f}, | |
490 | {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f}, | |
491 | {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f}, | |
492 | {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f}, | |
493 | {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, | |
494 | {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36}, | |
495 | {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f}, | |
496 | {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, | |
497 | {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66}, | |
498 | {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f}, | |
499 | {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, | |
500 | {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, | |
501 | {0xfffc, 0xffff} | |
502 | }; | |
503 | ||
504 | #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) | |
505 | ||
506 | static chr graphCharTable[] = { | |
507 | 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, | |
508 | 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, | |
509 | 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, | |
510 | 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, | |
511 | 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0, | |
512 | 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, | |
513 | 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, | |
514 | 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5, | |
515 | 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61, | |
516 | 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, | |
517 | 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, | |
518 | 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288, | |
519 | 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756, | |
520 | 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74 | |
521 | }; | |
522 | ||
523 | #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) | |
524 | ||
525 | /* | |
526 | * End of auto-generated Unicode character ranges declarations. | |
527 | */ | |
528 | ||
1701c4d4 VS |
529 | /*
530 | * Supply implementations for some tcl functions that this module depends on, | |
531 | * to make it self-contained. | |
532 | */ | |
533 | ||
534 | #include "tclUniData.c" | |
535 | #define Tcl_UniChar wxChar | |
536 | ||
537 | /* | |
538 | * Compute the uppercase equivalent of the given Unicode character. | |
539 | * Taken from tcl. | |
540 | */ | |
541 | ||
542 | Tcl_UniChar Tcl_UniCharToUpper(int ch) | |
543 | { | |
544 | int info = GetUniCharInfo(ch); | |
545 | ||
546 | if (GetCaseType(info) & 0x04) { | |
547 | return (Tcl_UniChar) (ch - GetDelta(info)); | |
548 | } else { | |
549 | return ch; | |
550 | } | |
551 | } | |
552 | ||
553 | /* | |
554 | * Compute the lowercase equivalent of the given Unicode character. | |
555 | * Taken from tcl. | |
556 | */ | |
557 | ||
558 | Tcl_UniChar Tcl_UniCharToLower(int ch) | |
559 | { | |
560 | int info = GetUniCharInfo(ch); | |
561 | ||
562 | if (GetCaseType(info) & 0x02) { | |
563 | return (Tcl_UniChar) (ch + GetDelta(info)); | |
564 | } else { | |
565 | return ch; | |
566 | } | |
567 | } | |
568 | ||
569 | /* | |
570 | * Compute the titlecase equivalent of the given Unicode character. | |
571 | * Taken from tcl. | |
572 | */ | |
573 | ||
574 | Tcl_UniChar Tcl_UniCharToTitle(int ch) | |
575 | { | |
576 | int info = GetUniCharInfo(ch); | |
577 | int mode = GetCaseType(info); | |
578 | ||
579 | if (mode & 0x1) { | |
580 | /* | |
581 | * Subtract or add one depending on the original case. | |
582 | */ | |
583 | ||
584 | return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); | |
585 | } else if (mode == 0x4) { | |
586 | return (Tcl_UniChar) (ch - GetDelta(info)); | |
587 | } else { | |
588 | return ch; | |
589 | } | |
590 | } | |
591 | ||
592 | #else /* wxUSE_UNICODE */ | |
593 | ||
594 | #include <locale.h> | |
595 | ||
/* Signature of a character classification predicate such as isalpha(). */
typedef int (*isfunc_t)(int);

/*
 * ASCII character-class table, searched by name in cclass() and terminated
 * by an entry whose name is NULL.
 *
 *   name   - class name as written inside [: :]
 *   chars  - explicit member list; cclass() uses it when isfunc is NULL or
 *            the LC_CTYPE locale is unset or "C"
 *   hasch  - flag kept with each entry; cclass() does not read it
 *   isfunc - predicate cclass() evaluates for codes 0..255 in other locales
 */
static struct cclass {
    char *name;
    char *chars;
    int hasch;
    isfunc_t isfunc;
} cclasses[] = {
    {"alnum",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
        "0123456789",
        1, isalnum},
    {"alpha",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz",
        1, isalpha},
    {"blank",
        " \t",
        0, NULL},
    {"cntrl",
        "\007\b\t\n\v\f\r" "\1\2\3\4\5\6" "\16\17\20\21\22\23\24"
        "\25\26\27\30\31\32\33\34\35\36\37\177",
        0, iscntrl},
    {"digit",
        "0123456789",
        0, isdigit},
    {"graph",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
        "0123456789" "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        1, isgraph},
    {"lower",
        "abcdefghijklmnopqrstuvwxyz",
        1, islower},
    {"print",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
        "0123456789" "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
        1, isprint},
    {"punct",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        0, ispunct},
    {"space",
        "\t\n\v\f\r ",
        0, isspace},
    {"upper",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        0, isupper},
    {"xdigit",
        "0123456789ABCDEFabcdef",
        0, isxdigit},
    {NULL, NULL, 0, NULL}		/* end-of-table sentinel */
};
630 | ||
631 | /* | |
632 | * Supply implementations for some tcl functions that this module depends on | |
633 | * to make it self contained | |
634 | */ | |
635 | ||
636 | #define Tcl_UniChar wxChar | |
52de37c7 VS |
637 | Tcl_UniChar Tcl_UniCharToUpper(int ch) { return wxCRT_ToupperNative(ch); } |
638 | Tcl_UniChar Tcl_UniCharToLower(int ch) { return wxCRT_TolowerNative(ch); } | |
639 | Tcl_UniChar Tcl_UniCharToTitle(int ch) { return wxCRT_ToupperNative(ch); } | |
1701c4d4 VS |
640 | |
641 | #endif /* !wxUSE_UNICODE */ | |
642 | ||
3ca4086b | 643 | #define CH NOCELT |
830efc9b RN |
644 | |
/*
 - nmcces - how many distinct MCCEs are there?
 * This locale defines no multi-character collating elements, so the count
 * is always zero and the context argument is not consulted.
 ^ static int nmcces(struct vars *);
 */
static int
nmcces(v)
    struct vars *v;		/* context (unused) */
{
    (void) v;

    return 0;
}
658 | ||
/*
 - nleaders - how many chrs can be first chrs of MCCEs?
 * Always zero here; the context argument is not consulted.
 ^ static int nleaders(struct vars *);
 */
static int
nleaders(v)
    struct vars *v;		/* context (unused) */
{
    (void) v;

    return 0;
}
669 | ||
/*
 - allmcces - return a cvec with all the MCCEs of the locale
 * The caller's cvec is passed through clearcvec() and its result is handed
 * back as-is; nothing is added to it.
 ^ static struct cvec *allmcces(struct vars *, struct cvec *);
 */
static struct cvec *
allmcces(v, cv)
    struct vars *v;		/* context (unused) */
    struct cvec *cv;		/* this is supposed to have enough room */
{
    struct cvec *cleared;

    (void) v;
    cleared = clearcvec(cv);
    return cleared;
}
681 | ||
682 | /* | |
3ca4086b VS |
683 | - element - map collating-element name to celt |
684 | ^ static celt element(struct vars *, chr *, chr *); | |
830efc9b RN |
685 | */ |
686 | static celt | |
3ca4086b VS |
687 | element(v, startp, endp) |
688 | struct vars *v; /* context */ | |
689 | chr *startp; /* points to start of name */ | |
690 | chr *endp; /* points just past end of name */ | |
830efc9b | 691 | { |
3ca4086b VS |
692 | struct cname *cn; |
693 | size_t len; | |
3ca4086b VS |
694 | |
695 | /* generic: one-chr names stand for themselves */ | |
696 | assert(startp < endp); | |
697 | len = endp - startp; | |
698 | if (len == 1) { | |
699 | return *startp; | |
700 | } | |
701 | ||
702 | NOTE(REG_ULOCALE); | |
703 | ||
704 | /* search table */ | |
3ca4086b | 705 | for (cn=cnames; cn->name!=NULL; cn++) { |
52de37c7 | 706 | if (wxCRT_StrlenNative(cn->name)==len && wxCRT_StrncmpNative(cn->name, startp, len)==0) { |
3ca4086b | 707 | break; /* NOTE BREAK OUT */ |
830efc9b | 708 | } |
3ca4086b | 709 | } |
3ca4086b VS |
710 | if (cn->name != NULL) { |
711 | return CHR(cn->code); | |
712 | } | |
713 | ||
714 | /* couldn't find it */ | |
715 | ERR(REG_ECOLLATE); | |
716 | return 0; | |
830efc9b RN |
717 | } |
718 | ||
719 | /* | |
3ca4086b VS |
720 | - range - supply cvec for a range, including legality check |
721 | ^ static struct cvec *range(struct vars *, celt, celt, int); | |
830efc9b RN |
722 | */ |
723 | static struct cvec * | |
3ca4086b VS |
724 | range(v, a, b, cases) |
725 | struct vars *v; /* context */ | |
726 | celt a; /* range start */ | |
727 | celt b; /* range end, might equal a */ | |
728 | int cases; /* case-independent? */ | |
830efc9b | 729 | { |
3ca4086b VS |
730 | int nchrs; |
731 | struct cvec *cv; | |
732 | celt c, lc, uc, tc; | |
830efc9b | 733 | |
3ca4086b VS |
734 | if (a != b && !before(a, b)) { |
735 | ERR(REG_ERANGE); | |
736 | return NULL; | |
737 | } | |
830efc9b | 738 | |
3ca4086b VS |
739 | if (!cases) { /* easy version */ |
740 | cv = getcvec(v, 0, 1, 0); | |
830efc9b | 741 | NOERRN(); |
3ca4086b VS |
742 | addrange(cv, a, b); |
743 | return cv; | |
744 | } | |
745 | ||
746 | /* | |
747 | * When case-independent, it's hard to decide when cvec ranges are | |
748 | * usable, so for now at least, we won't try. We allocate enough | |
749 | * space for two case variants plus a little extra for the two | |
750 | * title case variants. | |
751 | */ | |
752 | ||
753 | nchrs = (b - a + 1)*2 + 4; | |
754 | ||
755 | cv = getcvec(v, nchrs, 0, 0); | |
756 | NOERRN(); | |
757 | ||
758 | for (c=a; c<=b; c++) { | |
759 | addchr(cv, c); | |
760 | lc = Tcl_UniCharToLower((chr)c); | |
761 | uc = Tcl_UniCharToUpper((chr)c); | |
762 | tc = Tcl_UniCharToTitle((chr)c); | |
763 | if (c != lc) { | |
764 | addchr(cv, lc); | |
765 | } | |
766 | if (c != uc) { | |
767 | addchr(cv, uc); | |
768 | } | |
769 | if (c != tc && tc != uc) { | |
770 | addchr(cv, tc); | |
830efc9b | 771 | } |
3ca4086b | 772 | } |
830efc9b | 773 | |
3ca4086b | 774 | return cv; |
830efc9b RN |
775 | } |
776 | ||
777 | /* | |
3ca4086b VS |
778 | - before - is celt x before celt y, for purposes of range legality? |
779 | ^ static int before(celt, celt); | |
830efc9b | 780 | */ |
3ca4086b VS |
781 | static int /* predicate */ |
782 | before(x, y) | |
783 | celt x, y; /* collating elements */ | |
830efc9b | 784 | { |
3ca4086b VS |
785 | /* trivial because no MCCEs */ |
786 | if (x < y) { | |
787 | return 1; | |
788 | } | |
789 | return 0; | |
830efc9b RN |
790 | } |
791 | ||
792 | /* | |
3ca4086b | 793 | - eclass - supply cvec for an equivalence class |
830efc9b | 794 | * Must include case counterparts on request. |
3ca4086b | 795 | ^ static struct cvec *eclass(struct vars *, celt, int); |
830efc9b RN |
796 | */ |
797 | static struct cvec * | |
3ca4086b VS |
798 | eclass(v, c, cases) |
799 | struct vars *v; /* context */ | |
800 | celt c; /* Collating element representing | |
801 | * the equivalence class. */ | |
802 | int cases; /* all cases? */ | |
830efc9b | 803 | { |
3ca4086b VS |
804 | struct cvec *cv; |
805 | ||
806 | /* crude fake equivalence class for testing */ | |
807 | if ((v->cflags®_FAKE) && c == 'x') { | |
808 | cv = getcvec(v, 4, 0, 0); | |
809 | addchr(cv, (chr)'x'); | |
810 | addchr(cv, (chr)'y'); | |
811 | if (cases) { | |
812 | addchr(cv, (chr)'X'); | |
813 | addchr(cv, (chr)'Y'); | |
830efc9b | 814 | } |
830efc9b | 815 | return cv; |
3ca4086b VS |
816 | } |
817 | ||
818 | /* otherwise, none */ | |
819 | if (cases) { | |
820 | return allcases(v, c); | |
821 | } | |
822 | cv = getcvec(v, 1, 0, 0); | |
823 | assert(cv != NULL); | |
824 | addchr(cv, (chr)c); | |
825 | return cv; | |
830efc9b RN |
826 | } |
827 | ||
1701c4d4 VS |
828 | #if wxUSE_UNICODE |
829 | ||
830efc9b | 830 | /* |
3ca4086b | 831 | - cclass - supply cvec for a character class |
830efc9b | 832 | * Must include case counterparts on request. |
3ca4086b | 833 | ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); |
830efc9b RN |
834 | */ |
835 | static struct cvec * | |
3ca4086b VS |
836 | cclass(v, startp, endp, cases) |
837 | struct vars *v; /* context */ | |
838 | chr *startp; /* where the name starts */ | |
839 | chr *endp; /* just past the end of the name */ | |
840 | int cases; /* case-independent? */ | |
830efc9b | 841 | { |
3ca4086b VS |
842 | size_t len; |
843 | struct cvec *cv = NULL; | |
1701c4d4 VS |
844 | CONST chr *np; |
845 | chr **namePtr; | |
3ca4086b VS |
846 | int i, index; |
847 | ||
848 | /* | |
849 | * The following arrays define the valid character class names. | |
850 | */ | |
851 | ||
1701c4d4 VS |
852 | static chr *classNames[] = { |
853 | _T("alnum"), _T("alpha"), _T("ascii"), _T("blank"), _T("cntrl"), _T("digit"), _T("graph"), | |
854 | _T("lower"), _T("print"), _T("punct"), _T("space"), _T("upper"), _T("xdigit"), NULL | |
3ca4086b VS |
855 | }; |
856 | ||
857 | enum classes { | |
858 | CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, | |
859 | CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT | |
860 | }; | |
861 | ||
830efc9b | 862 | |
3ca4086b VS |
863 | /* |
864 | * Extract the class name | |
865 | */ | |
830efc9b | 866 | |
3ca4086b | 867 | len = endp - startp; |
1701c4d4 | 868 | np = startp; |
830efc9b | 869 | |
3ca4086b VS |
870 | /* |
871 | * Remap lower and upper to alpha if the match is case insensitive. | |
872 | */ | |
830efc9b | 873 | |
52de37c7 VS |
874 | if (cases && len == 5 && (wxCRT_StrncmpNative(_T("lower"), np, 5) == 0 |
875 | || wxCRT_StrncmpNative(_T("upper"), np, 5) == 0)) { | |
1701c4d4 | 876 | np = _T("alpha"); |
3ca4086b | 877 | } |
830efc9b | 878 | |
3ca4086b VS |
879 | /* |
880 | * Map the name to the corresponding enumerated value. | |
881 | */ | |
830efc9b | 882 | |
3ca4086b VS |
883 | index = -1; |
884 | for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { | |
52de37c7 | 885 | if ((wxCRT_StrlenNative(*namePtr) == len) && (wxCRT_StrncmpNative(*namePtr, np, len) == 0)) { |
3ca4086b VS |
886 | index = i; |
887 | break; | |
888 | } | |
889 | } | |
3ca4086b VS |
890 | if (index == -1) { |
891 | ERR(REG_ECTYPE); | |
892 | return NULL; | |
893 | } | |
894 | ||
895 | /* | |
896 | * Now compute the character class contents. | |
897 | */ | |
898 | ||
899 | switch((enum classes) index) { | |
900 | case CC_PRINT: | |
901 | case CC_ALNUM: | |
902 | cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); | |
903 | if (cv) { | |
904 | for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { | |
905 | addchr(cv, alphaCharTable[i]); | |
906 | } | |
907 | for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { | |
908 | addrange(cv, alphaRangeTable[i].start, | |
909 | alphaRangeTable[i].end); | |
910 | } | |
911 | for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { | |
912 | addrange(cv, digitRangeTable[i].start, | |
913 | digitRangeTable[i].end); | |
914 | } | |
915 | } | |
916 | break; | |
917 | case CC_ALPHA: | |
918 | cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); | |
919 | if (cv) { | |
920 | for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { | |
921 | addrange(cv, alphaRangeTable[i].start, | |
922 | alphaRangeTable[i].end); | |
923 | } | |
924 | for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { | |
925 | addchr(cv, alphaCharTable[i]); | |
926 | } | |
927 | } | |
928 | break; | |
929 | case CC_ASCII: | |
930 | cv = getcvec(v, 0, 1, 0); | |
931 | if (cv) { | |
932 | addrange(cv, 0, 0x7f); | |
933 | } | |
934 | break; | |
935 | case CC_BLANK: | |
936 | cv = getcvec(v, 2, 0, 0); | |
937 | addchr(cv, '\t'); | |
938 | addchr(cv, ' '); | |
939 | break; | |
940 | case CC_CNTRL: | |
941 | cv = getcvec(v, 0, 2, 0); | |
942 | addrange(cv, 0x0, 0x1f); | |
943 | addrange(cv, 0x7f, 0x9f); | |
944 | break; | |
945 | case CC_DIGIT: | |
946 | cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); | |
947 | if (cv) { | |
948 | for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { | |
949 | addrange(cv, digitRangeTable[i].start, | |
950 | digitRangeTable[i].end); | |
951 | } | |
952 | } | |
953 | break; | |
954 | case CC_PUNCT: | |
955 | cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); | |
956 | if (cv) { | |
957 | for (i=0 ; i<NUM_PUNCT_RANGE ; i++) { | |
958 | addrange(cv, punctRangeTable[i].start, | |
959 | punctRangeTable[i].end); | |
960 | } | |
961 | for (i=0 ; i<NUM_PUNCT_CHAR ; i++) { | |
962 | addchr(cv, punctCharTable[i]); | |
963 | } | |
964 | } | |
965 | break; | |
966 | case CC_XDIGIT: | |
830efc9b | 967 | /* |
3ca4086b VS |
968 | * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no |
969 | * idea how to define the digits 'a' through 'f' in | |
970 | * non-western locales. The concept is quite possibly non | |
971 | * portable, or only used in contextx where the characters | |
972 | * used would be the western ones anyway! Whatever is | |
973 | * actually the case, the number of ranges is fixed (until | |
974 | * someone comes up with a better arrangement!) | |
830efc9b | 975 | */ |
3ca4086b VS |
976 | cv = getcvec(v, 0, 3, 0); |
977 | if (cv) { | |
978 | addrange(cv, '0', '9'); | |
979 | addrange(cv, 'a', 'f'); | |
980 | addrange(cv, 'A', 'F'); | |
830efc9b | 981 | } |
3ca4086b VS |
982 | break; |
983 | case CC_SPACE: | |
984 | cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); | |
985 | if (cv) { | |
986 | for (i=0 ; i<NUM_SPACE_RANGE ; i++) { | |
987 | addrange(cv, spaceRangeTable[i].start, | |
988 | spaceRangeTable[i].end); | |
989 | } | |
990 | for (i=0 ; i<NUM_SPACE_CHAR ; i++) { | |
991 | addchr(cv, spaceCharTable[i]); | |
992 | } | |
993 | } | |
994 | break; | |
995 | case CC_LOWER: | |
996 | cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); | |
997 | if (cv) { | |
998 | for (i=0 ; i<NUM_LOWER_RANGE ; i++) { | |
999 | addrange(cv, lowerRangeTable[i].start, | |
1000 | lowerRangeTable[i].end); | |
1001 | } | |
1002 | for (i=0 ; i<NUM_LOWER_CHAR ; i++) { | |
1003 | addchr(cv, lowerCharTable[i]); | |
1004 | } | |
1005 | } | |
1006 | break; | |
1007 | case CC_UPPER: | |
1008 | cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); | |
1009 | if (cv) { | |
1010 | for (i=0 ; i<NUM_UPPER_RANGE ; i++) { | |
1011 | addrange(cv, upperRangeTable[i].start, | |
1012 | upperRangeTable[i].end); | |
1013 | } | |
1014 | for (i=0 ; i<NUM_UPPER_CHAR ; i++) { | |
1015 | addchr(cv, upperCharTable[i]); | |
1016 | } | |
1017 | } | |
1018 | break; | |
1019 | case CC_GRAPH: | |
1020 | cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); | |
1021 | if (cv) { | |
1022 | for (i=0 ; i<NUM_GRAPH_RANGE ; i++) { | |
1023 | addrange(cv, graphRangeTable[i].start, | |
1024 | graphRangeTable[i].end); | |
1025 | } | |
1026 | for (i=0 ; i<NUM_GRAPH_CHAR ; i++) { | |
1027 | addchr(cv, graphCharTable[i]); | |
1028 | } | |
1029 | } | |
1030 | break; | |
1031 | } | |
1032 | if (cv == NULL) { | |
1033 | ERR(REG_ESPACE); | |
1034 | } | |
1035 | return cv; | |
830efc9b RN |
1036 | } |
1037 | ||
1701c4d4 VS |
1038 | #else /* wxUSE_UNICODE */ |
1039 | ||
1040 | static struct cvec * | |
1041 | cclass(v, startp, endp, cases) | |
1042 | struct vars *v; | |
1043 | chr *startp; /* where the name starts */ | |
1044 | chr *endp; /* just past the end of the name */ | |
1045 | int cases; /* case-independent? */ | |
1046 | { | |
1047 | size_t len; | |
1048 | char *p; | |
1049 | struct cclass *cc; | |
1050 | struct cvec *cv; | |
1051 | chr *np; | |
1052 | int i; | |
1053 | int count; | |
1054 | char buf[256]; | |
1055 | const char *loc; | |
1056 | ||
1057 | /* find the name */ | |
1058 | len = endp - startp; | |
1059 | np = startp; | |
52de37c7 VS |
1060 | if (cases && len == 5 && (wxCRT_StrncmpNative(_T("lower"), np, 5) == 0 || |
1061 | wxCRT_StrncmpNative(_T("upper"), np, 5) == 0)) | |
1701c4d4 VS |
1062 | np = _T("alpha"); |
1063 | for (cc = cclasses; cc->name != NULL; cc++) | |
52de37c7 | 1064 | if (wxCRT_StrlenNative(cc->name) == len && wxCRT_StrncmpNative(cc->name, np, len) == 0) |
1701c4d4 VS |
1065 | break; /* NOTE BREAK OUT */ |
1066 | if (cc->name == NULL) { | |
1067 | ERR(REG_ECTYPE); | |
1068 | return NULL; | |
1069 | } | |
1070 | ||
1071 | loc = setlocale(LC_CTYPE, NULL); | |
1072 | ||
1073 | if (!cc->isfunc || loc == NULL || strcmp(loc, "C") == 0) | |
1074 | { | |
1075 | /* set up vector */ | |
1076 | cv = getcvec(v, (int)strlen(cc->chars), 0, 0); | |
1077 | if (cv == NULL) { | |
1078 | ERR(REG_ESPACE); | |
1079 | return NULL; | |
1080 | } | |
1081 | ||
1082 | /* fill it in */ | |
1083 | for (p = cc->chars; *p != '\0'; p++) | |
1084 | addchr(cv, (chr)*p); | |
1085 | } | |
1086 | else | |
1087 | { | |
1088 | count = 0; | |
1089 | for (i = 0; i < 256; i++) | |
1090 | if (cc->isfunc(i)) | |
1091 | buf[count++] = i; | |
1092 | ||
1093 | /* set up vector */ | |
1094 | cv = getcvec(v, count, 0, 0); | |
1095 | if (cv == NULL) { | |
1096 | ERR(REG_ESPACE); | |
1097 | return NULL; | |
1098 | } | |
1099 | ||
1100 | /* fill it in */ | |
1101 | for (i = 0; i < count; i++) | |
1102 | addchr(cv, buf[i]); | |
1103 | } | |
1104 | ||
1105 | return cv; | |
1106 | } | |
1107 | ||
1108 | #endif /* !wxUSE_UNICODE */ | |
1109 | ||
1110 | ||
830efc9b | 1111 | /* |
3ca4086b | 1112 | - allcases - supply cvec for all case counterparts of a chr (including itself) |
830efc9b RN |
1113 | * This is a shortcut, preferably an efficient one, for simple characters; |
1114 | * messy cases are done via range(). | |
3ca4086b | 1115 | ^ static struct cvec *allcases(struct vars *, pchr); |
830efc9b RN |
1116 | */ |
1117 | static struct cvec * | |
3ca4086b VS |
1118 | allcases(v, pc) |
1119 | struct vars *v; /* context */ | |
1120 | pchr pc; /* character to get case equivs of */ | |
830efc9b | 1121 | { |
3ca4086b VS |
1122 | struct cvec *cv; |
1123 | chr c = (chr)pc; | |
1124 | chr lc, uc, tc; | |
1125 | ||
1126 | lc = Tcl_UniCharToLower((chr)c); | |
1127 | uc = Tcl_UniCharToUpper((chr)c); | |
1128 | tc = Tcl_UniCharToTitle((chr)c); | |
1129 | ||
1130 | if (tc != uc) { | |
1131 | cv = getcvec(v, 3, 0, 0); | |
1132 | addchr(cv, tc); | |
1133 | } else { | |
830efc9b | 1134 | cv = getcvec(v, 2, 0, 0); |
3ca4086b VS |
1135 | } |
1136 | addchr(cv, lc); | |
1137 | if (lc != uc) { | |
1138 | addchr(cv, uc); | |
1139 | } | |
1140 | return cv; | |
830efc9b RN |
1141 | } |
1142 | ||
1143 | /* | |
3ca4086b VS |
1144 | - cmp - chr-substring compare |
1145 | * Backrefs need this. It should preferably be efficient. | |
830efc9b RN |
1146 | * Note that it does not need to report anything except equal/unequal. |
1147 | * Note also that the length is exact, and the comparison should not | |
1148 | * stop at embedded NULs! | |
3ca4086b | 1149 | ^ static int cmp(CONST chr *, CONST chr *, size_t); |
830efc9b | 1150 | */ |
3ca4086b VS |
1151 | static int /* 0 for equal, nonzero for unequal */ |
1152 | cmp(x, y, len) | |
1153 | CONST chr *x, *y; /* strings to compare */ | |
1154 | size_t len; /* exact length of comparison */ | |
830efc9b | 1155 | { |
3ca4086b | 1156 | return memcmp(VS(x), VS(y), len*sizeof(chr)); |
830efc9b RN |
1157 | } |
1158 | ||
1159 | /* | |
3ca4086b | 1160 | - casecmp - case-independent chr-substring compare |
830efc9b RN |
1161 | * REG_ICASE backrefs need this. It should preferably be efficient. |
1162 | * Note that it does not need to report anything except equal/unequal. | |
1163 | * Note also that the length is exact, and the comparison should not | |
1164 | * stop at embedded NULs! | |
3ca4086b | 1165 | ^ static int casecmp(CONST chr *, CONST chr *, size_t); |
830efc9b | 1166 | */ |
3ca4086b VS |
1167 | static int /* 0 for equal, nonzero for unequal */ |
1168 | casecmp(x, y, len) | |
1169 | CONST chr *x, *y; /* strings to compare */ | |
1170 | size_t len; /* exact length of comparison */ | |
830efc9b | 1171 | { |
3ca4086b VS |
1172 | for (; len > 0; len--, x++, y++) { |
1173 | if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { | |
1174 | return 1; | |
830efc9b | 1175 | } |
3ca4086b VS |
1176 | } |
1177 | return 0; | |
830efc9b | 1178 | } |