fix some 'might not be initialized in this function' warnings
[wxWidgets.git] / src / regex / regc_locale.c
1 /*
2 * regc_locale.c --
3 *
4 * This file contains the Unicode locale specific regexp routines.
5 * This file is #included by regcomp.c.
6 *
7 * Copyright (c) 1998 by Scriptics Corporation.
8 *
9 * See the file "license.terms" for information on usage and redistribution
10 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 *
12 * RCS: @(#) $Id$
13 *
14 * wxWidgets:
15 * The Scriptics license can be found in the file COPYRIGHT. Modifications
16 * for wxWidgets are under the wxWidgets licence, see README for details.
17 */
18
19 #define CONST const
20
21 /* ASCII character-name table */
22
23 static struct cname {
24 chr *name;
25 char code;
26 } cnames[] = {
27 {_T("NUL"), '\0'},
28 {_T("SOH"), '\001'},
29 {_T("STX"), '\002'},
30 {_T("ETX"), '\003'},
31 {_T("EOT"), '\004'},
32 {_T("ENQ"), '\005'},
33 {_T("ACK"), '\006'},
34 {_T("BEL"), '\007'},
35 {_T("alert"), '\007'},
36 {_T("BS"), '\010'},
37 {_T("backspace"), '\b'},
38 {_T("HT"), '\011'},
39 {_T("tab"), '\t'},
40 {_T("LF"), '\012'},
41 {_T("newline"), '\n'},
42 {_T("VT"), '\013'},
43 {_T("vertical-tab"), '\v'},
44 {_T("FF"), '\014'},
45 {_T("form-feed"), '\f'},
46 {_T("CR"), '\015'},
47 {_T("carriage-return"), '\r'},
48 {_T("SO"), '\016'},
49 {_T("SI"), '\017'},
50 {_T("DLE"), '\020'},
51 {_T("DC1"), '\021'},
52 {_T("DC2"), '\022'},
53 {_T("DC3"), '\023'},
54 {_T("DC4"), '\024'},
55 {_T("NAK"), '\025'},
56 {_T("SYN"), '\026'},
57 {_T("ETB"), '\027'},
58 {_T("CAN"), '\030'},
59 {_T("EM"), '\031'},
60 {_T("SUB"), '\032'},
61 {_T("ESC"), '\033'},
62 {_T("IS4"), '\034'},
63 {_T("FS"), '\034'},
64 {_T("IS3"), '\035'},
65 {_T("GS"), '\035'},
66 {_T("IS2"), '\036'},
67 {_T("RS"), '\036'},
68 {_T("IS1"), '\037'},
69 {_T("US"), '\037'},
70 {_T("space"), ' '},
71 {_T("exclamation-mark"), '!'},
72 {_T("quotation-mark"), '"'},
73 {_T("number-sign"), '#'},
74 {_T("dollar-sign"), '$'},
75 {_T("percent-sign"), '%'},
76 {_T("ampersand"), '&'},
77 {_T("apostrophe"), '\''},
78 {_T("left-parenthesis"), '('},
79 {_T("right-parenthesis"), ')'},
80 {_T("asterisk"), '*'},
81 {_T("plus-sign"), '+'},
82 {_T("comma"), ','},
83 {_T("hyphen"), '-'},
84 {_T("hyphen-minus"), '-'},
85 {_T("period"), '.'},
86 {_T("full-stop"), '.'},
87 {_T("slash"), '/'},
88 {_T("solidus"), '/'},
89 {_T("zero"), '0'},
90 {_T("one"), '1'},
91 {_T("two"), '2'},
92 {_T("three"), '3'},
93 {_T("four"), '4'},
94 {_T("five"), '5'},
95 {_T("six"), '6'},
96 {_T("seven"), '7'},
97 {_T("eight"), '8'},
98 {_T("nine"), '9'},
99 {_T("colon"), ':'},
100 {_T("semicolon"), ';'},
101 {_T("less-than-sign"), '<'},
102 {_T("equals-sign"), '='},
103 {_T("greater-than-sign"), '>'},
104 {_T("question-mark"), '?'},
105 {_T("commercial-at"), '@'},
106 {_T("left-square-bracket"), '['},
107 {_T("backslash"), '\\'},
108 {_T("reverse-solidus"), '\\'},
109 {_T("right-square-bracket"), ']'},
110 {_T("circumflex"), '^'},
111 {_T("circumflex-accent"), '^'},
112 {_T("underscore"), '_'},
113 {_T("low-line"), '_'},
114 {_T("grave-accent"), '`'},
115 {_T("left-brace"), '{'},
116 {_T("left-curly-bracket"), '{'},
117 {_T("vertical-line"), '|'},
118 {_T("right-brace"), '}'},
119 {_T("right-curly-bracket"), '}'},
120 {_T("tilde"), '~'},
121 {_T("DEL"), '\177'},
122 {NULL, 0}
123 };
124
125 #if wxUSE_UNICODE
126
127 /* Unicode character-class tables */
128
129 typedef struct crange {
130 chr start;
131 chr end;
132 } crange;
133
134 /*
135 * Declarations of Unicode character ranges. This code
136 * is automatically generated by the tools/uniClass.tcl script
137 * and used in generic/regc_locale.c. Do not modify by hand.
138 */
139
140 /* Unicode: alphabetic characters */
141
142 static crange alphaRangeTable[] = {
143 {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6},
144 {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8},
145 {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1},
146 {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481},
147 {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587},
148 {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a},
149 {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5},
150 {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8},
151 {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a},
152 {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74},
153 {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
154 {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
155 {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90},
156 {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},
157 {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
158 {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
159 {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10},
160 {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
161 {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46},
162 {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0},
163 {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b},
164 {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
165 {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
166 {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256},
167 {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae},
168 {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce},
169 {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315},
170 {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4},
171 {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea},
172 {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b},
173 {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
174 {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
175 {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
176 {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
177 {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131},
178 {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa},
179 {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7},
180 {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
181 {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
182 {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
183 {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
184 {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
185 {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
186 };
187
188 #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
189
190 static chr alphaCharTable[] = {
191 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
192 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
193 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
194 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38,
195 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f,
196 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c,
197 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1,
198 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
199 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3,
200 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258,
201 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f,
202 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d,
203 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
204 };
205
206 #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
207
208 /* Unicode: decimal digit characters */
209
210 static crange digitRangeTable[] = {
211 {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f},
212 {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f},
213 {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f},
214 {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049},
215 {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19}
216 };
217
218 #define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
219
220 /* no singletons of digit characters */
221
222 /* Unicode: punctuation characters */
223
224 static crange punctRangeTable[] = {
225 {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d},
226 {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12},
227 {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed},
228 {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043},
229 {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
230 {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03},
231 {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65}
232 };
233
234 #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
235
236 static chr punctCharTable[] = {
237 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
238 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
239 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
240 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d,
241 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d,
242 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
243 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
244 };
245
246 #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
247
248 /* Unicode: white space characters */
249
250 static crange spaceRangeTable[] = {
251 {0x0009, 0x000d}, {0x2000, 0x200b}
252 };
253
254 #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
255
256 static chr spaceCharTable[] = {
257 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
258 };
259
260 #define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
261
262 /* Unicode: lowercase characters */
263
264 static crange lowerRangeTable[] = {
265 {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180},
266 {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce},
267 {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587},
268 {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27},
269 {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67},
270 {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7},
271 {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7},
272 {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a}
273 };
274
275 #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
276
277 static chr lowerCharTable[] = {
278 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
279 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
280 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
281 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140,
282 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151,
283 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163,
284 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175,
285 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192,
286 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad,
287 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce,
288 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df,
289 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0,
290 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205,
291 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217,
292 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b,
293 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd,
294 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5,
295 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471,
296 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d,
297 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f,
298 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1,
299 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4,
300 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd,
301 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef,
302 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09,
303 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b,
304 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d,
305 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f,
306 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51,
307 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63,
308 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75,
309 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87,
310 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5,
311 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7,
312 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9,
313 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb,
314 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed,
315 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe,
316 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e,
317 0x210f, 0x2113, 0x212f, 0x2134, 0x2139
318 };
319
320 #define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
321
322 /* Unicode: uppercase characters */
323
324 static crange upperRangeTable[] = {
325 {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b},
326 {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8},
327 {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4},
328 {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f},
329 {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d},
330 {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb},
331 {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112},
332 {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a}
333 };
334
335 #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
336
337 static chr upperCharTable[] = {
338 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
339 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
340 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
341 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147,
342 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a,
343 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c,
344 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d,
345 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d,
346 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae,
347 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd,
348 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0,
349 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4,
350 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a,
351 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c,
352 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230,
353 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0,
354 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460,
355 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472,
356 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e,
357 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0,
358 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2,
359 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3,
360 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
361 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee,
362 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08,
363 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a,
364 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c,
365 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e,
366 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50,
367 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
368 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74,
369 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86,
370 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2,
371 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4,
372 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6,
373 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
374 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea,
375 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b,
376 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130,
377 0x2131, 0x2133
378 };
379
380 #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
381
382 /* Unicode: unicode print characters excluding space */
383
384 static crange graphRangeTable[] = {
385 {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233},
386 {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e},
387 {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce},
388 {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486},
389 {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f},
390 {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4},
391 {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655},
392 {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d},
393 {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0},
394 {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d},
395 {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c},
396 {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4},
397 {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a},
398 {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42},
399 {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83},
400 {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
401 {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd},
402 {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
403 {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43},
404 {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a},
405 {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5},
406 {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd},
407 {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10},
408 {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
409 {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f},
410 {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
411 {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd},
412 {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f},
413 {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48},
414 {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
415 {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf},
416 {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b},
417 {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9},
418 {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9},
419 {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b},
420 {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f},
421 {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059},
422 {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159},
423 {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f},
424 {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d},
425 {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5},
426 {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6},
427 {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e},
428 {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4},
429 {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676},
430 {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9},
431 {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9},
432 {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
433 {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
434 {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3},
435 {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe},
436 {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046},
437 {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3},
438 {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3},
439 {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b},
440 {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a},
441 {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7},
442 {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704},
443 {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b},
444 {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794},
445 {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff},
446 {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5},
447 {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094},
448 {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c},
449 {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243},
450 {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe},
451 {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe},
452 {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
453 {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
454 {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
455 {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
456 {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
457 {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
458 {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f},
459 {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
460 {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
461 {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
462 {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
463 {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
464 {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
465 {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
466 {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
467 {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
468 {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
469 {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
470 {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
471 {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
472 {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
473 {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
474 {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
475 {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
476 {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
477 {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
478 {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
479 {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
480 {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
481 {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4},
482 {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
483 {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
484 {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
485 {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
486 {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
487 {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
488 {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
489 {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
490 {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
491 {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
492 {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
493 {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d},
494 {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
495 {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
496 {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
497 {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66},
498 {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
499 {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
500 {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
501 {0xfffc, 0xffff}
502 };
503
504 #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
505
506 static chr graphCharTable[] = {
507 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
508 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
509 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
510 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39,
511 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0,
512 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c,
513 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3,
514 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5,
515 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61,
516 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
517 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc,
518 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288,
519 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756,
520 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
521 };
522
523 #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
524
525 /*
526 * End of auto-generated Unicode character ranges declarations.
527 */
528
529 /*
530 * Supply implementations for some tcl functions that this module depends on
531 * to make it self contained
532 */
533
534 #include "tclUniData.c"
535 #define Tcl_UniChar wxChar
536
537 /*
538 * Compute the uppercase equivalent of the given Unicode character.
539 * Taken from tcl.
540 */
541
542 Tcl_UniChar Tcl_UniCharToUpper(int ch)
543 {
544 int info = GetUniCharInfo(ch);
545
546 if (GetCaseType(info) & 0x04) {
547 return (Tcl_UniChar) (ch - GetDelta(info));
548 } else {
549 return ch;
550 }
551 }
552
553 /*
554 * Compute the lowercase equivalent of the given Unicode character.
555 * Taken from tcl.
556 */
557
558 Tcl_UniChar Tcl_UniCharToLower(int ch)
559 {
560 int info = GetUniCharInfo(ch);
561
562 if (GetCaseType(info) & 0x02) {
563 return (Tcl_UniChar) (ch + GetDelta(info));
564 } else {
565 return ch;
566 }
567 }
568
569 /*
570 * Compute the titlecase equivalent of the given Unicode character.
571 * Taken from tcl.
572 */
573
574 Tcl_UniChar Tcl_UniCharToTitle(int ch)
575 {
576 int info = GetUniCharInfo(ch);
577 int mode = GetCaseType(info);
578
579 if (mode & 0x1) {
580 /*
581 * Subtract or add one depending on the original case.
582 */
583
584 return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1));
585 } else if (mode == 0x4) {
586 return (Tcl_UniChar) (ch - GetDelta(info));
587 } else {
588 return ch;
589 }
590 }
591
592 #else /* wxUSE_UNICODE */
593
594 #include <locale.h>
595
/* Signature shared by the <ctype.h> classification functions. */
typedef int (*isfunc_t)(int);

/*
 * ASCII character-class table.
 *
 * One entry per named class: the class name, the ASCII members of the
 * class spelled out as a string, the hasch flag, and the matching
 * <ctype.h> predicate ("blank" has none and stores NULL).  The table is
 * closed by an entry whose name is NULL.
 *
 * Long member strings are assembled from adjacent string literals, so no
 * literal has to be continued across a source line.
 */
struct cclass {
    char *name;		/* class name; NULL marks the end of the table */
    char *chars;	/* ASCII members of the class */
    int hasch;
    isfunc_t isfunc;	/* <ctype.h> predicate for the class, or NULL */
};

static struct cclass cclasses[] = {
    {"alnum",
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	"0123456789",
	1, isalnum},
    {"alpha",
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
	1, isalpha},
    {"blank",
	" \t",
	0, NULL},
    {"cntrl",
	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24"
	"\25\26\27\30\31\32\33\34\35\36\37\177",
	0, iscntrl},
    {"digit",
	"0123456789",
	0, isdigit},
    {"graph",
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	"0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
	1, isgraph},
    {"lower",
	"abcdefghijklmnopqrstuvwxyz",
	1, islower},
    {"print",
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	"0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
	1, isprint},
    {"punct",
	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
	0, ispunct},
    {"space",
	"\t\n\v\f\r ",
	0, isspace},
    {"upper",
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
	0, isupper},
    {"xdigit",
	"0123456789ABCDEFabcdef",
	0, isxdigit},
    {NULL, NULL, 0, NULL}	/* end-of-table sentinel */
};
630
631 /*
632 * Supply implementations for some tcl functions that this module depends on
633 * to make it self contained
634 */
635
636 #define Tcl_UniChar wxChar
637 Tcl_UniChar Tcl_UniCharToUpper(int ch) { return wxToupper(ch); }
638 Tcl_UniChar Tcl_UniCharToLower(int ch) { return wxTolower(ch); }
639 Tcl_UniChar Tcl_UniCharToTitle(int ch) { return wxToupper(ch); }
640
641 #endif /* !wxUSE_UNICODE */
642
643 #define CH NOCELT
644
/*
 - nmcces - how many distinct MCCEs are there?
 ^ static int nmcces(struct vars *);
 *
 * v is the compilation context; it is accepted for interface uniformity
 * and not consulted.  Always returns 0, because no multi-character
 * collating elements (MCCEs) are defined at the moment.
 */
static int
nmcces(struct vars *v)
{
    (void) v;			/* context is not needed to answer */

    return 0;
}
658
/*
 - nleaders - how many chrs can be first chrs of MCCEs?
 ^ static int nleaders(struct vars *);
 *
 * v is the compilation context; it is accepted for interface uniformity
 * and not consulted.  Always returns 0.
 */
static int
nleaders(struct vars *v)
{
    (void) v;			/* context is not needed to answer */

    return 0;
}
669
/*
 - allmcces - return a cvec with all the MCCEs of the locale
 * Nothing is added to the vector here; the result is whatever
 * clearcvec() returns for the caller's cvec.
 ^ static struct cvec *allmcces(struct vars *, struct cvec *);
 */
static struct cvec *
allmcces(struct vars *v,        /* context (not consulted) */
         struct cvec *cv)       /* this is supposed to have enough room */
{
    struct cvec *result;

    (void)v;

    result = clearcvec(cv);
    return result;
}
681
682 /*
683 - element - map collating-element name to celt
684 ^ static celt element(struct vars *, chr *, chr *);
685 */
686 static celt
687 element(v, startp, endp)
688 struct vars *v; /* context */
689 chr *startp; /* points to start of name */
690 chr *endp; /* points just past end of name */
691 {
692 struct cname *cn;
693 size_t len;
694
695 /* generic: one-chr names stand for themselves */
696 assert(startp < endp);
697 len = endp - startp;
698 if (len == 1) {
699 return *startp;
700 }
701
702 NOTE(REG_ULOCALE);
703
704 /* search table */
705 for (cn=cnames; cn->name!=NULL; cn++) {
706 if (wxStrlen_(cn->name)==len && wxStrncmp(cn->name, startp, len)==0) {
707 break; /* NOTE BREAK OUT */
708 }
709 }
710 if (cn->name != NULL) {
711 return CHR(cn->code);
712 }
713
714 /* couldn't find it */
715 ERR(REG_ECOLLATE);
716 return 0;
717 }
718
719 /*
720 - range - supply cvec for a range, including legality check
721 ^ static struct cvec *range(struct vars *, celt, celt, int);
722 */
723 static struct cvec *
724 range(v, a, b, cases)
725 struct vars *v; /* context */
726 celt a; /* range start */
727 celt b; /* range end, might equal a */
728 int cases; /* case-independent? */
729 {
730 int nchrs;
731 struct cvec *cv;
732 celt c, lc, uc, tc;
733
734 if (a != b && !before(a, b)) {
735 ERR(REG_ERANGE);
736 return NULL;
737 }
738
739 if (!cases) { /* easy version */
740 cv = getcvec(v, 0, 1, 0);
741 NOERRN();
742 addrange(cv, a, b);
743 return cv;
744 }
745
746 /*
747 * When case-independent, it's hard to decide when cvec ranges are
748 * usable, so for now at least, we won't try. We allocate enough
749 * space for two case variants plus a little extra for the two
750 * title case variants.
751 */
752
753 nchrs = (b - a + 1)*2 + 4;
754
755 cv = getcvec(v, nchrs, 0, 0);
756 NOERRN();
757
758 for (c=a; c<=b; c++) {
759 addchr(cv, c);
760 lc = Tcl_UniCharToLower((chr)c);
761 uc = Tcl_UniCharToUpper((chr)c);
762 tc = Tcl_UniCharToTitle((chr)c);
763 if (c != lc) {
764 addchr(cv, lc);
765 }
766 if (c != uc) {
767 addchr(cv, uc);
768 }
769 if (c != tc && tc != uc) {
770 addchr(cv, tc);
771 }
772 }
773
774 return cv;
775 }
776
777 /*
778 - before - is celt x before celt y, for purposes of range legality?
779 ^ static int before(celt, celt);
780 */
781 static int /* predicate */
782 before(x, y)
783 celt x, y; /* collating elements */
784 {
785 /* trivial because no MCCEs */
786 if (x < y) {
787 return 1;
788 }
789 return 0;
790 }
791
792 /*
793 - eclass - supply cvec for an equivalence class
794 * Must include case counterparts on request.
795 ^ static struct cvec *eclass(struct vars *, celt, int);
796 */
797 static struct cvec *
798 eclass(v, c, cases)
799 struct vars *v; /* context */
800 celt c; /* Collating element representing
801 * the equivalence class. */
802 int cases; /* all cases? */
803 {
804 struct cvec *cv;
805
806 /* crude fake equivalence class for testing */
807 if ((v->cflags&REG_FAKE) && c == 'x') {
808 cv = getcvec(v, 4, 0, 0);
809 addchr(cv, (chr)'x');
810 addchr(cv, (chr)'y');
811 if (cases) {
812 addchr(cv, (chr)'X');
813 addchr(cv, (chr)'Y');
814 }
815 return cv;
816 }
817
818 /* otherwise, none */
819 if (cases) {
820 return allcases(v, c);
821 }
822 cv = getcvec(v, 1, 0, 0);
823 assert(cv != NULL);
824 addchr(cv, (chr)c);
825 return cv;
826 }
827
828 #if wxUSE_UNICODE
829
830 /*
831 - cclass - supply cvec for a character class
832 * Must include case counterparts on request.
833 ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
834 */
835 static struct cvec *
836 cclass(v, startp, endp, cases)
837 struct vars *v; /* context */
838 chr *startp; /* where the name starts */
839 chr *endp; /* just past the end of the name */
840 int cases; /* case-independent? */
841 {
842 size_t len;
843 struct cvec *cv = NULL;
844 CONST chr *np;
845 chr **namePtr;
846 int i, index;
847
848 /*
849 * The following arrays define the valid character class names.
850 */
851
852 static chr *classNames[] = {
853 _T("alnum"), _T("alpha"), _T("ascii"), _T("blank"), _T("cntrl"), _T("digit"), _T("graph"),
854 _T("lower"), _T("print"), _T("punct"), _T("space"), _T("upper"), _T("xdigit"), NULL
855 };
856
857 enum classes {
858 CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
859 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
860 };
861
862
863 /*
864 * Extract the class name
865 */
866
867 len = endp - startp;
868 np = startp;
869
870 /*
871 * Remap lower and upper to alpha if the match is case insensitive.
872 */
873
874 if (cases && len == 5 && (wxStrncmp(_T("lower"), np, 5) == 0
875 || wxStrncmp(_T("upper"), np, 5) == 0)) {
876 np = _T("alpha");
877 }
878
879 /*
880 * Map the name to the corresponding enumerated value.
881 */
882
883 index = -1;
884 for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
885 if ((wxStrlen_(*namePtr) == len) && (wxStrncmp(*namePtr, np, len) == 0)) {
886 index = i;
887 break;
888 }
889 }
890 if (index == -1) {
891 ERR(REG_ECTYPE);
892 return NULL;
893 }
894
895 /*
896 * Now compute the character class contents.
897 */
898
899 switch((enum classes) index) {
900 case CC_PRINT:
901 case CC_ALNUM:
902 cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
903 if (cv) {
904 for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
905 addchr(cv, alphaCharTable[i]);
906 }
907 for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
908 addrange(cv, alphaRangeTable[i].start,
909 alphaRangeTable[i].end);
910 }
911 for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
912 addrange(cv, digitRangeTable[i].start,
913 digitRangeTable[i].end);
914 }
915 }
916 break;
917 case CC_ALPHA:
918 cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
919 if (cv) {
920 for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
921 addrange(cv, alphaRangeTable[i].start,
922 alphaRangeTable[i].end);
923 }
924 for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
925 addchr(cv, alphaCharTable[i]);
926 }
927 }
928 break;
929 case CC_ASCII:
930 cv = getcvec(v, 0, 1, 0);
931 if (cv) {
932 addrange(cv, 0, 0x7f);
933 }
934 break;
935 case CC_BLANK:
936 cv = getcvec(v, 2, 0, 0);
937 addchr(cv, '\t');
938 addchr(cv, ' ');
939 break;
940 case CC_CNTRL:
941 cv = getcvec(v, 0, 2, 0);
942 addrange(cv, 0x0, 0x1f);
943 addrange(cv, 0x7f, 0x9f);
944 break;
945 case CC_DIGIT:
946 cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
947 if (cv) {
948 for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
949 addrange(cv, digitRangeTable[i].start,
950 digitRangeTable[i].end);
951 }
952 }
953 break;
954 case CC_PUNCT:
955 cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
956 if (cv) {
957 for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
958 addrange(cv, punctRangeTable[i].start,
959 punctRangeTable[i].end);
960 }
961 for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
962 addchr(cv, punctCharTable[i]);
963 }
964 }
965 break;
966 case CC_XDIGIT:
967 /*
968 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
969 * idea how to define the digits 'a' through 'f' in
970 * non-western locales. The concept is quite possibly non
971 * portable, or only used in contextx where the characters
972 * used would be the western ones anyway! Whatever is
973 * actually the case, the number of ranges is fixed (until
974 * someone comes up with a better arrangement!)
975 */
976 cv = getcvec(v, 0, 3, 0);
977 if (cv) {
978 addrange(cv, '0', '9');
979 addrange(cv, 'a', 'f');
980 addrange(cv, 'A', 'F');
981 }
982 break;
983 case CC_SPACE:
984 cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
985 if (cv) {
986 for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
987 addrange(cv, spaceRangeTable[i].start,
988 spaceRangeTable[i].end);
989 }
990 for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
991 addchr(cv, spaceCharTable[i]);
992 }
993 }
994 break;
995 case CC_LOWER:
996 cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
997 if (cv) {
998 for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
999 addrange(cv, lowerRangeTable[i].start,
1000 lowerRangeTable[i].end);
1001 }
1002 for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
1003 addchr(cv, lowerCharTable[i]);
1004 }
1005 }
1006 break;
1007 case CC_UPPER:
1008 cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
1009 if (cv) {
1010 for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
1011 addrange(cv, upperRangeTable[i].start,
1012 upperRangeTable[i].end);
1013 }
1014 for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
1015 addchr(cv, upperCharTable[i]);
1016 }
1017 }
1018 break;
1019 case CC_GRAPH:
1020 cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
1021 if (cv) {
1022 for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
1023 addrange(cv, graphRangeTable[i].start,
1024 graphRangeTable[i].end);
1025 }
1026 for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
1027 addchr(cv, graphCharTable[i]);
1028 }
1029 }
1030 break;
1031 }
1032 if (cv == NULL) {
1033 ERR(REG_ESPACE);
1034 }
1035 return cv;
1036 }
1037
1038 #else /* wxUSE_UNICODE */
1039
1040 static struct cvec *
1041 cclass(v, startp, endp, cases)
1042 struct vars *v;
1043 chr *startp; /* where the name starts */
1044 chr *endp; /* just past the end of the name */
1045 int cases; /* case-independent? */
1046 {
1047 size_t len;
1048 char *p;
1049 struct cclass *cc;
1050 struct cvec *cv;
1051 chr *np;
1052 int i;
1053 int count;
1054 char buf[256];
1055 const char *loc;
1056
1057 /* find the name */
1058 len = endp - startp;
1059 np = startp;
1060 if (cases && len == 5 && (wxStrncmp(_T("lower"), np, 5) == 0 ||
1061 wxStrncmp(_T("upper"), np, 5) == 0))
1062 np = _T("alpha");
1063 for (cc = cclasses; cc->name != NULL; cc++)
1064 if (wxStrlen_(cc->name) == len && wxStrncmp(cc->name, np, len) == 0)
1065 break; /* NOTE BREAK OUT */
1066 if (cc->name == NULL) {
1067 ERR(REG_ECTYPE);
1068 return NULL;
1069 }
1070
1071 loc = setlocale(LC_CTYPE, NULL);
1072
1073 if (!cc->isfunc || loc == NULL || strcmp(loc, "C") == 0)
1074 {
1075 /* set up vector */
1076 cv = getcvec(v, (int)strlen(cc->chars), 0, 0);
1077 if (cv == NULL) {
1078 ERR(REG_ESPACE);
1079 return NULL;
1080 }
1081
1082 /* fill it in */
1083 for (p = cc->chars; *p != '\0'; p++)
1084 addchr(cv, (chr)*p);
1085 }
1086 else
1087 {
1088 count = 0;
1089 for (i = 0; i < 256; i++)
1090 if (cc->isfunc(i))
1091 buf[count++] = i;
1092
1093 /* set up vector */
1094 cv = getcvec(v, count, 0, 0);
1095 if (cv == NULL) {
1096 ERR(REG_ESPACE);
1097 return NULL;
1098 }
1099
1100 /* fill it in */
1101 for (i = 0; i < count; i++)
1102 addchr(cv, buf[i]);
1103 }
1104
1105 return cv;
1106 }
1107
1108 #endif /* !wxUSE_UNICODE */
1109
1110
1111 /*
1112 - allcases - supply cvec for all case counterparts of a chr (including itself)
1113 * This is a shortcut, preferably an efficient one, for simple characters;
1114 * messy cases are done via range().
1115 ^ static struct cvec *allcases(struct vars *, pchr);
1116 */
1117 static struct cvec *
1118 allcases(v, pc)
1119 struct vars *v; /* context */
1120 pchr pc; /* character to get case equivs of */
1121 {
1122 struct cvec *cv;
1123 chr c = (chr)pc;
1124 chr lc, uc, tc;
1125
1126 lc = Tcl_UniCharToLower((chr)c);
1127 uc = Tcl_UniCharToUpper((chr)c);
1128 tc = Tcl_UniCharToTitle((chr)c);
1129
1130 if (tc != uc) {
1131 cv = getcvec(v, 3, 0, 0);
1132 addchr(cv, tc);
1133 } else {
1134 cv = getcvec(v, 2, 0, 0);
1135 }
1136 addchr(cv, lc);
1137 if (lc != uc) {
1138 addchr(cv, uc);
1139 }
1140 return cv;
1141 }
1142
1143 /*
1144 - cmp - chr-substring compare
1145 * Backrefs need this. It should preferably be efficient.
1146 * Note that it does not need to report anything except equal/unequal.
1147 * Note also that the length is exact, and the comparison should not
1148 * stop at embedded NULs!
1149 ^ static int cmp(CONST chr *, CONST chr *, size_t);
1150 */
1151 static int /* 0 for equal, nonzero for unequal */
1152 cmp(x, y, len)
1153 CONST chr *x, *y; /* strings to compare */
1154 size_t len; /* exact length of comparison */
1155 {
1156 return memcmp(VS(x), VS(y), len*sizeof(chr));
1157 }
1158
1159 /*
1160 - casecmp - case-independent chr-substring compare
1161 * REG_ICASE backrefs need this. It should preferably be efficient.
1162 * Note that it does not need to report anything except equal/unequal.
1163 * Note also that the length is exact, and the comparison should not
1164 * stop at embedded NULs!
1165 ^ static int casecmp(CONST chr *, CONST chr *, size_t);
1166 */
1167 static int /* 0 for equal, nonzero for unequal */
1168 casecmp(x, y, len)
1169 CONST chr *x, *y; /* strings to compare */
1170 size_t len; /* exact length of comparison */
1171 {
1172 for (; len > 0; len--, x++, y++) {
1173 if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
1174 return 1;
1175 }
1176 }
1177 return 0;
1178 }