]> git.saurik.com Git - wxWidgets.git/blob - src/regex/regc_locale.c
695b665b1f2f9de88df97c54abe17c42e33f5125
[wxWidgets.git] / src / regex / regc_locale.c
1 /*
2 * regc_locale.c --
3 *
4 * This file contains the Unicode locale specific regexp routines.
5 * This file is #included by regcomp.c.
6 *
7 * Copyright (c) 1998 by Scriptics Corporation.
8 *
9 * See the file "license.terms" for information on usage and redistribution
10 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 *
12 * RCS: @(#) $Id$
13 */
14
/*
 * ASCII character-name table.
 *
 * Maps symbolic character names to the single character each one stands
 * for.  element() walks this table linearly, so the entry whose name is
 * NULL must remain last: it is the end-of-table sentinel.  Several
 * characters have more than one name (e.g. "BEL" and "alert").
 *
 * The names are string literals and are never written through, so the
 * pointer is declared const.  The table itself is left non-const so that
 * existing code iterating with a plain `struct cname *` is unaffected.
 */

static struct cname {
    const char *name;		/* symbolic name; NULL ends the table */
    char code;			/* character the name stands for */
} cnames[] = {
    {"NUL", '\0'},
    {"SOH", '\001'},
    {"STX", '\002'},
    {"ETX", '\003'},
    {"EOT", '\004'},
    {"ENQ", '\005'},
    {"ACK", '\006'},
    {"BEL", '\007'},
    {"alert", '\007'},
    {"BS", '\010'},
    {"backspace", '\b'},
    {"HT", '\011'},
    {"tab", '\t'},
    {"LF", '\012'},
    {"newline", '\n'},
    {"VT", '\013'},
    {"vertical-tab", '\v'},
    {"FF", '\014'},
    {"form-feed", '\f'},
    {"CR", '\015'},
    {"carriage-return", '\r'},
    {"SO", '\016'},
    {"SI", '\017'},
    {"DLE", '\020'},
    {"DC1", '\021'},
    {"DC2", '\022'},
    {"DC3", '\023'},
    {"DC4", '\024'},
    {"NAK", '\025'},
    {"SYN", '\026'},
    {"ETB", '\027'},
    {"CAN", '\030'},
    {"EM", '\031'},
    {"SUB", '\032'},
    {"ESC", '\033'},
    {"IS4", '\034'},
    {"FS", '\034'},
    {"IS3", '\035'},
    {"GS", '\035'},
    {"IS2", '\036'},
    {"RS", '\036'},
    {"IS1", '\037'},
    {"US", '\037'},
    {"space", ' '},
    {"exclamation-mark", '!'},
    {"quotation-mark", '"'},
    {"number-sign", '#'},
    {"dollar-sign", '$'},
    {"percent-sign", '%'},
    {"ampersand", '&'},
    {"apostrophe", '\''},
    {"left-parenthesis", '('},
    {"right-parenthesis", ')'},
    {"asterisk", '*'},
    {"plus-sign", '+'},
    {"comma", ','},
    {"hyphen", '-'},
    {"hyphen-minus", '-'},
    {"period", '.'},
    {"full-stop", '.'},
    {"slash", '/'},
    {"solidus", '/'},
    {"zero", '0'},
    {"one", '1'},
    {"two", '2'},
    {"three", '3'},
    {"four", '4'},
    {"five", '5'},
    {"six", '6'},
    {"seven", '7'},
    {"eight", '8'},
    {"nine", '9'},
    {"colon", ':'},
    {"semicolon", ';'},
    {"less-than-sign", '<'},
    {"equals-sign", '='},
    {"greater-than-sign", '>'},
    {"question-mark", '?'},
    {"commercial-at", '@'},
    {"left-square-bracket", '['},
    {"backslash", '\\'},
    {"reverse-solidus", '\\'},
    {"right-square-bracket", ']'},
    {"circumflex", '^'},
    {"circumflex-accent", '^'},
    {"underscore", '_'},
    {"low-line", '_'},
    {"grave-accent", '`'},
    {"left-brace", '{'},
    {"left-curly-bracket", '{'},
    {"vertical-line", '|'},
    {"right-brace", '}'},
    {"right-curly-bracket", '}'},
    {"tilde", '~'},
    {"DEL", '\177'},
    {NULL, 0}
};
118
119 /* Unicode character-class tables */
120
121 typedef struct crange {
122 chr start;
123 chr end;
124 } crange;
125
126 /*
127 * Declarations of Unicode character ranges. This code
128 * is automatically generated by the tools/uniClass.tcl script
129 * and used in generic/regc_locale.c. Do not modify by hand.
130 */
131
132 /* Unicode: alphabetic characters */
133
134 static crange alphaRangeTable[] = {
135 {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6},
136 {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8},
137 {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1},
138 {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481},
139 {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587},
140 {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a},
141 {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5},
142 {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8},
143 {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a},
144 {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74},
145 {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
146 {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
147 {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90},
148 {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},
149 {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
150 {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
151 {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10},
152 {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
153 {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46},
154 {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0},
155 {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b},
156 {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
157 {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
158 {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256},
159 {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae},
160 {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce},
161 {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315},
162 {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4},
163 {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea},
164 {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b},
165 {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
166 {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
167 {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
168 {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
169 {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131},
170 {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa},
171 {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7},
172 {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
173 {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
174 {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
175 {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
176 {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
177 {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
178 };
179
180 #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
181
182 static chr alphaCharTable[] = {
183 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
184 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
185 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
186 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38,
187 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f,
188 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c,
189 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1,
190 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
191 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3,
192 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258,
193 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f,
194 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d,
195 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
196 };
197
198 #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
199
200 /* Unicode: decimal digit characters */
201
202 static crange digitRangeTable[] = {
203 {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f},
204 {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f},
205 {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f},
206 {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049},
207 {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19}
208 };
209
210 #define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
211
212 /* no singletons of digit characters */
213
214 /* Unicode: punctuation characters */
215
216 static crange punctRangeTable[] = {
217 {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d},
218 {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12},
219 {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed},
220 {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043},
221 {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
222 {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03},
223 {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65}
224 };
225
226 #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
227
228 static chr punctCharTable[] = {
229 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
230 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
231 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
232 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d,
233 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d,
234 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
235 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
236 };
237
238 #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
239
240 /* Unicode: white space characters */
241
242 static crange spaceRangeTable[] = {
243 {0x0009, 0x000d}, {0x2000, 0x200b}
244 };
245
246 #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
247
248 static chr spaceCharTable[] = {
249 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
250 };
251
252 #define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
253
254 /* Unicode: lowercase characters */
255
256 static crange lowerRangeTable[] = {
257 {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180},
258 {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce},
259 {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587},
260 {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27},
261 {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67},
262 {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7},
263 {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7},
264 {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a}
265 };
266
267 #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
268
269 static chr lowerCharTable[] = {
270 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
271 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
272 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
273 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140,
274 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151,
275 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163,
276 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175,
277 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192,
278 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad,
279 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce,
280 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df,
281 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0,
282 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205,
283 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217,
284 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b,
285 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd,
286 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5,
287 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471,
288 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d,
289 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f,
290 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1,
291 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4,
292 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd,
293 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef,
294 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09,
295 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b,
296 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d,
297 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f,
298 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51,
299 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63,
300 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75,
301 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87,
302 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5,
303 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7,
304 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9,
305 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb,
306 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed,
307 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe,
308 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e,
309 0x210f, 0x2113, 0x212f, 0x2134, 0x2139
310 };
311
312 #define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
313
314 /* Unicode: uppercase characters */
315
316 static crange upperRangeTable[] = {
317 {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b},
318 {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8},
319 {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4},
320 {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f},
321 {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d},
322 {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb},
323 {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112},
324 {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a}
325 };
326
327 #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
328
329 static chr upperCharTable[] = {
330 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
331 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
332 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
333 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147,
334 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a,
335 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c,
336 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d,
337 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d,
338 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae,
339 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd,
340 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0,
341 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4,
342 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a,
343 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c,
344 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230,
345 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0,
346 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460,
347 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472,
348 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e,
349 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0,
350 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2,
351 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3,
352 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
353 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee,
354 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08,
355 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a,
356 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c,
357 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e,
358 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50,
359 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
360 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74,
361 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86,
362 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2,
363 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4,
364 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6,
365 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
366 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea,
367 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b,
368 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130,
369 0x2131, 0x2133
370 };
371
372 #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
373
374 /* Unicode: unicode print characters excluding space */
375
376 static crange graphRangeTable[] = {
377 {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233},
378 {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e},
379 {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce},
380 {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486},
381 {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f},
382 {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4},
383 {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655},
384 {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d},
385 {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0},
386 {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d},
387 {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c},
388 {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4},
389 {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a},
390 {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42},
391 {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83},
392 {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
393 {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd},
394 {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
395 {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43},
396 {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a},
397 {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5},
398 {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd},
399 {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10},
400 {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
401 {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f},
402 {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
403 {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd},
404 {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f},
405 {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48},
406 {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
407 {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf},
408 {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b},
409 {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9},
410 {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9},
411 {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b},
412 {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f},
413 {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059},
414 {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159},
415 {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f},
416 {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d},
417 {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5},
418 {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6},
419 {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e},
420 {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4},
421 {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676},
422 {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9},
423 {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9},
424 {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
425 {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
426 {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3},
427 {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe},
428 {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046},
429 {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3},
430 {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3},
431 {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b},
432 {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a},
433 {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7},
434 {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704},
435 {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b},
436 {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794},
437 {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff},
438 {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5},
439 {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094},
440 {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c},
441 {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243},
442 {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe},
443 {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe},
444 {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
445 {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
446 {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
447 {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
448 {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
449 {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
450 {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f},
451 {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
452 {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
453 {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
454 {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
455 {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
456 {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
457 {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
458 {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
459 {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
460 {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
461 {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
462 {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
463 {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
464 {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
465 {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
466 {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
467 {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
468 {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
469 {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
470 {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
471 {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
472 {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
473 {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4},
474 {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
475 {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
476 {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
477 {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
478 {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
479 {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
480 {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
481 {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
482 {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
483 {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
484 {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
485 {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d},
486 {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
487 {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
488 {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
489 {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66},
490 {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
491 {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
492 {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
493 {0xfffc, 0xffff}
494 };
495
496 #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
497
498 static chr graphCharTable[] = {
499 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
500 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
501 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
502 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39,
503 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0,
504 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c,
505 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3,
506 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5,
507 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61,
508 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
509 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc,
510 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288,
511 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756,
512 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
513 };
514
515 #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
516
517 /*
518 * End of auto-generated Unicode character ranges declarations.
519 */
520
/*
 * CH is a shorthand alias for NOCELT, which is defined outside this file.
 * NOTE(review): no use of CH appears in the visible part of this file --
 * confirm whether it is still referenced before removing it.
 */
#define CH NOCELT
522
/*
 - nmcces - how many distinct MCCEs are there?
 ^ static int nmcces(struct vars *);
 */
/*
 * Reports the number of multi-character collating elements (MCCEs).
 * None are defined at the moment, so the answer is always zero and the
 * context argument is not consulted.
 */
static int
nmcces(v)
struct vars *v;			/* context (unused) */
{
    int count = 0;		/* no MCCEs are defined */

    return count;
}
536
/*
 - nleaders - how many chrs can be first chrs of MCCEs?
 ^ static int nleaders(struct vars *);
 */
/*
 * Always returns zero; the context argument is not consulted.
 */
static int
nleaders(v)
struct vars *v;			/* context (unused) */
{
    int count = 0;		/* constant result */

    return count;
}
547
/*
 - allmcces - return a cvec with all the MCCEs of the locale
 ^ static struct cvec *allmcces(struct vars *, struct cvec *);
 */
/*
 * Hands the caller's cvec to clearcvec() and returns whatever that call
 * returns; nothing is added to the vector here.  The context argument is
 * not consulted.
 */
static struct cvec *
allmcces(v, cv)
struct vars *v;			/* context (unused) */
struct cvec *cv;		/* this is supposed to have enough room */
{
    struct cvec *cleared = clearcvec(cv);

    return cleared;
}
559
560 /*
561 - element - map collating-element name to celt
562 ^ static celt element(struct vars *, chr *, chr *);
563 */
564 static celt
565 element(v, startp, endp)
566 struct vars *v; /* context */
567 chr *startp; /* points to start of name */
568 chr *endp; /* points just past end of name */
569 {
570 struct cname *cn;
571 size_t len;
572 Tcl_DString ds;
573 CONST char *np;
574
575 /* generic: one-chr names stand for themselves */
576 assert(startp < endp);
577 len = endp - startp;
578 if (len == 1) {
579 return *startp;
580 }
581
582 NOTE(REG_ULOCALE);
583
584 /* search table */
585 Tcl_DStringInit(&ds);
586 np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
587 for (cn=cnames; cn->name!=NULL; cn++) {
588 if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
589 break; /* NOTE BREAK OUT */
590 }
591 }
592 Tcl_DStringFree(&ds);
593 if (cn->name != NULL) {
594 return CHR(cn->code);
595 }
596
597 /* couldn't find it */
598 ERR(REG_ECOLLATE);
599 return 0;
600 }
601
602 /*
603 - range - supply cvec for a range, including legality check
604 ^ static struct cvec *range(struct vars *, celt, celt, int);
605 */
606 static struct cvec *
607 range(v, a, b, cases)
608 struct vars *v; /* context */
609 celt a; /* range start */
610 celt b; /* range end, might equal a */
611 int cases; /* case-independent? */
612 {
613 int nchrs;
614 struct cvec *cv;
615 celt c, lc, uc, tc;
616
617 if (a != b && !before(a, b)) {
618 ERR(REG_ERANGE);
619 return NULL;
620 }
621
622 if (!cases) { /* easy version */
623 cv = getcvec(v, 0, 1, 0);
624 NOERRN();
625 addrange(cv, a, b);
626 return cv;
627 }
628
629 /*
630 * When case-independent, it's hard to decide when cvec ranges are
631 * usable, so for now at least, we won't try. We allocate enough
632 * space for two case variants plus a little extra for the two
633 * title case variants.
634 */
635
636 nchrs = (b - a + 1)*2 + 4;
637
638 cv = getcvec(v, nchrs, 0, 0);
639 NOERRN();
640
641 for (c=a; c<=b; c++) {
642 addchr(cv, c);
643 lc = Tcl_UniCharToLower((chr)c);
644 uc = Tcl_UniCharToUpper((chr)c);
645 tc = Tcl_UniCharToTitle((chr)c);
646 if (c != lc) {
647 addchr(cv, lc);
648 }
649 if (c != uc) {
650 addchr(cv, uc);
651 }
652 if (c != tc && tc != uc) {
653 addchr(cv, tc);
654 }
655 }
656
657 return cv;
658 }
659
660 /*
661 - before - is celt x before celt y, for purposes of range legality?
662 ^ static int before(celt, celt);
663 */
664 static int /* predicate */
665 before(x, y)
666 celt x, y; /* collating elements */
667 {
668 /* trivial because no MCCEs */
669 if (x < y) {
670 return 1;
671 }
672 return 0;
673 }
674
675 /*
676 - eclass - supply cvec for an equivalence class
677 * Must include case counterparts on request.
678 ^ static struct cvec *eclass(struct vars *, celt, int);
679 */
680 static struct cvec *
681 eclass(v, c, cases)
682 struct vars *v; /* context */
683 celt c; /* Collating element representing
684 * the equivalence class. */
685 int cases; /* all cases? */
686 {
687 struct cvec *cv;
688
689 /* crude fake equivalence class for testing */
690 if ((v->cflags&REG_FAKE) && c == 'x') {
691 cv = getcvec(v, 4, 0, 0);
692 addchr(cv, (chr)'x');
693 addchr(cv, (chr)'y');
694 if (cases) {
695 addchr(cv, (chr)'X');
696 addchr(cv, (chr)'Y');
697 }
698 return cv;
699 }
700
701 /* otherwise, none */
702 if (cases) {
703 return allcases(v, c);
704 }
705 cv = getcvec(v, 1, 0, 0);
706 assert(cv != NULL);
707 addchr(cv, (chr)c);
708 return cv;
709 }
710
711 /*
712 - cclass - supply cvec for a character class
713 * Must include case counterparts on request.
714 ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
715 */
716 static struct cvec *
717 cclass(v, startp, endp, cases)
718 struct vars *v; /* context */
719 chr *startp; /* where the name starts */
720 chr *endp; /* just past the end of the name */
721 int cases; /* case-independent? */
722 {
723 size_t len;
724 struct cvec *cv = NULL;
725 Tcl_DString ds;
726 CONST char *np;
727 char **namePtr;
728 int i, index;
729
730 /*
731 * The following arrays define the valid character class names.
732 */
733
734 static char *classNames[] = {
735 "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
736 "lower", "print", "punct", "space", "upper", "xdigit", NULL
737 };
738
739 enum classes {
740 CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
741 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
742 };
743
744
745 /*
746 * Extract the class name
747 */
748
749 len = endp - startp;
750 Tcl_DStringInit(&ds);
751 np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
752
753 /*
754 * Remap lower and upper to alpha if the match is case insensitive.
755 */
756
757 if (cases && len == 5 && (strncmp("lower", np, 5) == 0
758 || strncmp("upper", np, 5) == 0)) {
759 np = "alpha";
760 }
761
762 /*
763 * Map the name to the corresponding enumerated value.
764 */
765
766 index = -1;
767 for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
768 if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
769 index = i;
770 break;
771 }
772 }
773 Tcl_DStringInit(&ds);
774 if (index == -1) {
775 ERR(REG_ECTYPE);
776 return NULL;
777 }
778
779 /*
780 * Now compute the character class contents.
781 */
782
783 switch((enum classes) index) {
784 case CC_PRINT:
785 case CC_ALNUM:
786 cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
787 if (cv) {
788 for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
789 addchr(cv, alphaCharTable[i]);
790 }
791 for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
792 addrange(cv, alphaRangeTable[i].start,
793 alphaRangeTable[i].end);
794 }
795 for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
796 addrange(cv, digitRangeTable[i].start,
797 digitRangeTable[i].end);
798 }
799 }
800 break;
801 case CC_ALPHA:
802 cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
803 if (cv) {
804 for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
805 addrange(cv, alphaRangeTable[i].start,
806 alphaRangeTable[i].end);
807 }
808 for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
809 addchr(cv, alphaCharTable[i]);
810 }
811 }
812 break;
813 case CC_ASCII:
814 cv = getcvec(v, 0, 1, 0);
815 if (cv) {
816 addrange(cv, 0, 0x7f);
817 }
818 break;
819 case CC_BLANK:
820 cv = getcvec(v, 2, 0, 0);
821 addchr(cv, '\t');
822 addchr(cv, ' ');
823 break;
824 case CC_CNTRL:
825 cv = getcvec(v, 0, 2, 0);
826 addrange(cv, 0x0, 0x1f);
827 addrange(cv, 0x7f, 0x9f);
828 break;
829 case CC_DIGIT:
830 cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
831 if (cv) {
832 for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
833 addrange(cv, digitRangeTable[i].start,
834 digitRangeTable[i].end);
835 }
836 }
837 break;
838 case CC_PUNCT:
839 cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
840 if (cv) {
841 for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
842 addrange(cv, punctRangeTable[i].start,
843 punctRangeTable[i].end);
844 }
845 for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
846 addchr(cv, punctCharTable[i]);
847 }
848 }
849 break;
850 case CC_XDIGIT:
851 /*
852 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
853 * idea how to define the digits 'a' through 'f' in
854 * non-western locales. The concept is quite possibly non
855 * portable, or only used in contextx where the characters
856 * used would be the western ones anyway! Whatever is
857 * actually the case, the number of ranges is fixed (until
858 * someone comes up with a better arrangement!)
859 */
860 cv = getcvec(v, 0, 3, 0);
861 if (cv) {
862 addrange(cv, '0', '9');
863 addrange(cv, 'a', 'f');
864 addrange(cv, 'A', 'F');
865 }
866 break;
867 case CC_SPACE:
868 cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
869 if (cv) {
870 for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
871 addrange(cv, spaceRangeTable[i].start,
872 spaceRangeTable[i].end);
873 }
874 for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
875 addchr(cv, spaceCharTable[i]);
876 }
877 }
878 break;
879 case CC_LOWER:
880 cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
881 if (cv) {
882 for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
883 addrange(cv, lowerRangeTable[i].start,
884 lowerRangeTable[i].end);
885 }
886 for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
887 addchr(cv, lowerCharTable[i]);
888 }
889 }
890 break;
891 case CC_UPPER:
892 cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
893 if (cv) {
894 for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
895 addrange(cv, upperRangeTable[i].start,
896 upperRangeTable[i].end);
897 }
898 for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
899 addchr(cv, upperCharTable[i]);
900 }
901 }
902 break;
903 case CC_GRAPH:
904 cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
905 if (cv) {
906 for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
907 addrange(cv, graphRangeTable[i].start,
908 graphRangeTable[i].end);
909 }
910 for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
911 addchr(cv, graphCharTable[i]);
912 }
913 }
914 break;
915 }
916 if (cv == NULL) {
917 ERR(REG_ESPACE);
918 }
919 return cv;
920 }
921
922 /*
923 - allcases - supply cvec for all case counterparts of a chr (including itself)
924 * This is a shortcut, preferably an efficient one, for simple characters;
925 * messy cases are done via range().
926 ^ static struct cvec *allcases(struct vars *, pchr);
927 */
928 static struct cvec *
929 allcases(v, pc)
930 struct vars *v; /* context */
931 pchr pc; /* character to get case equivs of */
932 {
933 struct cvec *cv;
934 chr c = (chr)pc;
935 chr lc, uc, tc;
936
937 lc = Tcl_UniCharToLower((chr)c);
938 uc = Tcl_UniCharToUpper((chr)c);
939 tc = Tcl_UniCharToTitle((chr)c);
940
941 if (tc != uc) {
942 cv = getcvec(v, 3, 0, 0);
943 addchr(cv, tc);
944 } else {
945 cv = getcvec(v, 2, 0, 0);
946 }
947 addchr(cv, lc);
948 if (lc != uc) {
949 addchr(cv, uc);
950 }
951 return cv;
952 }
953
954 /*
955 - cmp - chr-substring compare
956 * Backrefs need this. It should preferably be efficient.
957 * Note that it does not need to report anything except equal/unequal.
958 * Note also that the length is exact, and the comparison should not
959 * stop at embedded NULs!
960 ^ static int cmp(CONST chr *, CONST chr *, size_t);
961 */
962 static int /* 0 for equal, nonzero for unequal */
963 cmp(x, y, len)
964 CONST chr *x, *y; /* strings to compare */
965 size_t len; /* exact length of comparison */
966 {
967 return memcmp(VS(x), VS(y), len*sizeof(chr));
968 }
969
970 /*
971 - casecmp - case-independent chr-substring compare
972 * REG_ICASE backrefs need this. It should preferably be efficient.
973 * Note that it does not need to report anything except equal/unequal.
974 * Note also that the length is exact, and the comparison should not
975 * stop at embedded NULs!
976 ^ static int casecmp(CONST chr *, CONST chr *, size_t);
977 */
978 static int /* 0 for equal, nonzero for unequal */
979 casecmp(x, y, len)
980 CONST chr *x, *y; /* strings to compare */
981 size_t len; /* exact length of comparison */
982 {
983 for (; len > 0; len--, x++, y++) {
984 if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
985 return 1;
986 }
987 }
988 return 0;
989 }