]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 1999-2003, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: genprops.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 1999dec08 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This program reads several of the Unicode character database text files, | |
17 | * parses them, and extracts most of the properties for each character. | |
18 | * It then writes a binary file containing the properties | |
19 | * that is designed to be used directly for random-access to | |
20 | * the properties of each Unicode character. | |
21 | */ | |
22 | ||
23 | #include <stdio.h> | |
24 | #include <stdlib.h> | |
25 | #include "unicode/utypes.h" | |
26 | #include "unicode/uchar.h" | |
27 | #include "unicode/uset.h" | |
28 | #include "unicode/putil.h" | |
374ca955 | 29 | #include "unicode/uclean.h" |
b75a7d8f A |
30 | #include "cmemory.h" |
31 | #include "cstring.h" | |
32 | #include "unewdata.h" | |
33 | #include "uoptions.h" | |
34 | #include "uparse.h" | |
35 | #include "uprops.h" | |
36 | #include "propsvec.h" | |
37 | ||
38 | U_CDECL_BEGIN | |
39 | #include "genprops.h" | |
40 | U_CDECL_END | |
41 | ||
42 | #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) | |
43 | ||
44 | UBool beVerbose=FALSE, haveCopyright=TRUE; | |
45 | ||
46 | /* | |
47 | * Unicode set collecting the case-sensitive characters; | |
48 | * see uchar.h UCHAR_CASE_SENSITIVE. | |
49 | * Add code points from case mappings/foldings in | |
50 | * the root locale and with default options. | |
51 | */ | |
52 | static USet *caseSensitive; | |
53 | ||
54 | /* prototypes --------------------------------------------------------------- */ | |
55 | ||
56 | static void | |
57 | parseBidiMirroring(const char *filename, UErrorCode *pErrorCode); | |
58 | ||
59 | static void | |
60 | parseSpecialCasing(const char *filename, UErrorCode *pErrorCode); | |
61 | ||
62 | static void | |
63 | parseCaseFolding(const char *filename, UErrorCode *pErrorCode); | |
64 | ||
65 | static void | |
66 | parseDB(const char *filename, UErrorCode *pErrorCode); | |
67 | ||
68 | /* -------------------------------------------------------------------------- */ | |
69 | ||
374ca955 A |
70 | |
71 | enum | |
72 | { | |
73 | HELP_H, | |
74 | HELP_QUESTION_MARK, | |
75 | VERBOSE, | |
76 | COPYRIGHT, | |
77 | DESTDIR, | |
78 | SOURCEDIR, | |
79 | UNICODE_VERSION, | |
80 | ICUDATADIR | |
81 | }; | |
82 | ||
83 | /* Keep these values in sync with the above enums */ | |
b75a7d8f A |
84 | static UOption options[]={ |
85 | UOPTION_HELP_H, | |
86 | UOPTION_HELP_QUESTION_MARK, | |
87 | UOPTION_VERBOSE, | |
88 | UOPTION_COPYRIGHT, | |
89 | UOPTION_DESTDIR, | |
90 | UOPTION_SOURCEDIR, | |
374ca955 A |
91 | { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 }, |
92 | UOPTION_ICUDATADIR | |
b75a7d8f A |
93 | }; |
94 | ||
95 | extern int | |
96 | main(int argc, char* argv[]) { | |
97 | char filename[300]; | |
98 | const char *srcDir=NULL, *destDir=NULL, *suffix=NULL; | |
99 | char *basename=NULL; | |
100 | UErrorCode errorCode=U_ZERO_ERROR; | |
101 | ||
102 | U_MAIN_INIT_ARGS(argc, argv); | |
103 | ||
104 | /* preset then read command line options */ | |
374ca955 A |
105 | options[DESTDIR].value=u_getDataDirectory(); |
106 | options[SOURCEDIR].value=""; | |
107 | options[UNICODE_VERSION].value=""; | |
108 | options[ICUDATADIR].value=u_getDataDirectory(); | |
b75a7d8f A |
109 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); |
110 | ||
111 | /* error handling, printing usage message */ | |
112 | if(argc<0) { | |
113 | fprintf(stderr, | |
114 | "error in command line argument \"%s\"\n", | |
115 | argv[-argc]); | |
116 | } | |
374ca955 | 117 | if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) { |
b75a7d8f A |
118 | /* |
119 | * Broken into chucks because the C89 standard says the minimum | |
120 | * required supported string length is 509 bytes. | |
121 | */ | |
122 | fprintf(stderr, | |
123 | "Usage: %s [-options] [suffix]\n" | |
124 | "\n" | |
125 | "read the UnicodeData.txt file and other Unicode properties files and\n" | |
126 | "create a binary file " DATA_NAME "." DATA_TYPE " with the character properties\n" | |
127 | "\n", | |
128 | argv[0]); | |
129 | fprintf(stderr, | |
130 | "Options:\n" | |
131 | "\t-h or -? or --help this usage text\n" | |
132 | "\t-v or --verbose verbose output\n" | |
133 | "\t-c or --copyright include a copyright notice\n" | |
134 | "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n"); | |
135 | fprintf(stderr, | |
136 | "\t-d or --destdir destination directory, followed by the path\n" | |
137 | "\t-s or --sourcedir source directory, followed by the path\n" | |
374ca955 A |
138 | "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" |
139 | "\t followed by path, defaults to %s\n" | |
b75a7d8f A |
140 | "\tsuffix suffix that is to be appended with a '-'\n" |
141 | "\t to the source file basenames before opening;\n" | |
374ca955 A |
142 | "\t 'genprops new' will read UnicodeData-new.txt etc.\n", |
143 | u_getDataDirectory()); | |
b75a7d8f A |
144 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
145 | } | |
146 | ||
147 | /* get the options values */ | |
374ca955 A |
148 | beVerbose=options[VERBOSE].doesOccur; |
149 | haveCopyright=options[COPYRIGHT].doesOccur; | |
150 | srcDir=options[SOURCEDIR].value; | |
151 | destDir=options[DESTDIR].value; | |
b75a7d8f A |
152 | |
153 | if(argc>=2) { | |
154 | suffix=argv[1]; | |
155 | } else { | |
156 | suffix=NULL; | |
157 | } | |
158 | ||
374ca955 A |
159 | if(options[UNICODE_VERSION].doesOccur) { |
160 | setUnicodeVersion(options[UNICODE_VERSION].value); | |
b75a7d8f A |
161 | } |
162 | /* else use the default dataVersion in store.c */ | |
163 | ||
374ca955 A |
164 | if (options[ICUDATADIR].doesOccur) { |
165 | u_setDataDirectory(options[ICUDATADIR].value); | |
166 | } | |
167 | ||
b75a7d8f A |
168 | /* prepare the filename beginning with the source dir */ |
169 | uprv_strcpy(filename, srcDir); | |
170 | basename=filename+uprv_strlen(filename); | |
171 | if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { | |
172 | *basename++=U_FILE_SEP_CHAR; | |
173 | } | |
174 | ||
175 | /* initialize */ | |
176 | initStore(); | |
177 | caseSensitive=uset_open(1, 0); /* empty set (start>end) */ | |
178 | ||
179 | /* process BidiMirroring.txt */ | |
180 | writeUCDFilename(basename, "BidiMirroring", suffix); | |
181 | parseBidiMirroring(filename, &errorCode); | |
182 | ||
183 | /* process SpecialCasing.txt */ | |
184 | writeUCDFilename(basename, "SpecialCasing", suffix); | |
185 | parseSpecialCasing(filename, &errorCode); | |
186 | ||
187 | /* process CaseFolding.txt */ | |
188 | writeUCDFilename(basename, "CaseFolding", suffix); | |
189 | parseCaseFolding(filename, &errorCode); | |
190 | ||
191 | /* process UnicodeData.txt */ | |
192 | writeUCDFilename(basename, "UnicodeData", suffix); | |
193 | parseDB(filename, &errorCode); | |
194 | ||
195 | /* process additional properties files */ | |
196 | *basename=0; | |
197 | generateAdditionalProperties(filename, suffix, &errorCode); | |
198 | ||
199 | /* process parsed data */ | |
200 | if(U_SUCCESS(errorCode)) { | |
201 | /* write the properties data file */ | |
202 | generateData(destDir); | |
203 | } | |
204 | ||
374ca955 | 205 | u_cleanup(); |
b75a7d8f A |
206 | return errorCode; |
207 | } | |
208 | ||
209 | U_CFUNC void | |
210 | writeUCDFilename(char *basename, const char *filename, const char *suffix) { | |
374ca955 | 211 | int32_t length=(int32_t)uprv_strlen(filename); |
b75a7d8f A |
212 | uprv_strcpy(basename, filename); |
213 | if(suffix!=NULL) { | |
214 | basename[length++]='-'; | |
215 | uprv_strcpy(basename+length, suffix); | |
374ca955 | 216 | length+=(int32_t)uprv_strlen(suffix); |
b75a7d8f A |
217 | } |
218 | uprv_strcpy(basename+length, ".txt"); | |
219 | } | |
220 | ||
221 | U_CFUNC UBool | |
222 | isToken(const char *token, const char *s) { | |
223 | const char *z; | |
224 | int32_t j; | |
225 | ||
226 | s=u_skipWhitespace(s); | |
227 | for(j=0;; ++j) { | |
228 | if(token[j]!=0) { | |
229 | if(s[j]!=token[j]) { | |
230 | break; | |
231 | } | |
232 | } else { | |
233 | z=u_skipWhitespace(s+j); | |
234 | if(*z==';' || *z==0) { | |
235 | return TRUE; | |
236 | } else { | |
237 | break; | |
238 | } | |
239 | } | |
240 | } | |
241 | ||
242 | return FALSE; | |
243 | } | |
244 | ||
245 | U_CFUNC int32_t | |
246 | getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { | |
247 | const char *t, *z; | |
248 | int32_t i, j; | |
249 | ||
250 | s=u_skipWhitespace(s); | |
251 | for(i=0; i<countTokens; ++i) { | |
252 | t=tokens[i]; | |
253 | if(t!=NULL) { | |
254 | for(j=0;; ++j) { | |
255 | if(t[j]!=0) { | |
256 | if(s[j]!=t[j]) { | |
257 | break; | |
258 | } | |
259 | } else { | |
260 | z=u_skipWhitespace(s+j); | |
261 | if(*z==';' || *z==0 || *z=='#' || *z=='\r' || *z=='\n') { | |
262 | return i; | |
263 | } else { | |
264 | break; | |
265 | } | |
266 | } | |
267 | } | |
268 | } | |
269 | } | |
270 | return -1; | |
271 | } | |
272 | ||
273 | static void | |
274 | _set_addAll(USet *set, const UChar *s, int32_t length) { | |
275 | UChar32 c; | |
276 | int32_t i; | |
277 | ||
278 | /* needs length>=0 */ | |
279 | for(i=0; i<length; /* U16_NEXT advances i */) { | |
280 | U16_NEXT(s, i, length, c); | |
281 | uset_add(set, c); | |
282 | } | |
283 | } | |
284 | ||
285 | /* parser for BidiMirroring.txt --------------------------------------------- */ | |
286 | ||
287 | #define MAX_MIRROR_COUNT 2000 | |
288 | ||
289 | static uint32_t mirrorMappings[MAX_MIRROR_COUNT][2]; | |
290 | static int32_t mirrorCount=0; | |
291 | ||
292 | static void U_CALLCONV | |
293 | mirrorLineFn(void *context, | |
294 | char *fields[][2], int32_t fieldCount, | |
295 | UErrorCode *pErrorCode) { | |
296 | char *end; | |
297 | static uint32_t prevCode=0; | |
298 | ||
299 | mirrorMappings[mirrorCount][0]=(uint32_t)uprv_strtoul(fields[0][0], &end, 16); | |
300 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
301 | fprintf(stderr, "genprops: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]); | |
302 | *pErrorCode=U_PARSE_ERROR; | |
303 | exit(U_PARSE_ERROR); | |
304 | } | |
305 | ||
306 | mirrorMappings[mirrorCount][1]=(uint32_t)uprv_strtoul(fields[1][0], &end, 16); | |
307 | if(end<=fields[1][0] || end!=fields[1][1]) { | |
308 | fprintf(stderr, "genprops: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]); | |
309 | *pErrorCode=U_PARSE_ERROR; | |
310 | exit(U_PARSE_ERROR); | |
311 | } | |
312 | ||
313 | /* check that the code points (mirrorMappings[mirrorCount][0]) are in ascending order */ | |
314 | if(mirrorMappings[mirrorCount][0]<=prevCode && mirrorMappings[mirrorCount][0]>0) { | |
315 | fprintf(stderr, "genprops: error - BidiMirroring entries out of order, U+%04lx after U+%04lx\n", | |
316 | (unsigned long)mirrorMappings[mirrorCount][0], | |
317 | (unsigned long)prevCode); | |
318 | *pErrorCode=U_PARSE_ERROR; | |
319 | exit(U_PARSE_ERROR); | |
320 | } | |
321 | prevCode=mirrorMappings[mirrorCount][0]; | |
322 | ||
323 | if(++mirrorCount==MAX_MIRROR_COUNT) { | |
324 | fprintf(stderr, "genprops: too many mirror mappings\n"); | |
325 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
326 | exit(U_INDEX_OUTOFBOUNDS_ERROR); | |
327 | } | |
328 | } | |
329 | ||
330 | static void | |
331 | parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) { | |
332 | char *fields[2][2]; | |
333 | ||
334 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
335 | return; | |
336 | } | |
337 | ||
338 | u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode); | |
339 | } | |
340 | ||
341 | /* parser for SpecialCasing.txt --------------------------------------------- */ | |
342 | ||
343 | #define MAX_SPECIAL_CASING_COUNT 500 | |
344 | ||
345 | static SpecialCasing specialCasings[MAX_SPECIAL_CASING_COUNT]; | |
346 | static int32_t specialCasingCount=0; | |
347 | ||
348 | static void U_CALLCONV | |
349 | specialCasingLineFn(void *context, | |
350 | char *fields[][2], int32_t fieldCount, | |
351 | UErrorCode *pErrorCode) { | |
352 | char *end; | |
353 | ||
354 | /* get code point */ | |
355 | specialCasings[specialCasingCount].code=(uint32_t)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16); | |
356 | end=(char *)u_skipWhitespace(end); | |
357 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
358 | fprintf(stderr, "genprops: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]); | |
359 | *pErrorCode=U_PARSE_ERROR; | |
360 | exit(U_PARSE_ERROR); | |
361 | } | |
362 | ||
363 | /* is this a complex mapping? */ | |
364 | if(*(end=(char *)u_skipWhitespace(fields[4][0]))!=0 && *end!=';' && *end!='#') { | |
365 | /* there is some condition text in the fifth field */ | |
366 | specialCasings[specialCasingCount].isComplex=TRUE; | |
367 | ||
368 | /* do not store any actual mappings for this */ | |
369 | specialCasings[specialCasingCount].lowerCase[0]=0; | |
370 | specialCasings[specialCasingCount].upperCase[0]=0; | |
371 | specialCasings[specialCasingCount].titleCase[0]=0; | |
372 | } else { | |
373 | /* just set the "complex" flag and get the case mappings */ | |
374 | specialCasings[specialCasingCount].isComplex=FALSE; | |
375 | specialCasings[specialCasingCount].lowerCase[0]= | |
376 | (UChar)u_parseString(fields[1][0], specialCasings[specialCasingCount].lowerCase+1, 31, NULL, pErrorCode); | |
377 | specialCasings[specialCasingCount].upperCase[0]= | |
378 | (UChar)u_parseString(fields[3][0], specialCasings[specialCasingCount].upperCase+1, 31, NULL, pErrorCode); | |
379 | specialCasings[specialCasingCount].titleCase[0]= | |
380 | (UChar)u_parseString(fields[2][0], specialCasings[specialCasingCount].titleCase+1, 31, NULL, pErrorCode); | |
381 | if(U_FAILURE(*pErrorCode)) { | |
382 | fprintf(stderr, "genprops: error parsing special casing at %s\n", fields[0][0]); | |
383 | exit(*pErrorCode); | |
384 | } | |
385 | ||
386 | uset_add(caseSensitive, (UChar32)specialCasings[specialCasingCount].code); | |
387 | _set_addAll(caseSensitive, specialCasings[specialCasingCount].lowerCase+1, specialCasings[specialCasingCount].lowerCase[0]); | |
388 | _set_addAll(caseSensitive, specialCasings[specialCasingCount].upperCase+1, specialCasings[specialCasingCount].upperCase[0]); | |
389 | _set_addAll(caseSensitive, specialCasings[specialCasingCount].titleCase+1, specialCasings[specialCasingCount].titleCase[0]); | |
390 | } | |
391 | ||
392 | if(++specialCasingCount==MAX_SPECIAL_CASING_COUNT) { | |
393 | fprintf(stderr, "genprops: too many special casing mappings\n"); | |
394 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
395 | exit(U_INDEX_OUTOFBOUNDS_ERROR); | |
396 | } | |
397 | } | |
398 | ||
399 | static int | |
400 | compareSpecialCasings(const void *left, const void *right) { | |
401 | return ((const SpecialCasing *)left)->code-((const SpecialCasing *)right)->code; | |
402 | } | |
403 | ||
404 | static void | |
405 | parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) { | |
406 | char *fields[5][2]; | |
407 | int32_t i, j; | |
408 | ||
409 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
410 | return; | |
411 | } | |
412 | ||
413 | u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode); | |
414 | ||
415 | /* sort the special casing entries by code point */ | |
416 | if(specialCasingCount>0) { | |
417 | qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing), compareSpecialCasings); | |
418 | } | |
419 | ||
420 | /* replace multiple entries for any code point by one "complex" one */ | |
421 | j=0; | |
422 | for(i=1; i<specialCasingCount; ++i) { | |
423 | if(specialCasings[i-1].code==specialCasings[i].code) { | |
424 | /* there is a duplicate code point */ | |
425 | specialCasings[i-1].code=0x7fffffff; /* remove this entry in the following qsort */ | |
426 | specialCasings[i].isComplex=TRUE; /* make the following one complex */ | |
427 | specialCasings[i].lowerCase[0]=0; | |
428 | specialCasings[i].upperCase[0]=0; | |
429 | specialCasings[i].titleCase[0]=0; | |
430 | ++j; | |
431 | } | |
432 | } | |
433 | ||
434 | /* if some entries just were removed, then re-sort */ | |
435 | if(j>0) { | |
436 | qsort(specialCasings, specialCasingCount, sizeof(SpecialCasing), compareSpecialCasings); | |
437 | specialCasingCount-=j; | |
438 | } | |
439 | ||
440 | /* | |
441 | * Add one complex mapping to caseSensitive that was filtered out above: | |
442 | * Greek final Sigma has a conditional mapping but not locale-sensitive, | |
443 | * and it is taken when lowercasing just U+03A3 alone. | |
444 | * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA | |
445 | */ | |
446 | uset_add(caseSensitive, 0x3c2); | |
447 | } | |
448 | ||
449 | /* parser for CaseFolding.txt ----------------------------------------------- */ | |
450 | ||
451 | #define MAX_CASE_FOLDING_COUNT 2000 | |
452 | ||
453 | static CaseFolding caseFoldings[MAX_CASE_FOLDING_COUNT]; | |
454 | static int32_t caseFoldingCount=0; | |
455 | ||
456 | static void U_CALLCONV | |
457 | caseFoldingLineFn(void *context, | |
458 | char *fields[][2], int32_t fieldCount, | |
459 | UErrorCode *pErrorCode) { | |
460 | char *end; | |
461 | static uint32_t prevCode=0; | |
462 | int32_t count; | |
463 | char status; | |
464 | ||
465 | /* get code point */ | |
466 | caseFoldings[caseFoldingCount].code=(uint32_t)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16); | |
467 | end=(char *)u_skipWhitespace(end); | |
468 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
469 | fprintf(stderr, "genprops: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]); | |
470 | *pErrorCode=U_PARSE_ERROR; | |
471 | exit(U_PARSE_ERROR); | |
472 | } | |
473 | ||
474 | /* get the status of this mapping */ | |
475 | caseFoldings[caseFoldingCount].status=status=*u_skipWhitespace(fields[1][0]); | |
476 | if(status!='L' && status!='E' && status!='C' && status!='S' && status!='F' && status!='I' && status!='T') { | |
477 | fprintf(stderr, "genprops: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]); | |
478 | *pErrorCode=U_PARSE_ERROR; | |
479 | exit(U_PARSE_ERROR); | |
480 | } | |
481 | ||
482 | /* ignore all case folding mappings that are the same as the UnicodeData.txt lowercase mappings */ | |
483 | if(status=='L') { | |
484 | return; | |
485 | } | |
486 | ||
487 | /* get the mapping */ | |
488 | count=caseFoldings[caseFoldingCount].full[0]= | |
489 | (UChar)u_parseString(fields[2][0], caseFoldings[caseFoldingCount].full+1, 31, &caseFoldings[caseFoldingCount].simple, pErrorCode); | |
490 | if(U_FAILURE(*pErrorCode)) { | |
491 | fprintf(stderr, "genprops: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]); | |
492 | exit(*pErrorCode); | |
493 | } | |
494 | ||
495 | /* there is a simple mapping only if there is exactly one code point (count is in UChars) */ | |
496 | if(count==0 || count>2 || (count==2 && UTF_IS_SINGLE(caseFoldings[caseFoldingCount].full[1]))) { | |
497 | caseFoldings[caseFoldingCount].simple=0; | |
498 | } | |
499 | ||
500 | /* update the case-sensitive set */ | |
501 | if(status!='T') { | |
502 | uset_add(caseSensitive, (UChar32)caseFoldings[caseFoldingCount].code); | |
503 | _set_addAll(caseSensitive, caseFoldings[caseFoldingCount].full+1, caseFoldings[caseFoldingCount].full[0]); | |
504 | } | |
505 | ||
506 | /* check the status */ | |
507 | if(status=='S') { | |
508 | /* check if there was a full mapping for this code point before */ | |
509 | if( caseFoldingCount>0 && | |
510 | caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code && | |
511 | caseFoldings[caseFoldingCount-1].status=='F' | |
512 | ) { | |
513 | /* merge the two entries */ | |
514 | caseFoldings[caseFoldingCount-1].simple=caseFoldings[caseFoldingCount].simple; | |
515 | return; | |
516 | } | |
517 | } else if(status=='F') { | |
518 | /* check if there was a simple mapping for this code point before */ | |
519 | if( caseFoldingCount>0 && | |
520 | caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code && | |
521 | caseFoldings[caseFoldingCount-1].status=='S' | |
522 | ) { | |
523 | /* merge the two entries */ | |
524 | uprv_memcpy(caseFoldings[caseFoldingCount-1].full, caseFoldings[caseFoldingCount].full, 32*U_SIZEOF_UCHAR); | |
525 | return; | |
526 | } | |
527 | } else if(status=='I' || status=='T') { | |
528 | /* check if there was a default mapping for this code point before (remove it) */ | |
529 | while(caseFoldingCount>0 && | |
530 | caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code | |
531 | ) { | |
532 | prevCode=0; | |
533 | --caseFoldingCount; | |
534 | } | |
535 | /* store only a marker for special handling for cases like dotless i */ | |
536 | caseFoldings[caseFoldingCount].simple=0; | |
537 | caseFoldings[caseFoldingCount].full[0]=0; | |
538 | } | |
539 | ||
540 | /* check that the code points (caseFoldings[caseFoldingCount].code) are in ascending order */ | |
541 | if(caseFoldings[caseFoldingCount].code<=prevCode && caseFoldings[caseFoldingCount].code>0) { | |
542 | fprintf(stderr, "genprops: error - CaseFolding entries out of order, U+%04lx after U+%04lx\n", | |
543 | (unsigned long)caseFoldings[caseFoldingCount].code, | |
544 | (unsigned long)prevCode); | |
545 | *pErrorCode=U_PARSE_ERROR; | |
546 | exit(U_PARSE_ERROR); | |
547 | } | |
548 | prevCode=caseFoldings[caseFoldingCount].code; | |
549 | ||
550 | if(++caseFoldingCount==MAX_CASE_FOLDING_COUNT) { | |
551 | fprintf(stderr, "genprops: too many case folding mappings\n"); | |
552 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
553 | exit(U_INDEX_OUTOFBOUNDS_ERROR); | |
554 | } | |
555 | } | |
556 | ||
557 | static void | |
558 | parseCaseFolding(const char *filename, UErrorCode *pErrorCode) { | |
559 | char *fields[3][2]; | |
560 | ||
561 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
562 | return; | |
563 | } | |
564 | ||
565 | u_parseDelimitedFile(filename, ';', fields, 3, caseFoldingLineFn, NULL, pErrorCode); | |
566 | } | |
567 | ||
568 | /* parser for UnicodeData.txt ----------------------------------------------- */ | |
569 | ||
570 | /* general categories */ | |
571 | const char *const | |
572 | genCategoryNames[U_CHAR_CATEGORY_COUNT]={ | |
573 | "Cn", | |
574 | "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", | |
575 | "Mc", "Nd", "Nl", "No", | |
576 | "Zs", "Zl", "Zp", | |
577 | "Cc", "Cf", "Co", "Cs", | |
578 | "Pd", "Ps", "Pe", "Pc", "Po", | |
579 | "Sm", "Sc", "Sk", "So", | |
580 | "Pi", "Pf" | |
581 | }; | |
582 | ||
583 | const char *const | |
584 | bidiNames[U_CHAR_DIRECTION_COUNT]={ | |
585 | "L", "R", "EN", "ES", "ET", "AN", "CS", "B", "S", | |
586 | "WS", "ON", "LRE", "LRO", "AL", "RLE", "RLO", "PDF", "NSM", "BN" | |
587 | }; | |
588 | ||
589 | const char *const | |
590 | decompositionTypeNames[U_DT_COUNT]={ | |
591 | NULL, | |
592 | NULL, | |
593 | "compat", | |
594 | "circle", | |
595 | "final", | |
596 | "font", | |
597 | "fraction", | |
598 | "initial", | |
599 | "isolated", | |
600 | "medial", | |
601 | "narrow", | |
602 | "noBreak", | |
603 | "small", | |
604 | "square", | |
605 | "sub", | |
606 | "super", | |
607 | "vertical", | |
608 | "wide" | |
609 | }; | |
610 | ||
/*
 * Ranges of code points that UnicodeData.txt describes with a pair of
 * "<name, First>" and "<name, Last>" lines.
 */
static struct {
    uint32_t first;     /* code point of the "First" line */
    uint32_t last;      /* code point of the "Last" line */
    uint32_t props;     /* makeProps() result for the "First" line */
    char name[80];      /* area name without the brackets and the ", First" part */
} unicodeAreas[32];

/* index of the area that is currently being filled in */
static int32_t unicodeAreaIndex=0;

/* cursors into the tables from the previously parsed files */
static int32_t mirrorIndex=0;
static int32_t specialCasingIndex=0;
static int32_t caseFoldingIndex=0;
617 | ||
618 | static void U_CALLCONV | |
619 | unicodeDataLineFn(void *context, | |
620 | char *fields[][2], int32_t fieldCount, | |
621 | UErrorCode *pErrorCode) { | |
622 | Props p; | |
623 | char *end; | |
624 | static uint32_t prevCode=0; | |
625 | uint32_t value; | |
626 | int32_t i; | |
627 | ||
628 | /* reset the properties */ | |
629 | uprv_memset(&p, 0, sizeof(Props)); | |
630 | ||
631 | /* get the character code, field 0 */ | |
632 | p.code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16); | |
633 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
634 | fprintf(stderr, "genprops: syntax error in field 0 at %s\n", fields[0][0]); | |
635 | *pErrorCode=U_PARSE_ERROR; | |
636 | exit(U_PARSE_ERROR); | |
637 | } | |
638 | ||
639 | /* get general category, field 2 */ | |
640 | i=getTokenIndex(genCategoryNames, U_CHAR_CATEGORY_COUNT, fields[2][0]); | |
641 | if(i>=0) { | |
642 | p.generalCategory=(uint8_t)i; | |
643 | } else { | |
644 | fprintf(stderr, "genprops: unknown general category \"%s\" at code 0x%lx\n", | |
645 | fields[2][0], (unsigned long)p.code); | |
646 | *pErrorCode=U_PARSE_ERROR; | |
647 | exit(U_PARSE_ERROR); | |
648 | } | |
649 | ||
650 | /* get BiDi category, field 4 */ | |
651 | i=getTokenIndex(bidiNames, U_CHAR_DIRECTION_COUNT, fields[4][0]); | |
652 | if(i>=0) { | |
653 | p.bidi=(uint8_t)i; | |
654 | } else { | |
655 | fprintf(stderr, "genprops: unknown BiDi category \"%s\" at code 0x%lx\n", | |
656 | fields[4][0], (unsigned long)p.code); | |
657 | *pErrorCode=U_PARSE_ERROR; | |
658 | exit(U_PARSE_ERROR); | |
659 | } | |
660 | ||
661 | /* get decomposition type, field 5 */ | |
662 | if(fields[5][0]<fields[5][1]) { | |
663 | /* there is some decomposition */ | |
664 | if(*fields[5][0]!='<') { | |
665 | /* canonical */ | |
666 | i=U_DT_CANONICAL; | |
667 | } else { | |
668 | /* get compatibility type */ | |
669 | end=fields[5][0]+1; | |
670 | while(end<fields[5][1] && *end!='>') { | |
671 | ++end; | |
672 | } | |
673 | *end='#'; | |
674 | i=getTokenIndex(decompositionTypeNames, U_DT_COUNT, fields[5][0]+1); | |
675 | if(i<0) { | |
676 | fprintf(stderr, "genprops: unknown decomposition type \"%s\" at code 0x%lx\n", | |
677 | fields[5][0], (unsigned long)p.code); | |
678 | *pErrorCode=U_PARSE_ERROR; | |
679 | exit(U_PARSE_ERROR); | |
680 | } | |
681 | } | |
682 | if(!upvec_setValue(pv, p.code, p.code+1, 2, (uint32_t)i, UPROPS_DT_MASK, pErrorCode)) { | |
683 | fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(*pErrorCode)); | |
684 | exit(*pErrorCode); | |
685 | } | |
686 | } | |
687 | ||
688 | /* decimal digit value, field 6 */ | |
689 | if(fields[6][0]<fields[6][1]) { | |
690 | value=(uint32_t)uprv_strtoul(fields[6][0], &end, 10); | |
691 | if(end!=fields[6][1] || value>0x7fff) { | |
692 | fprintf(stderr, "genprops: syntax error in field 6 at code 0x%lx\n", | |
693 | (unsigned long)p.code); | |
694 | *pErrorCode=U_PARSE_ERROR; | |
695 | exit(U_PARSE_ERROR); | |
696 | } | |
697 | p.numericValue=(int32_t)value; | |
698 | p.numericType=1; | |
699 | } | |
700 | ||
701 | /* digit value, field 7 */ | |
702 | if(fields[7][0]<fields[7][1]) { | |
703 | value=(uint32_t)uprv_strtoul(fields[7][0], &end, 10); | |
704 | if(end!=fields[7][1] || value>0x7fff) { | |
705 | fprintf(stderr, "genprops: syntax error in field 7 at code 0x%lx\n", | |
706 | (unsigned long)p.code); | |
707 | *pErrorCode=U_PARSE_ERROR; | |
708 | exit(U_PARSE_ERROR); | |
709 | } | |
710 | if(p.numericType==0) { | |
711 | p.numericValue=(int32_t)value; | |
712 | p.numericType=2; | |
713 | } else if((int32_t)value!=p.numericValue) { | |
714 | fprintf(stderr, "genprops error: numeric values in fields 6 & 7 different at code 0x%lx\n", | |
715 | (unsigned long)p.code); | |
716 | *pErrorCode=U_PARSE_ERROR; | |
717 | exit(U_PARSE_ERROR); | |
718 | } | |
719 | } | |
720 | ||
721 | /* numeric value, field 8 */ | |
722 | if(fields[8][0]<fields[8][1]) { | |
723 | char *s=fields[8][0]; | |
724 | UBool isNegative; | |
725 | ||
726 | /* get a possible minus sign */ | |
727 | if(*s=='-') { | |
728 | isNegative=TRUE; | |
729 | ++s; | |
730 | } else { | |
731 | isNegative=FALSE; | |
732 | } | |
733 | ||
734 | value=(uint32_t)uprv_strtoul(s, &end, 10); | |
735 | if(value>0 && *end=='/') { | |
736 | /* field 8 may contain a fractional value, get the denominator */ | |
737 | if(p.numericType>0) { | |
738 | fprintf(stderr, "genprops error: numeric values in fields 6..8 different at code 0x%lx\n", | |
739 | (unsigned long)p.code); | |
740 | *pErrorCode=U_PARSE_ERROR; | |
741 | exit(U_PARSE_ERROR); | |
742 | } | |
743 | ||
744 | p.denominator=(uint32_t)uprv_strtoul(end+1, &end, 10); | |
745 | if(p.denominator==0) { | |
746 | fprintf(stderr, "genprops: denominator is 0 in field 8 at code 0x%lx\n", | |
747 | (unsigned long)p.code); | |
748 | *pErrorCode=U_PARSE_ERROR; | |
749 | exit(U_PARSE_ERROR); | |
750 | } | |
751 | } | |
752 | if(end!=fields[8][1] || value>0x7fffffff) { | |
753 | fprintf(stderr, "genprops: syntax error in field 8 at code 0x%lx\n", | |
754 | (unsigned long)p.code); | |
755 | *pErrorCode=U_PARSE_ERROR; | |
756 | exit(U_PARSE_ERROR); | |
757 | } | |
758 | ||
759 | if(p.numericType==0) { | |
760 | if(isNegative) { | |
761 | p.numericValue=-(int32_t)value; | |
762 | } else { | |
763 | p.numericValue=(int32_t)value; | |
764 | } | |
765 | p.numericType=3; | |
766 | } else if((int32_t)value!=p.numericValue) { | |
767 | fprintf(stderr, "genprops error: numeric values in fields 6..8 different at code 0x%lx\n", | |
768 | (unsigned long)p.code); | |
769 | *pErrorCode=U_PARSE_ERROR; | |
770 | exit(U_PARSE_ERROR); | |
771 | } | |
772 | } | |
773 | ||
774 | /* get Mirrored flag, field 9 */ | |
775 | if(*fields[9][0]=='Y') { | |
776 | p.isMirrored=1; | |
777 | } else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') { | |
778 | fprintf(stderr, "genprops: syntax error in field 9 at code 0x%lx\n", | |
779 | (unsigned long)p.code); | |
780 | *pErrorCode=U_PARSE_ERROR; | |
781 | exit(U_PARSE_ERROR); | |
782 | } | |
783 | ||
784 | /* get uppercase mapping, field 12 */ | |
785 | value=(uint32_t)uprv_strtoul(fields[12][0], &end, 16); | |
786 | if(end!=fields[12][1]) { | |
787 | fprintf(stderr, "genprops: syntax error in field 12 at code 0x%lx\n", | |
788 | (unsigned long)p.code); | |
789 | *pErrorCode=U_PARSE_ERROR; | |
790 | exit(U_PARSE_ERROR); | |
791 | } | |
792 | if(value!=0 && value!=p.code) { | |
793 | p.upperCase=value; | |
794 | uset_add(caseSensitive, (UChar32)p.code); | |
795 | uset_add(caseSensitive, (UChar32)value); | |
796 | } | |
797 | ||
798 | /* get lowercase value, field 13 */ | |
799 | value=(uint32_t)uprv_strtoul(fields[13][0], &end, 16); | |
800 | if(end!=fields[13][1]) { | |
801 | fprintf(stderr, "genprops: syntax error in field 13 at code 0x%lx\n", | |
802 | (unsigned long)p.code); | |
803 | *pErrorCode=U_PARSE_ERROR; | |
804 | exit(U_PARSE_ERROR); | |
805 | } | |
806 | if(value!=0 && value!=p.code) { | |
807 | p.lowerCase=value; | |
808 | uset_add(caseSensitive, (UChar32)p.code); | |
809 | uset_add(caseSensitive, (UChar32)value); | |
810 | } | |
811 | ||
812 | /* get titlecase value, field 14 */ | |
813 | value=(uint32_t)uprv_strtoul(fields[14][0], &end, 16); | |
814 | if(end!=fields[14][1]) { | |
815 | fprintf(stderr, "genprops: syntax error in field 14 at code 0x%lx\n", | |
816 | (unsigned long)p.code); | |
817 | *pErrorCode=U_PARSE_ERROR; | |
818 | exit(U_PARSE_ERROR); | |
819 | } | |
820 | if(value!=0 && value!=p.code) { | |
821 | p.titleCase=value; | |
822 | uset_add(caseSensitive, (UChar32)p.code); | |
823 | uset_add(caseSensitive, (UChar32)value); | |
824 | } | |
825 | ||
826 | /* set additional properties from previously parsed files */ | |
827 | if(mirrorIndex<mirrorCount && p.code==mirrorMappings[mirrorIndex][0]) { | |
828 | p.mirrorMapping=mirrorMappings[mirrorIndex++][1]; | |
829 | } | |
830 | if(specialCasingIndex<specialCasingCount && p.code==specialCasings[specialCasingIndex].code) { | |
831 | p.specialCasing=specialCasings+specialCasingIndex++; | |
832 | } else { | |
833 | p.specialCasing=NULL; | |
834 | } | |
835 | if(caseFoldingIndex<caseFoldingCount && p.code==caseFoldings[caseFoldingIndex].code) { | |
836 | p.caseFolding=caseFoldings+caseFoldingIndex++; | |
837 | ||
838 | /* ignore "Common" mappings (simple==full) that map to the same code point as the regular lowercase mapping */ | |
839 | if( p.caseFolding->status=='C' && | |
840 | p.caseFolding->simple==p.lowerCase | |
841 | ) { | |
842 | p.caseFolding=NULL; | |
843 | } | |
844 | } else { | |
845 | p.caseFolding=NULL; | |
846 | } | |
847 | ||
848 | value=makeProps(&p); | |
849 | ||
850 | if(*fields[1][0]=='<') { | |
851 | /* first or last entry of a Unicode area */ | |
852 | size_t length=fields[1][1]-fields[1][0]; | |
853 | ||
854 | if(length<9) { | |
855 | /* name too short for an area name */ | |
856 | } else if(0==uprv_memcmp(", First>", fields[1][1]-8, 8)) { | |
857 | /* set the current area */ | |
858 | if(unicodeAreas[unicodeAreaIndex].first==0xffffffff) { | |
859 | length-=9; | |
860 | unicodeAreas[unicodeAreaIndex].first=p.code; | |
861 | unicodeAreas[unicodeAreaIndex].props=value; | |
862 | uprv_memcpy(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length); | |
863 | unicodeAreas[unicodeAreaIndex].name[length]=0; | |
864 | } else { | |
865 | /* error: a previous area is incomplete */ | |
866 | fprintf(stderr, "genprops: error - area \"%s\" is incomplete\n", unicodeAreas[unicodeAreaIndex].name); | |
867 | *pErrorCode=U_PARSE_ERROR; | |
868 | exit(U_PARSE_ERROR); | |
869 | } | |
870 | return; | |
871 | } else if(0==uprv_memcmp(", Last>", fields[1][1]-7, 7)) { | |
872 | /* check that the current area matches, and complete it with the last code point */ | |
873 | length-=8; | |
874 | if( unicodeAreas[unicodeAreaIndex].props==value && | |
875 | 0==uprv_memcmp(unicodeAreas[unicodeAreaIndex].name, fields[1][0]+1, length) && | |
876 | unicodeAreas[unicodeAreaIndex].name[length]==0 && | |
877 | unicodeAreas[unicodeAreaIndex].first<p.code | |
878 | ) { | |
879 | unicodeAreas[unicodeAreaIndex].last=p.code; | |
880 | if(beVerbose) { | |
881 | printf("Unicode area U+%04lx..U+%04lx \"%s\"\n", | |
882 | (unsigned long)unicodeAreas[unicodeAreaIndex].first, | |
883 | (unsigned long)unicodeAreas[unicodeAreaIndex].last, | |
884 | unicodeAreas[unicodeAreaIndex].name); | |
885 | } | |
886 | unicodeAreas[++unicodeAreaIndex].first=0xffffffff; | |
887 | } else { | |
888 | /* error: different properties between first & last, different area name, first>=last */ | |
889 | fprintf(stderr, "genprops: error - Last of area \"%s\" is incorrect\n", unicodeAreas[unicodeAreaIndex].name); | |
890 | *pErrorCode=U_PARSE_ERROR; | |
891 | exit(U_PARSE_ERROR); | |
892 | } | |
893 | return; | |
894 | } else { | |
895 | /* not an area name */ | |
896 | } | |
897 | } | |
898 | ||
899 | /* check for non-character code points */ | |
900 | if((p.code&0xfffe)==0xfffe || (uint32_t)(p.code-0xfdd0)<0x20) { | |
901 | fprintf(stderr, "genprops: error - properties for non-character code point U+%04lx\n", | |
902 | (unsigned long)p.code); | |
903 | *pErrorCode=U_PARSE_ERROR; | |
904 | exit(U_PARSE_ERROR); | |
905 | } | |
906 | ||
907 | /* check that the code points (p.code) are in ascending order */ | |
908 | if(p.code<=prevCode && p.code>0) { | |
909 | fprintf(stderr, "genprops: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n", | |
910 | (unsigned long)p.code, (unsigned long)prevCode); | |
911 | *pErrorCode=U_PARSE_ERROR; | |
912 | exit(U_PARSE_ERROR); | |
913 | } | |
914 | prevCode=p.code; | |
915 | ||
916 | /* properties for a single code point */ | |
917 | addProps(p.code, value); | |
918 | } | |
919 | ||
920 | /* set repeated properties for the areas */ | |
921 | static void | |
922 | repeatAreaProps() { | |
923 | uint32_t puaProps; | |
924 | int32_t i; | |
925 | UBool hasPlane15PUA, hasPlane16PUA; | |
926 | UErrorCode errorCode; | |
927 | ||
928 | /* | |
929 | * UnicodeData.txt before 3.0.1 did not contain the PUAs on | |
930 | * planes 15 and 16. | |
931 | * If that is the case, then we add them here, using the properties | |
932 | * from the BMP PUA. | |
933 | */ | |
934 | puaProps=0; | |
935 | hasPlane15PUA=hasPlane16PUA=FALSE; | |
936 | ||
937 | for(i=0; i<unicodeAreaIndex; ++i) { | |
938 | repeatProps(unicodeAreas[i].first, | |
939 | unicodeAreas[i].last, | |
940 | unicodeAreas[i].props); | |
941 | if(unicodeAreas[i].first==0xe000) { | |
942 | puaProps=unicodeAreas[i].props; | |
943 | } else if(unicodeAreas[i].first==0xf0000) { | |
944 | hasPlane15PUA=TRUE; | |
945 | } else if(unicodeAreas[i].first==0x100000) { | |
946 | hasPlane16PUA=TRUE; | |
947 | } | |
948 | } | |
949 | ||
950 | if(puaProps!=0) { | |
951 | if(!hasPlane15PUA) { | |
952 | repeatProps(0xf0000, 0xffffd, puaProps); | |
953 | } | |
954 | if(!hasPlane16PUA) { | |
955 | repeatProps(0x100000, 0x10fffd, puaProps); | |
956 | } | |
957 | } | |
958 | ||
959 | /* Hangul have canonical decompositions */ | |
960 | errorCode=U_ZERO_ERROR; | |
961 | if(!upvec_setValue(pv, 0xac00, 0xd7a4, 2, (uint32_t)U_DT_CANONICAL, UPROPS_DT_MASK, &errorCode)) { | |
962 | fprintf(stderr, "genprops error: unable to set decomposition type: %s\n", u_errorName(errorCode)); | |
963 | exit(errorCode); | |
964 | } | |
965 | } | |
966 | ||
967 | static void | |
968 | parseDB(const char *filename, UErrorCode *pErrorCode) { | |
969 | /* default Bidi classes for unassigned code points */ | |
970 | static const uint32_t defaultBidi[][2]={ /* { limit, class } */ | |
971 | { 0x0590, U_LEFT_TO_RIGHT }, | |
972 | { 0x0600, U_RIGHT_TO_LEFT }, | |
973 | { 0x07C0, U_RIGHT_TO_LEFT_ARABIC }, | |
974 | { 0xFB1D, U_LEFT_TO_RIGHT }, | |
975 | { 0xFB50, U_RIGHT_TO_LEFT }, | |
976 | { 0xFE00, U_RIGHT_TO_LEFT_ARABIC }, | |
977 | { 0xFE70, U_LEFT_TO_RIGHT }, | |
978 | { 0xFF00, U_RIGHT_TO_LEFT_ARABIC }, | |
979 | { 0x110000, U_LEFT_TO_RIGHT } | |
980 | }; | |
981 | ||
982 | char *fields[15][2]; | |
983 | UChar32 start, end; | |
984 | uint32_t prev; | |
985 | int32_t i; | |
986 | ||
987 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
988 | return; | |
989 | } | |
990 | ||
991 | /* | |
992 | * Set default Bidi classes for unassigned code points. | |
993 | * See table 3-7 "Bidirectional Character Types" in UAX #9. | |
994 | * http://www.unicode.org/reports/tr9/ | |
995 | */ | |
996 | prev=0; | |
997 | for(i=0; i<LENGTHOF(defaultBidi); ++i) { | |
998 | if(defaultBidi[i][1]!=0) { | |
999 | repeatProps(prev, defaultBidi[i][0]-1, defaultBidi[i][1]<<UPROPS_BIDI_SHIFT); | |
1000 | } | |
1001 | prev=defaultBidi[i][0]; | |
1002 | } | |
1003 | ||
1004 | /* while unicodeAreas[unicodeAreaIndex] is unused, set its first to a bogus value */ | |
1005 | unicodeAreas[0].first=0xffffffff; | |
1006 | ||
1007 | u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode); | |
1008 | ||
1009 | if(unicodeAreas[unicodeAreaIndex].first!=0xffffffff) { | |
1010 | fprintf(stderr, "genprops: error - the last area \"%s\" from U+%04lx is incomplete\n", | |
1011 | unicodeAreas[unicodeAreaIndex].name, | |
1012 | (unsigned long)unicodeAreas[unicodeAreaIndex].first); | |
1013 | *pErrorCode=U_PARSE_ERROR; | |
1014 | exit(U_PARSE_ERROR); | |
1015 | } | |
1016 | ||
1017 | repeatAreaProps(); | |
1018 | ||
1019 | /* are all sub-properties consumed? */ | |
1020 | if(mirrorIndex<mirrorCount) { | |
1021 | fprintf(stderr, "genprops: error - some code points in BidiMirroring.txt are missing from UnicodeData.txt\n"); | |
1022 | *pErrorCode=U_PARSE_ERROR; | |
1023 | exit(U_PARSE_ERROR); | |
1024 | } | |
1025 | if(specialCasingIndex<specialCasingCount) { | |
1026 | fprintf(stderr, "genprops: error - some code points in SpecialCasing.txt are missing from UnicodeData.txt\n"); | |
1027 | *pErrorCode=U_PARSE_ERROR; | |
1028 | exit(U_PARSE_ERROR); | |
1029 | } | |
1030 | if(caseFoldingIndex<caseFoldingCount) { | |
1031 | fprintf(stderr, "genprops: error - some code points in CaseFolding.txt are missing from UnicodeData.txt\n"); | |
1032 | *pErrorCode=U_PARSE_ERROR; | |
1033 | exit(U_PARSE_ERROR); | |
1034 | } | |
1035 | ||
1036 | if(U_FAILURE(*pErrorCode)) { | |
1037 | return; | |
1038 | } | |
1039 | ||
1040 | for(i=0; | |
1041 | 0==uset_getItem(caseSensitive, i, &start, &end, NULL, 0, pErrorCode) && U_SUCCESS(*pErrorCode); | |
1042 | ++i | |
1043 | ) { | |
1044 | addCaseSensitive(start, end); | |
1045 | } | |
1046 | if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
1047 | *pErrorCode=U_ZERO_ERROR; | |
1048 | } | |
1049 | } | |
1050 | ||
1051 | /* | |
1052 | * Hey, Emacs, please set the following: | |
1053 | * | |
1054 | * Local Variables: | |
1055 | * indent-tabs-mode: nil | |
1056 | * End: | |
1057 | * | |
1058 | */ | |
374ca955 | 1059 |