]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2004-2006, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: genbidi.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2004dec30 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This program reads several of the Unicode character database text files, | |
17 | * parses them, and extracts the bidi/shaping properties for each character. | |
18 | * It then writes a binary file containing the properties | |
19 | * that is designed to be used directly for random-access to | |
20 | * the properties of each Unicode character. | |
21 | */ | |
22 | ||
23 | #include <stdio.h> | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/uchar.h" | |
26 | #include "unicode/putil.h" | |
27 | #include "unicode/uclean.h" | |
28 | #include "cmemory.h" | |
29 | #include "cstring.h" | |
30 | #include "uarrsort.h" | |
31 | #include "unewdata.h" | |
32 | #include "uoptions.h" | |
33 | #include "uparse.h" | |
34 | #include "propsvec.h" | |
35 | #include "ubidi_props.h" | |
36 | #include "genbidi.h" | |
37 | ||
38 | #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) | |
39 | ||
40 | /* data --------------------------------------------------------------------- */ | |
41 | ||
42 | uint32_t *pv; | |
43 | ||
44 | UBool beVerbose=FALSE, haveCopyright=TRUE; | |
45 | ||
46 | /* prototypes --------------------------------------------------------------- */ | |
47 | ||
48 | static UBool | |
49 | isToken(const char *token, const char *s); | |
50 | ||
51 | static void | |
52 | parseBidiMirroring(const char *filename, UErrorCode *pErrorCode); | |
53 | ||
54 | static void | |
55 | parseDB(const char *filename, UErrorCode *pErrorCode); | |
56 | ||
57 | /* miscellaneous ------------------------------------------------------------ */ | |
58 | ||
59 | /* TODO: more common code, move functions to uparse.h|c */ | |
60 | ||
/*
 * Trim a field in place.
 * Leading whitespace is skipped with u_skipWhitespace(); trailing spaces and
 * tabs before limit are cut off by writing a NUL terminator.
 *
 * @param s     start of the field
 * @param limit one past the last character of the field; a NUL is written
 *              at or before this position
 * @return pointer to the first non-whitespace character of the field
 */
static char *
trimTerminateField(char *s, char *limit) {
    char *first=(char *)u_skipWhitespace(s);
    char *end=limit;

    /* back up over trailing blanks, but never past the first kept character */
    for(; first<end; --end) {
        char last=end[-1];
        if(last!=' ' && last!='\t') {
            break;
        }
    }
    *end=0;

    return first;
}
74 | ||
75 | static void | |
76 | parseTwoFieldFile(char *filename, char *basename, | |
77 | const char *ucdFile, const char *suffix, | |
78 | UParseLineFn *lineFn, | |
79 | UErrorCode *pErrorCode) { | |
80 | char *fields[2][2]; | |
81 | ||
82 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
83 | return; | |
84 | } | |
85 | ||
86 | writeUCDFilename(basename, ucdFile, suffix); | |
87 | ||
88 | u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode); | |
89 | if(U_FAILURE(*pErrorCode)) { | |
90 | fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode)); | |
91 | } | |
92 | } | |
93 | ||
94 | static void U_CALLCONV | |
95 | bidiClassLineFn(void *context, | |
96 | char *fields[][2], int32_t fieldCount, | |
97 | UErrorCode *pErrorCode); | |
98 | ||
99 | /* parse files with single enumerated properties ---------------------------- */ | |
100 | ||
101 | /* TODO: more common code, move functions to uparse.h|c */ | |
102 | ||
103 | struct SingleEnum { | |
104 | const char *ucdFile, *propName; | |
105 | UProperty prop; | |
106 | int32_t vecWord, vecShift; | |
107 | uint32_t vecMask; | |
108 | }; | |
109 | typedef struct SingleEnum SingleEnum; | |
110 | ||
111 | static void | |
112 | parseSingleEnumFile(char *filename, char *basename, const char *suffix, | |
113 | const SingleEnum *sen, | |
114 | UErrorCode *pErrorCode); | |
115 | ||
116 | static const SingleEnum jtSingleEnum={ | |
117 | "DerivedJoiningType", "joining type", | |
118 | UCHAR_JOINING_TYPE, | |
119 | 0, UBIDI_JT_SHIFT, UBIDI_JT_MASK | |
120 | }; | |
121 | ||
122 | static const SingleEnum jgSingleEnum={ | |
123 | "DerivedJoiningGroup", "joining group", | |
124 | UCHAR_JOINING_GROUP, | |
125 | 1, 0, 0xff /* column 1 bits 7..0 */ | |
126 | }; | |
127 | ||
128 | static void U_CALLCONV | |
129 | singleEnumLineFn(void *context, | |
130 | char *fields[][2], int32_t fieldCount, | |
131 | UErrorCode *pErrorCode) { | |
132 | const SingleEnum *sen; | |
133 | char *s; | |
134 | uint32_t start, limit, uv; | |
135 | int32_t value; | |
136 | ||
137 | sen=(const SingleEnum *)context; | |
138 | ||
139 | u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode); | |
140 | if(U_FAILURE(*pErrorCode)) { | |
141 | fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]); | |
142 | exit(*pErrorCode); | |
143 | } | |
144 | ++limit; | |
145 | ||
146 | /* parse property alias */ | |
147 | s=trimTerminateField(fields[1][0], fields[1][1]); | |
148 | value=u_getPropertyValueEnum(sen->prop, s); | |
149 | if(value<0) { | |
150 | if(sen->prop==UCHAR_BLOCK) { | |
151 | if(isToken("Greek", s)) { | |
152 | value=UBLOCK_GREEK; /* Unicode 3.2 renames this to "Greek and Coptic" */ | |
153 | } else if(isToken("Combining Marks for Symbols", s)) { | |
154 | value=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* Unicode 3.2 renames this to "Combining Diacritical Marks for Symbols" */ | |
155 | } else if(isToken("Private Use", s)) { | |
156 | value=UBLOCK_PRIVATE_USE; /* Unicode 3.2 renames this to "Private Use Area" */ | |
157 | } | |
158 | } | |
159 | } | |
160 | if(value<0) { | |
161 | fprintf(stderr, "genbidi error: unknown %s name in %s.txt field 1 at %s\n", | |
162 | sen->propName, sen->ucdFile, s); | |
163 | exit(U_PARSE_ERROR); | |
164 | } | |
165 | ||
166 | uv=(uint32_t)(value<<sen->vecShift); | |
167 | if((uv&sen->vecMask)!=uv) { | |
168 | fprintf(stderr, "genbidi error: %s value overflow (0x%x) at %s\n", | |
169 | sen->propName, (int)uv, s); | |
170 | exit(U_INTERNAL_PROGRAM_ERROR); | |
171 | } | |
172 | ||
173 | if(!upvec_setValue(pv, start, limit, sen->vecWord, uv, sen->vecMask, pErrorCode)) { | |
174 | fprintf(stderr, "genbidi error: unable to set %s code: %s\n", | |
175 | sen->propName, u_errorName(*pErrorCode)); | |
176 | exit(*pErrorCode); | |
177 | } | |
178 | } | |
179 | ||
180 | static void | |
181 | parseSingleEnumFile(char *filename, char *basename, const char *suffix, | |
182 | const SingleEnum *sen, | |
183 | UErrorCode *pErrorCode) { | |
184 | char *fields[2][2]; | |
185 | ||
186 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
187 | return; | |
188 | } | |
189 | ||
190 | writeUCDFilename(basename, sen->ucdFile, suffix); | |
191 | ||
192 | u_parseDelimitedFile(filename, ';', fields, 2, singleEnumLineFn, (void *)sen, pErrorCode); | |
193 | if(U_FAILURE(*pErrorCode)) { | |
194 | fprintf(stderr, "error parsing %s.txt: %s\n", sen->ucdFile, u_errorName(*pErrorCode)); | |
195 | } | |
196 | } | |
197 | ||
198 | /* parse files with multiple binary properties ------------------------------ */ | |
199 | ||
200 | /* TODO: more common code, move functions to uparse.h|c */ | |
201 | ||
202 | /* TODO: similar to genbidi/props2.c but not the same; same as in gencase/gencase.c */ | |
203 | ||
/* One binary property: its name and the properties vector bits it sets. */
typedef struct Binary {
    const char *propName;   /* property name as it appears in field 1 */
    int32_t vecWord;        /* properties vector column */
    uint32_t vecValue;      /* value stored for code points that have the property */
    uint32_t vecMask;       /* bits of the column written by this property; must not be 0 */
} Binary;

/* One UCD file together with the binary properties extracted from it. */
typedef struct Binaries {
    const char *ucdFile;    /* UCD file name without suffix and ".txt" */
    const Binary *binaries; /* array of recognized properties */
    int32_t binariesCount;  /* number of entries in binaries[] */
} Binaries;
217 | ||
218 | static const Binary | |
219 | propListNames[]={ | |
220 | { "Bidi_Control", 0, U_MASK(UBIDI_BIDI_CONTROL_SHIFT), U_MASK(UBIDI_BIDI_CONTROL_SHIFT) }, | |
221 | { "Join_Control", 0, U_MASK(UBIDI_JOIN_CONTROL_SHIFT), U_MASK(UBIDI_JOIN_CONTROL_SHIFT) } | |
222 | }; | |
223 | ||
224 | static const Binaries | |
225 | propListBinaries={ | |
226 | "PropList", propListNames, LENGTHOF(propListNames) | |
227 | }; | |
228 | ||
229 | static void U_CALLCONV | |
230 | binariesLineFn(void *context, | |
231 | char *fields[][2], int32_t fieldCount, | |
232 | UErrorCode *pErrorCode) { | |
233 | const Binaries *bin; | |
234 | char *s; | |
235 | uint32_t start, limit; | |
236 | int32_t i; | |
237 | ||
238 | bin=(const Binaries *)context; | |
239 | ||
240 | u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode); | |
241 | if(U_FAILURE(*pErrorCode)) { | |
242 | fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]); | |
243 | exit(*pErrorCode); | |
244 | } | |
245 | ++limit; | |
246 | ||
247 | /* parse binary property name */ | |
248 | s=(char *)u_skipWhitespace(fields[1][0]); | |
249 | for(i=0;; ++i) { | |
250 | if(i==bin->binariesCount) { | |
251 | /* ignore unrecognized properties */ | |
252 | return; | |
253 | } | |
254 | if(isToken(bin->binaries[i].propName, s)) { | |
255 | break; | |
256 | } | |
257 | } | |
258 | ||
259 | if(bin->binaries[i].vecMask==0) { | |
260 | fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n", | |
261 | (int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName); | |
262 | exit(U_INTERNAL_PROGRAM_ERROR); | |
263 | } | |
264 | ||
265 | if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) { | |
266 | fprintf(stderr, "genbidi error: unable to set %s, code: %s\n", | |
267 | bin->binaries[i].propName, u_errorName(*pErrorCode)); | |
268 | exit(*pErrorCode); | |
269 | } | |
270 | } | |
271 | ||
272 | static void | |
273 | parseBinariesFile(char *filename, char *basename, const char *suffix, | |
274 | const Binaries *bin, | |
275 | UErrorCode *pErrorCode) { | |
276 | char *fields[2][2]; | |
277 | ||
278 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
279 | return; | |
280 | } | |
281 | ||
282 | writeUCDFilename(basename, bin->ucdFile, suffix); | |
283 | ||
284 | u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode); | |
285 | if(U_FAILURE(*pErrorCode)) { | |
286 | fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode)); | |
287 | } | |
288 | } | |
289 | ||
290 | /* -------------------------------------------------------------------------- */ | |
291 | ||
292 | enum { | |
293 | HELP_H, | |
294 | HELP_QUESTION_MARK, | |
295 | VERBOSE, | |
296 | COPYRIGHT, | |
297 | DESTDIR, | |
298 | SOURCEDIR, | |
299 | UNICODE_VERSION, | |
300 | ICUDATADIR, | |
301 | CSOURCE | |
302 | }; | |
303 | ||
304 | /* Keep these values in sync with the above enums */ | |
305 | static UOption options[]={ | |
306 | UOPTION_HELP_H, | |
307 | UOPTION_HELP_QUESTION_MARK, | |
308 | UOPTION_VERBOSE, | |
309 | UOPTION_COPYRIGHT, | |
310 | UOPTION_DESTDIR, | |
311 | UOPTION_SOURCEDIR, | |
312 | UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), | |
313 | UOPTION_ICUDATADIR, | |
314 | UOPTION_DEF("csource", 'C', UOPT_NO_ARG) | |
315 | }; | |
316 | ||
317 | extern int | |
318 | main(int argc, char* argv[]) { | |
319 | char filename[300]; | |
320 | const char *srcDir=NULL, *destDir=NULL, *suffix=NULL; | |
321 | char *basename=NULL; | |
322 | UErrorCode errorCode=U_ZERO_ERROR; | |
323 | ||
324 | U_MAIN_INIT_ARGS(argc, argv); | |
325 | ||
326 | /* preset then read command line options */ | |
327 | options[DESTDIR].value=u_getDataDirectory(); | |
328 | options[SOURCEDIR].value=""; | |
329 | options[UNICODE_VERSION].value=""; | |
330 | options[ICUDATADIR].value=u_getDataDirectory(); | |
331 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); | |
332 | ||
333 | /* error handling, printing usage message */ | |
334 | if(argc<0) { | |
335 | fprintf(stderr, | |
336 | "error in command line argument \"%s\"\n", | |
337 | argv[-argc]); | |
338 | } | |
339 | if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) { | |
340 | /* | |
341 | * Broken into chucks because the C89 standard says the minimum | |
342 | * required supported string length is 509 bytes. | |
343 | */ | |
344 | fprintf(stderr, | |
345 | "Usage: %s [-options] [suffix]\n" | |
346 | "\n" | |
347 | "read the UnicodeData.txt file and other Unicode properties files and\n" | |
348 | "create a binary file " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE " with the bidi/shaping properties\n" | |
349 | "\n", | |
350 | argv[0]); | |
351 | fprintf(stderr, | |
352 | "Options:\n" | |
353 | "\t-h or -? or --help this usage text\n" | |
354 | "\t-v or --verbose verbose output\n" | |
355 | "\t-c or --copyright include a copyright notice\n" | |
356 | "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n" | |
357 | "\t-C or --csource generate a .c source file rather than the .icu binary\n"); | |
358 | fprintf(stderr, | |
359 | "\t-d or --destdir destination directory, followed by the path\n" | |
360 | "\t-s or --sourcedir source directory, followed by the path\n" | |
361 | "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" | |
362 | "\t followed by path, defaults to %s\n" | |
363 | "\tsuffix suffix that is to be appended with a '-'\n" | |
364 | "\t to the source file basenames before opening;\n" | |
365 | "\t 'genbidi new' will read UnicodeData-new.txt etc.\n", | |
366 | u_getDataDirectory()); | |
367 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; | |
368 | } | |
369 | ||
370 | /* get the options values */ | |
371 | beVerbose=options[VERBOSE].doesOccur; | |
372 | haveCopyright=options[COPYRIGHT].doesOccur; | |
373 | srcDir=options[SOURCEDIR].value; | |
374 | destDir=options[DESTDIR].value; | |
375 | ||
376 | if(argc>=2) { | |
377 | suffix=argv[1]; | |
378 | } else { | |
379 | suffix=NULL; | |
380 | } | |
381 | ||
382 | if(options[UNICODE_VERSION].doesOccur) { | |
383 | setUnicodeVersion(options[UNICODE_VERSION].value); | |
384 | } | |
385 | /* else use the default dataVersion in store.c */ | |
386 | ||
387 | if (options[ICUDATADIR].doesOccur) { | |
388 | u_setDataDirectory(options[ICUDATADIR].value); | |
389 | } | |
390 | ||
391 | /* prepare the filename beginning with the source dir */ | |
392 | uprv_strcpy(filename, srcDir); | |
393 | basename=filename+uprv_strlen(filename); | |
394 | if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { | |
395 | *basename++=U_FILE_SEP_CHAR; | |
396 | } | |
397 | ||
398 | /* initialize */ | |
399 | pv=upvec_open(2, 10000); | |
400 | ||
401 | /* process BidiMirroring.txt */ | |
402 | writeUCDFilename(basename, "BidiMirroring", suffix); | |
403 | parseBidiMirroring(filename, &errorCode); | |
404 | ||
405 | /* process additional properties files */ | |
406 | *basename=0; | |
407 | ||
408 | parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode); | |
409 | ||
410 | parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, &errorCode); | |
411 | ||
412 | parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, &errorCode); | |
413 | ||
414 | /* process UnicodeData.txt */ | |
415 | writeUCDFilename(basename, "UnicodeData", suffix); | |
416 | parseDB(filename, &errorCode); | |
417 | ||
418 | /* set proper bidi class for unassigned code points (Cn) */ | |
419 | parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, &errorCode); | |
420 | ||
421 | /* process parsed data */ | |
422 | if(U_SUCCESS(errorCode)) { | |
423 | /* write the properties data file */ | |
424 | generateData(destDir, options[CSOURCE].doesOccur); | |
425 | } | |
426 | ||
427 | u_cleanup(); | |
428 | return errorCode; | |
429 | } | |
430 | ||
431 | U_CFUNC void | |
432 | writeUCDFilename(char *basename, const char *filename, const char *suffix) { | |
433 | int32_t length=(int32_t)uprv_strlen(filename); | |
434 | uprv_strcpy(basename, filename); | |
435 | if(suffix!=NULL) { | |
436 | basename[length++]='-'; | |
437 | uprv_strcpy(basename+length, suffix); | |
438 | length+=(int32_t)uprv_strlen(suffix); | |
439 | } | |
440 | uprv_strcpy(basename+length, ".txt"); | |
441 | } | |
442 | ||
443 | /* TODO: move to toolutil */ | |
444 | static UBool | |
445 | isToken(const char *token, const char *s) { | |
446 | const char *z; | |
447 | int32_t j; | |
448 | ||
449 | s=u_skipWhitespace(s); | |
450 | for(j=0;; ++j) { | |
451 | if(token[j]!=0) { | |
452 | if(s[j]!=token[j]) { | |
453 | break; | |
454 | } | |
455 | } else { | |
456 | z=u_skipWhitespace(s+j); | |
457 | if(*z==';' || *z==0) { | |
458 | return TRUE; | |
459 | } else { | |
460 | break; | |
461 | } | |
462 | } | |
463 | } | |
464 | ||
465 | return FALSE; | |
466 | } | |
467 | ||
468 | /* parser for BidiMirroring.txt --------------------------------------------- */ | |
469 | ||
470 | static void U_CALLCONV | |
471 | mirrorLineFn(void *context, | |
472 | char *fields[][2], int32_t fieldCount, | |
473 | UErrorCode *pErrorCode) { | |
474 | char *end; | |
475 | UChar32 src, mirror; | |
476 | ||
477 | src=(UChar32)uprv_strtoul(fields[0][0], &end, 16); | |
478 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
479 | fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]); | |
480 | *pErrorCode=U_PARSE_ERROR; | |
481 | exit(U_PARSE_ERROR); | |
482 | } | |
483 | ||
484 | mirror=(UChar32)uprv_strtoul(fields[1][0], &end, 16); | |
485 | if(end<=fields[1][0] || end!=fields[1][1]) { | |
486 | fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]); | |
487 | *pErrorCode=U_PARSE_ERROR; | |
488 | exit(U_PARSE_ERROR); | |
489 | } | |
490 | ||
491 | addMirror(src, mirror); | |
492 | } | |
493 | ||
494 | static void | |
495 | parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) { | |
496 | char *fields[2][2]; | |
497 | ||
498 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
499 | return; | |
500 | } | |
501 | ||
502 | u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode); | |
503 | } | |
504 | ||
505 | /* parser for UnicodeData.txt ----------------------------------------------- */ | |
506 | ||
507 | static void U_CALLCONV | |
508 | unicodeDataLineFn(void *context, | |
509 | char *fields[][2], int32_t fieldCount, | |
510 | UErrorCode *pErrorCode) { | |
511 | char *end; | |
512 | UErrorCode errorCode; | |
513 | UChar32 c; | |
514 | ||
515 | errorCode=U_ZERO_ERROR; | |
516 | ||
517 | /* get the character code, field 0 */ | |
518 | c=(UChar32)uprv_strtoul(fields[0][0], &end, 16); | |
519 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
520 | fprintf(stderr, "genbidi: syntax error in field 0 at %s\n", fields[0][0]); | |
521 | *pErrorCode=U_PARSE_ERROR; | |
522 | exit(U_PARSE_ERROR); | |
523 | } | |
524 | ||
525 | /* get Mirrored flag, field 9 */ | |
526 | if(*fields[9][0]=='Y') { | |
527 | if(!upvec_setValue(pv, c, c+1, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode)) { | |
528 | fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n", | |
529 | (long)c, u_errorName(errorCode)); | |
530 | exit(errorCode); | |
531 | } | |
532 | } else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') { | |
533 | fprintf(stderr, "genbidi: syntax error in field 9 at U+%04lx\n", | |
534 | (long)c); | |
535 | *pErrorCode=U_PARSE_ERROR; | |
536 | exit(U_PARSE_ERROR); | |
537 | } | |
538 | } | |
539 | ||
540 | static void | |
541 | parseDB(const char *filename, UErrorCode *pErrorCode) { | |
542 | /* default Bidi classes for unassigned code points */ | |
543 | static const UChar32 defaultBidi[][3]={ /* { start, end, class } */ | |
544 | /* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */ | |
545 | { 0x0590, 0x05FF, U_RIGHT_TO_LEFT }, | |
546 | { 0x07C0, 0x08FF, U_RIGHT_TO_LEFT }, | |
547 | { 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT }, | |
548 | { 0x10800, 0x10FFF, U_RIGHT_TO_LEFT }, | |
549 | ||
550 | /* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */ | |
551 | { 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC }, | |
552 | { 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC }, | |
553 | { 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC }, | |
554 | { 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC } | |
555 | ||
556 | /* L otherwise */ | |
557 | }; | |
558 | ||
559 | char *fields[15][2]; | |
560 | UChar32 start, end; | |
561 | int32_t i; | |
562 | ||
563 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
564 | return; | |
565 | } | |
566 | ||
567 | /* | |
568 | * Set default Bidi classes for unassigned code points. | |
569 | * See the documentation for Bidi_Class in UCD.html in the Unicode data. | |
570 | * http://www.unicode.org/Public/ | |
571 | * | |
572 | * Starting with Unicode 5.0, DerivedBidiClass.txt should (re)set | |
573 | * the Bidi_Class values for all code points including unassigned ones | |
574 | * and including L values for these. | |
575 | * This code becomes unnecesary but harmless. Leave it for now in case | |
576 | * someone uses genbidi on pre-Unicode 5.0 data. | |
577 | */ | |
578 | for(i=0; i<LENGTHOF(defaultBidi); ++i) { | |
579 | start=defaultBidi[i][0]; | |
580 | end=defaultBidi[i][1]; | |
581 | if(!upvec_setValue(pv, start, end+1, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode)) { | |
582 | fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n", | |
583 | (long)start, (long)end, u_errorName(*pErrorCode)); | |
584 | exit(*pErrorCode); | |
585 | } | |
586 | } | |
587 | ||
588 | u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode); | |
589 | ||
590 | if(U_FAILURE(*pErrorCode)) { | |
591 | return; | |
592 | } | |
593 | } | |
594 | ||
595 | /* DerivedBidiClass.txt ----------------------------------------------------- */ | |
596 | ||
597 | static void U_CALLCONV | |
598 | bidiClassLineFn(void *context, | |
599 | char *fields[][2], int32_t fieldCount, | |
600 | UErrorCode *pErrorCode) { | |
601 | char *s; | |
602 | uint32_t start, limit, value; | |
603 | ||
604 | /* get the code point range */ | |
605 | u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode); | |
606 | if(U_FAILURE(*pErrorCode)) { | |
607 | fprintf(stderr, "genbidi: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]); | |
608 | exit(*pErrorCode); | |
609 | } | |
610 | ++limit; | |
611 | ||
612 | /* parse bidi class */ | |
613 | s=trimTerminateField(fields[1][0], fields[1][1]); | |
614 | value=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, s); | |
615 | if((int32_t)value<0) { | |
616 | fprintf(stderr, "genbidi error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", s); | |
617 | exit(U_PARSE_ERROR); | |
618 | } | |
619 | ||
620 | if(!upvec_setValue(pv, start, limit, 0, value, UBIDI_CLASS_MASK, pErrorCode)) { | |
621 | fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n", | |
622 | (int)start, (int)limit-1, u_errorName(*pErrorCode)); | |
623 | exit(*pErrorCode); | |
624 | } | |
625 | } | |
626 | ||
627 | /* | |
628 | * Hey, Emacs, please set the following: | |
629 | * | |
630 | * Local Variables: | |
631 | * indent-tabs-mode: nil | |
632 | * End: | |
633 | * | |
634 | */ |