]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
73c04bcf | 4 | * Copyright (C) 2001-2005, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: gennorm.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2001may25 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This program reads the Unicode character database text file, | |
17 | * parses it, and extracts the data for normalization. | |
18 | * It then preprocesses it and writes a binary file for efficient use | |
19 | * in various Unicode text normalization processes. | |
20 | */ | |
21 | ||
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/uchar.h" | |
374ca955 | 26 | #include "unicode/ustring.h" |
b75a7d8f | 27 | #include "unicode/putil.h" |
374ca955 A |
28 | #include "unicode/uclean.h" |
29 | #include "unicode/udata.h" | |
30 | #include "unicode/uset.h" | |
b75a7d8f A |
31 | #include "cmemory.h" |
32 | #include "cstring.h" | |
b75a7d8f A |
33 | #include "unewdata.h" |
34 | #include "uoptions.h" | |
35 | #include "uparse.h" | |
36 | #include "unormimp.h" | |
37 | ||
38 | U_CDECL_BEGIN | |
39 | #include "gennorm.h" | |
40 | U_CDECL_END | |
41 | ||
b75a7d8f A |
42 | UBool beVerbose=FALSE, haveCopyright=TRUE; |
43 | ||
44 | /* prototypes --------------------------------------------------------------- */ | |
45 | ||
46 | static void | |
47 | parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError); | |
48 | ||
49 | static void | |
50 | parseDB(const char *filename, UErrorCode *pErrorCode); | |
51 | ||
52 | /* -------------------------------------------------------------------------- */ | |
53 | ||
374ca955 A |
/*
 * Indexes into options[] below.
 * The order of these constants must match the order of the options[] entries.
 */
enum {
    HELP_H,             /* -h */
    HELP_QUESTION_MARK, /* -? */
    VERBOSE,            /* -v */
    COPYRIGHT,          /* -c */
    DESTDIR,            /* -d */
    SOURCEDIR,          /* -s */
    UNICODE_VERSION,    /* -u */
    ICUDATADIR,         /* -i */
    CSOURCE,            /* -C */
    STORE_FLAGS         /* -p (--prune) */
};
66 | ||
b75a7d8f A |
67 | static UOption options[]={ |
68 | UOPTION_HELP_H, | |
69 | UOPTION_HELP_QUESTION_MARK, | |
70 | UOPTION_VERBOSE, | |
71 | UOPTION_COPYRIGHT, | |
72 | UOPTION_DESTDIR, | |
73 | UOPTION_SOURCEDIR, | |
73c04bcf A |
74 | UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), |
75 | UOPTION_ICUDATADIR, | |
76 | UOPTION_DEF("csource", 'C', UOPT_NO_ARG), | |
77 | UOPTION_DEF("prune", 'p', UOPT_REQUIRES_ARG) | |
b75a7d8f A |
78 | }; |
79 | ||
80 | extern int | |
81 | main(int argc, char* argv[]) { | |
82 | #if !UCONFIG_NO_NORMALIZATION | |
83 | char filename[300]; | |
84 | #endif | |
85 | const char *srcDir=NULL, *destDir=NULL, *suffix=NULL; | |
86 | char *basename=NULL; | |
87 | UErrorCode errorCode=U_ZERO_ERROR; | |
88 | ||
89 | U_MAIN_INIT_ARGS(argc, argv); | |
90 | ||
91 | /* preset then read command line options */ | |
92 | options[4].value=u_getDataDirectory(); | |
93 | options[5].value=""; | |
94 | options[6].value="3.0.0"; | |
374ca955 | 95 | options[ICUDATADIR].value=u_getDataDirectory(); |
b75a7d8f A |
96 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); |
97 | ||
98 | /* error handling, printing usage message */ | |
99 | if(argc<0) { | |
100 | fprintf(stderr, | |
101 | "error in command line argument \"%s\"\n", | |
102 | argv[-argc]); | |
103 | } | |
104 | if(argc<0 || options[0].doesOccur || options[1].doesOccur) { | |
105 | /* | |
106 | * Broken into chucks because the C89 standard says the minimum | |
107 | * required supported string length is 509 bytes. | |
108 | */ | |
109 | fprintf(stderr, | |
110 | "Usage: %s [-options] [suffix]\n" | |
111 | "\n" | |
112 | "Read the UnicodeData.txt file and other Unicode properties files and\n" | |
113 | "create a binary file " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " with the normalization data\n" | |
114 | "\n", | |
115 | argv[0]); | |
116 | fprintf(stderr, | |
117 | "Options:\n" | |
118 | "\t-h or -? or --help this usage text\n" | |
119 | "\t-v or --verbose verbose output\n" | |
120 | "\t-c or --copyright include a copyright notice\n" | |
73c04bcf A |
121 | "\t-u or --unicode Unicode version, followed by the version like 3.0.0\n" |
122 | "\t-C or --csource generate a .c source file rather than the .icu binary\n"); | |
123 | fprintf(stderr, | |
124 | "\t-p or --prune flags Prune for data modularization:\n" | |
125 | "\t Determine what data is to be stored.\n" | |
126 | "\t 0 (zero) stores minimal data (only for NFD)\n" | |
127 | "\t lowercase letters turn off data, uppercase turn on (use with 0)\n"); | |
128 | fprintf(stderr, | |
129 | "\t k: compatibility decompositions (NFKC, NFKD)\n" | |
130 | "\t c: composition data (NFC, NFKC)\n" | |
131 | "\t f: FCD data (will be generated at load time)\n" | |
132 | "\t a: auxiliary data (canonical closure etc.)\n" | |
133 | "\t x: exclusion sets (Unicode 3.2-level normalization)\n"); | |
b75a7d8f A |
134 | fprintf(stderr, |
135 | "\t-d or --destdir destination directory, followed by the path\n" | |
136 | "\t-s or --sourcedir source directory, followed by the path\n" | |
374ca955 A |
137 | "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" |
138 | "\t followed by path, defaults to <%s>\n" | |
b75a7d8f A |
139 | "\tsuffix suffix that is to be appended with a '-'\n" |
140 | "\t to the source file basenames before opening;\n" | |
374ca955 A |
141 | "\t 'gennorm new' will read UnicodeData-new.txt etc.\n", |
142 | u_getDataDirectory()); | |
b75a7d8f A |
143 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
144 | } | |
145 | ||
146 | /* get the options values */ | |
147 | beVerbose=options[2].doesOccur; | |
148 | haveCopyright=options[3].doesOccur; | |
149 | srcDir=options[5].value; | |
150 | destDir=options[4].value; | |
151 | ||
152 | if(argc>=2) { | |
153 | suffix=argv[1]; | |
154 | } else { | |
155 | suffix=NULL; | |
156 | } | |
157 | ||
158 | #if UCONFIG_NO_NORMALIZATION | |
159 | ||
160 | fprintf(stderr, | |
161 | "gennorm writes a dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE | |
162 | " because UCONFIG_NO_NORMALIZATION is set, \n" | |
163 | "see icu/source/common/unicode/uconfig.h\n"); | |
73c04bcf | 164 | generateData(destDir, options[CSOURCE].doesOccur); |
b75a7d8f A |
165 | |
166 | #else | |
167 | ||
168 | setUnicodeVersion(options[6].value); | |
169 | ||
374ca955 A |
170 | if (options[ICUDATADIR].doesOccur) { |
171 | u_setDataDirectory(options[ICUDATADIR].value); | |
172 | } | |
173 | ||
73c04bcf A |
174 | if(options[STORE_FLAGS].doesOccur) { |
175 | const char *s=options[STORE_FLAGS].value; | |
176 | char c; | |
177 | ||
178 | while((c=*s++)!=0) { | |
179 | switch(c) { | |
180 | case '0': | |
181 | gStoreFlags=0; /* store minimal data (only for NFD) */ | |
182 | break; | |
183 | ||
184 | /* lowercase letters: omit data */ | |
185 | case 'k': | |
186 | gStoreFlags&=~U_MASK(UGENNORM_STORE_COMPAT); | |
187 | break; | |
188 | case 'c': | |
189 | gStoreFlags&=~U_MASK(UGENNORM_STORE_COMPOSITION); | |
190 | break; | |
191 | case 'f': | |
192 | gStoreFlags&=~U_MASK(UGENNORM_STORE_FCD); | |
193 | break; | |
194 | case 'a': | |
195 | gStoreFlags&=~U_MASK(UGENNORM_STORE_AUX); | |
196 | break; | |
197 | case 'x': | |
198 | gStoreFlags&=~U_MASK(UGENNORM_STORE_EXCLUSIONS); | |
199 | break; | |
200 | ||
201 | /* uppercase letters: include data (use with 0) */ | |
202 | case 'K': | |
203 | gStoreFlags|=U_MASK(UGENNORM_STORE_COMPAT); | |
204 | break; | |
205 | case 'C': | |
206 | gStoreFlags|=U_MASK(UGENNORM_STORE_COMPOSITION); | |
207 | break; | |
208 | case 'F': | |
209 | gStoreFlags|=U_MASK(UGENNORM_STORE_FCD); | |
210 | break; | |
211 | case 'A': | |
212 | gStoreFlags|=U_MASK(UGENNORM_STORE_AUX); | |
213 | break; | |
214 | case 'X': | |
215 | gStoreFlags|=U_MASK(UGENNORM_STORE_EXCLUSIONS); | |
216 | break; | |
217 | ||
218 | default: | |
219 | fprintf(stderr, "ignoring undefined prune flag '%c'\n", c); | |
220 | break; | |
221 | } | |
222 | } | |
223 | } | |
224 | ||
374ca955 A |
225 | /* |
226 | * Verify that we can work with properties | |
227 | * but don't call u_init() because that needs unorm.icu which we are just | |
228 | * going to build here. | |
229 | */ | |
230 | { | |
231 | U_STRING_DECL(ideo, "[:Ideographic:]", 15); | |
232 | USet *set; | |
233 | ||
234 | U_STRING_INIT(ideo, "[:Ideographic:]", 15); | |
235 | set=uset_openPattern(ideo, -1, &errorCode); | |
236 | if(U_FAILURE(errorCode) || !uset_contains(set, 0xf900)) { | |
237 | fprintf(stderr, "gennorm is unable to work with properties (uprops.icu): %s\n", u_errorName(errorCode)); | |
238 | exit(errorCode); | |
239 | } | |
240 | uset_close(set); | |
241 | } | |
242 | ||
b75a7d8f A |
243 | /* prepare the filename beginning with the source dir */ |
244 | uprv_strcpy(filename, srcDir); | |
245 | basename=filename+uprv_strlen(filename); | |
246 | if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { | |
247 | *basename++=U_FILE_SEP_CHAR; | |
248 | } | |
249 | ||
250 | /* initialize */ | |
251 | init(); | |
252 | ||
253 | /* process DerivedNormalizationProps.txt (name changed for Unicode 3.2, to <=31 characters) */ | |
254 | if(suffix==NULL) { | |
255 | uprv_strcpy(basename, "DerivedNormalizationProps.txt"); | |
256 | } else { | |
257 | uprv_strcpy(basename, "DerivedNormalizationProps"); | |
258 | basename[30]='-'; | |
259 | uprv_strcpy(basename+31, suffix); | |
260 | uprv_strcat(basename+31, ".txt"); | |
261 | } | |
262 | parseDerivedNormalizationProperties(filename, &errorCode, FALSE); | |
263 | if(U_FAILURE(errorCode)) { | |
264 | /* can be only U_FILE_ACCESS_ERROR - try filename from before Unicode 3.2 */ | |
265 | if(suffix==NULL) { | |
266 | uprv_strcpy(basename, "DerivedNormalizationProperties.txt"); | |
267 | } else { | |
268 | uprv_strcpy(basename, "DerivedNormalizationProperties"); | |
269 | basename[30]='-'; | |
270 | uprv_strcpy(basename+31, suffix); | |
271 | uprv_strcat(basename+31, ".txt"); | |
272 | } | |
273 | parseDerivedNormalizationProperties(filename, &errorCode, TRUE); | |
274 | } | |
275 | ||
276 | /* process UnicodeData.txt */ | |
277 | if(suffix==NULL) { | |
278 | uprv_strcpy(basename, "UnicodeData.txt"); | |
279 | } else { | |
280 | uprv_strcpy(basename, "UnicodeData"); | |
281 | basename[11]='-'; | |
282 | uprv_strcpy(basename+12, suffix); | |
283 | uprv_strcat(basename+12, ".txt"); | |
284 | } | |
285 | parseDB(filename, &errorCode); | |
286 | ||
287 | /* process parsed data */ | |
288 | if(U_SUCCESS(errorCode)) { | |
289 | processData(); | |
290 | ||
291 | /* write the properties data file */ | |
73c04bcf | 292 | generateData(destDir, options[CSOURCE].doesOccur); |
b75a7d8f A |
293 | |
294 | cleanUpData(); | |
295 | } | |
296 | ||
297 | #endif | |
298 | ||
299 | return errorCode; | |
300 | } | |
301 | ||
302 | #if !UCONFIG_NO_NORMALIZATION | |
303 | ||
304 | /* parser for DerivedNormalizationProperties.txt ---------------------------- */ | |
305 | ||
306 | static void U_CALLCONV | |
307 | derivedNormalizationPropertiesLineFn(void *context, | |
308 | char *fields[][2], int32_t fieldCount, | |
309 | UErrorCode *pErrorCode) { | |
310 | UChar string[32]; | |
311 | char *s; | |
312 | uint32_t start, end; | |
313 | int32_t count; | |
314 | uint8_t qcFlags; | |
315 | ||
316 | /* get code point range */ | |
317 | count=u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); | |
318 | if(U_FAILURE(*pErrorCode)) { | |
319 | fprintf(stderr, "gennorm: error parsing DerivedNormalizationProperties.txt mapping at %s\n", fields[0][0]); | |
320 | exit(*pErrorCode); | |
321 | } | |
322 | ||
323 | /* ignore hangul - handle explicitly */ | |
324 | if(start==0xac00) { | |
325 | return; | |
326 | } | |
327 | ||
328 | /* get property - ignore unrecognized ones */ | |
329 | s=(char *)u_skipWhitespace(fields[1][0]); | |
330 | if(*s=='N' && s[1]=='F') { | |
331 | /* quick check flag */ | |
332 | qcFlags=0x11; | |
333 | s+=2; | |
334 | if(*s=='K') { | |
335 | qcFlags<<=1; | |
336 | ++s; | |
337 | } | |
338 | ||
339 | if(*s=='C' && s[1]=='_') { | |
340 | s+=2; | |
341 | } else if(*s=='D' && s[1]=='_') { | |
342 | qcFlags<<=2; | |
343 | s+=2; | |
344 | } else { | |
345 | return; | |
346 | } | |
347 | ||
374ca955 | 348 | if(0==uprv_strncmp(s, "NO", 2)) { |
b75a7d8f | 349 | qcFlags&=0xf; |
374ca955 | 350 | } else if(0==uprv_strncmp(s, "MAYBE", 5)) { |
b75a7d8f | 351 | qcFlags&=0x30; |
374ca955 A |
352 | } else if(0==uprv_strncmp(s, "QC", 2) && *(s=(char *)u_skipWhitespace(s+2))==';') { |
353 | /* | |
354 | * Unicode 4.0.1: | |
355 | * changes single field "NFD_NO" -> two fields "NFD_QC; N" etc. | |
356 | */ | |
357 | /* start of the field */ | |
358 | s=(char *)u_skipWhitespace(s+1); | |
359 | if(*s=='N') { | |
360 | qcFlags&=0xf; | |
361 | } else if(*s=='M') { | |
362 | qcFlags&=0x30; | |
363 | } else { | |
364 | return; /* do nothing for "Yes" because it's the default value */ | |
365 | } | |
b75a7d8f | 366 | } else { |
374ca955 | 367 | return; /* do nothing for "Yes" because it's the default value */ |
b75a7d8f A |
368 | } |
369 | ||
370 | /* set this flag for all code points in this range */ | |
371 | while(start<=end) { | |
372 | setQCFlags(start++, qcFlags); | |
373 | } | |
374 | } else if(0==uprv_memcmp(s, "Comp_Ex", 7) || 0==uprv_memcmp(s, "Full_Composition_Exclusion", 26)) { | |
375 | /* full composition exclusion */ | |
376 | while(start<=end) { | |
377 | setCompositionExclusion(start++); | |
378 | } | |
374ca955 A |
379 | } else if( |
380 | ((0==uprv_memcmp(s, "FNC", 3) && *(s=(char *)u_skipWhitespace(s+3))==';') || | |
381 | (0==uprv_memcmp(s, "FC_NFKC", 7) && *(s=(char *)u_skipWhitespace(s+7))==';')) | |
382 | ||
383 | ) { | |
b75a7d8f A |
384 | /* FC_NFKC_Closure, parse field 2 to get the string */ |
385 | char *t; | |
386 | ||
387 | /* start of the field */ | |
388 | s=(char *)u_skipWhitespace(s+1); | |
389 | ||
390 | /* find the end of the field */ | |
391 | for(t=s; *t!=';' && *t!='#' && *t!=0 && *t!='\n' && *t!='\r'; ++t) {} | |
392 | *t=0; | |
393 | ||
394 | string[0]=(UChar)u_parseString(s, string+1, 31, NULL, pErrorCode); | |
395 | if(U_FAILURE(*pErrorCode)) { | |
396 | fprintf(stderr, "gennorm error: illegal FNC string at %s\n", fields[0][0]); | |
397 | exit(*pErrorCode); | |
398 | } | |
399 | while(start<=end) { | |
400 | setFNC(start++, string); | |
401 | } | |
402 | } | |
403 | } | |
404 | ||
405 | static void | |
406 | parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError) { | |
407 | char *fields[2][2]; | |
408 | ||
409 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
410 | return; | |
411 | } | |
412 | ||
413 | u_parseDelimitedFile(filename, ';', fields, 2, derivedNormalizationPropertiesLineFn, NULL, pErrorCode); | |
414 | if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { | |
415 | fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); | |
416 | exit(*pErrorCode); | |
417 | } | |
418 | } | |
419 | ||
420 | /* parser for UnicodeData.txt ----------------------------------------------- */ | |
421 | ||
422 | static void U_CALLCONV | |
423 | unicodeDataLineFn(void *context, | |
424 | char *fields[][2], int32_t fieldCount, | |
425 | UErrorCode *pErrorCode) { | |
426 | uint32_t decomp[40]; | |
427 | Norm norm; | |
428 | const char *s; | |
429 | char *end; | |
430 | uint32_t code, value; | |
431 | int32_t length; | |
432 | UBool isCompat, something=FALSE; | |
433 | ||
434 | /* ignore First and Last entries for ranges */ | |
435 | if( *fields[1][0]=='<' && | |
436 | (length=(int32_t)(fields[1][1]-fields[1][0]))>=9 && | |
437 | (0==uprv_memcmp(", First>", fields[1][1]-8, 8) || 0==uprv_memcmp(", Last>", fields[1][1]-7, 7)) | |
438 | ) { | |
439 | return; | |
440 | } | |
441 | ||
442 | /* reset the properties */ | |
443 | uprv_memset(&norm, 0, sizeof(Norm)); | |
444 | ||
73c04bcf A |
445 | /* |
446 | * The combiningIndex must not be initialized to 0 because 0 is the | |
447 | * combiningIndex of the first forward-combining character. | |
448 | */ | |
449 | norm.combiningIndex=0xffff; | |
450 | ||
b75a7d8f A |
451 | /* get the character code, field 0 */ |
452 | code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16); | |
453 | if(end<=fields[0][0] || end!=fields[0][1]) { | |
454 | fprintf(stderr, "gennorm: syntax error in field 0 at %s\n", fields[0][0]); | |
455 | *pErrorCode=U_PARSE_ERROR; | |
456 | exit(U_PARSE_ERROR); | |
457 | } | |
458 | ||
459 | /* get canonical combining class, field 3 */ | |
460 | value=(uint32_t)uprv_strtoul(fields[3][0], &end, 10); | |
461 | if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) { | |
462 | fprintf(stderr, "gennorm: syntax error in field 3 at %s\n", fields[0][0]); | |
463 | *pErrorCode=U_PARSE_ERROR; | |
464 | exit(U_PARSE_ERROR); | |
465 | } | |
466 | if(value>0) { | |
467 | norm.udataCC=(uint8_t)value; | |
468 | something=TRUE; | |
469 | } | |
470 | ||
471 | /* get the decomposition, field 5 */ | |
472 | if(fields[5][0]<fields[5][1]) { | |
473 | if(*(s=fields[5][0])=='<') { | |
474 | ++s; | |
475 | isCompat=TRUE; | |
476 | ||
477 | /* skip and ignore the compatibility type name */ | |
478 | do { | |
479 | if(s==fields[5][1]) { | |
480 | /* missing '>' */ | |
481 | fprintf(stderr, "gennorm: syntax error in field 5 at %s\n", fields[0][0]); | |
482 | *pErrorCode=U_PARSE_ERROR; | |
483 | exit(U_PARSE_ERROR); | |
484 | } | |
485 | } while(*s++!='>'); | |
486 | } else { | |
487 | isCompat=FALSE; | |
488 | } | |
489 | ||
490 | /* parse the decomposition string */ | |
491 | length=u_parseCodePoints(s, decomp, sizeof(decomp)/4, pErrorCode); | |
492 | if(U_FAILURE(*pErrorCode)) { | |
493 | fprintf(stderr, "gennorm error parsing UnicodeData.txt decomposition of U+%04lx - %s\n", | |
494 | (long)code, u_errorName(*pErrorCode)); | |
495 | exit(*pErrorCode); | |
496 | } | |
497 | ||
498 | /* store the string */ | |
499 | if(length>0) { | |
500 | something=TRUE; | |
501 | if(isCompat) { | |
502 | norm.lenNFKD=(uint8_t)length; | |
503 | norm.nfkd=decomp; | |
504 | } else { | |
505 | if(length>2) { | |
506 | fprintf(stderr, "gennorm: error - length of NFD(U+%04lx) = %ld >2 in UnicodeData - illegal\n", | |
507 | (long)code, (long)length); | |
508 | *pErrorCode=U_PARSE_ERROR; | |
509 | exit(U_PARSE_ERROR); | |
510 | } | |
511 | norm.lenNFD=(uint8_t)length; | |
512 | norm.nfd=decomp; | |
513 | } | |
514 | } | |
515 | } | |
516 | ||
517 | /* check for non-character code points */ | |
518 | if((code&0xfffe)==0xfffe || (uint32_t)(code-0xfdd0)<0x20 || code>0x10ffff) { | |
519 | fprintf(stderr, "gennorm: error - properties for non-character code point U+%04lx\n", | |
520 | (long)code); | |
521 | *pErrorCode=U_PARSE_ERROR; | |
522 | exit(U_PARSE_ERROR); | |
523 | } | |
524 | ||
525 | if(something) { | |
526 | /* there are normalization values, so store them */ | |
527 | #if 0 | |
528 | if(beVerbose) { | |
529 | printf("store values for U+%04lx: cc=%d, lenNFD=%ld, lenNFKD=%ld\n", | |
530 | (long)code, norm.udataCC, (long)norm.lenNFD, (long)norm.lenNFKD); | |
531 | } | |
532 | #endif | |
533 | storeNorm(code, &norm); | |
534 | } | |
535 | } | |
536 | ||
537 | static void | |
538 | parseDB(const char *filename, UErrorCode *pErrorCode) { | |
539 | char *fields[15][2]; | |
540 | ||
541 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
542 | return; | |
543 | } | |
544 | ||
545 | u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode); | |
546 | if(U_FAILURE(*pErrorCode)) { | |
547 | fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); | |
548 | exit(*pErrorCode); | |
549 | } | |
550 | } | |
551 | ||
552 | #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
553 | ||
554 | /* | |
555 | * Hey, Emacs, please set the following: | |
556 | * | |
557 | * Local Variables: | |
558 | * indent-tabs-mode: nil | |
559 | * End: | |
560 | * | |
561 | */ |