]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************** | |
3 | * | |
73c04bcf | 4 | * Copyright (C) 1998-2006, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************** | |
8 | * | |
9 | * | |
10 | * makeconv.c: | |
11 | * tool creating a binary (compressed) representation of the conversion mapping | |
12 | * table (IBM NLTC ucmap format). | |
13 | * | |
14 | * 05/04/2000 helena Added fallback mapping into the picture... | |
15 | * 06/29/2000 helena Major rewrite of the callback APIs. | |
16 | */ | |
17 | ||
18 | #include <stdio.h> | |
19 | #include "unicode/putil.h" | |
b75a7d8f A |
20 | #include "unicode/ucnv_err.h" |
21 | #include "ucnv_bld.h" | |
22 | #include "ucnv_imp.h" | |
23 | #include "ucnv_cnv.h" | |
24 | #include "cstring.h" | |
25 | #include "cmemory.h" | |
374ca955 | 26 | #include "uinvchar.h" |
b75a7d8f A |
27 | #include "filestrm.h" |
28 | #include "toolutil.h" | |
29 | #include "uoptions.h" | |
30 | #include "unicode/udata.h" | |
31 | #include "unewdata.h" | |
374ca955 A |
32 | #include "uparse.h" |
33 | #include "ucm.h" | |
b75a7d8f A |
34 | #include "makeconv.h" |
35 | #include "genmbcs.h" | |
36 | ||
37 | #define DEBUG 0 | |
38 | ||
73c04bcf | 39 | |
374ca955 A |
40 | typedef struct ConvData { |
41 | UCMFile *ucm; | |
42 | NewConverter *cnvData, *extData; | |
43 | UConverterSharedData sharedData; | |
44 | UConverterStaticData staticData; | |
45 | } ConvData; | |
46 | ||
47 | static void | |
48 | initConvData(ConvData *data) { | |
49 | uprv_memset(data, 0, sizeof(ConvData)); | |
50 | data->sharedData.structSize=sizeof(UConverterSharedData); | |
51 | data->staticData.structSize=sizeof(UConverterStaticData); | |
52 | data->sharedData.staticData=&data->staticData; | |
53 | } | |
54 | ||
55 | static void | |
56 | cleanupConvData(ConvData *data) { | |
57 | if(data!=NULL) { | |
58 | if(data->cnvData!=NULL) { | |
59 | data->cnvData->close(data->cnvData); | |
60 | data->cnvData=NULL; | |
61 | } | |
62 | if(data->extData!=NULL) { | |
63 | data->extData->close(data->extData); | |
64 | data->extData=NULL; | |
65 | } | |
66 | ucm_close(data->ucm); | |
67 | data->ucm=NULL; | |
68 | } | |
69 | } | |
70 | ||
b75a7d8f A |
71 | /* |
72 | * from ucnvstat.c - static prototypes of data-based converters | |
73 | */ | |
74 | extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; | |
75 | ||
76 | /* | |
77 | * Global - verbosity | |
78 | */ | |
79 | UBool VERBOSE = FALSE; | |
b75a7d8f | 80 | |
374ca955 A |
81 | static void |
82 | createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); | |
b75a7d8f A |
83 | |
84 | /* | |
85 | * Set up the UNewData and write the converter.. | |
86 | */ | |
374ca955 A |
87 | static void |
88 | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); | |
b75a7d8f A |
89 | |
90 | UBool haveCopyright=TRUE; | |
91 | ||
92 | static UDataInfo dataInfo={ | |
93 | sizeof(UDataInfo), | |
94 | 0, | |
95 | ||
96 | U_IS_BIG_ENDIAN, | |
97 | U_CHARSET_FAMILY, | |
98 | sizeof(UChar), | |
99 | 0, | |
100 | ||
101 | {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ | |
102 | {6, 2, 0, 0}, /* formatVersion */ | |
103 | {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ | |
104 | }; | |
105 | ||
374ca955 A |
106 | static void |
107 | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) | |
b75a7d8f A |
108 | { |
109 | UNewDataMemory *mem = NULL; | |
110 | uint32_t sz2; | |
111 | uint32_t size = 0; | |
374ca955 | 112 | int32_t tableType; |
b75a7d8f A |
113 | |
114 | if(U_FAILURE(*status)) | |
115 | { | |
116 | return; | |
117 | } | |
118 | ||
374ca955 A |
119 | tableType=TABLE_NONE; |
120 | if(data->cnvData!=NULL) { | |
121 | tableType|=TABLE_BASE; | |
122 | } | |
123 | if(data->extData!=NULL) { | |
124 | tableType|=TABLE_EXT; | |
125 | } | |
126 | ||
b75a7d8f A |
127 | mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); |
128 | ||
129 | if(U_FAILURE(*status)) | |
130 | { | |
131 | fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", | |
132 | cnvName, | |
133 | "cnv", | |
134 | u_errorName(*status)); | |
135 | return; | |
136 | } | |
137 | ||
138 | if(VERBOSE) | |
139 | { | |
140 | fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv"); | |
141 | } | |
142 | ||
374ca955 | 143 | |
b75a7d8f | 144 | /* all read only, clean, platform independent data. Mmmm. :) */ |
374ca955 | 145 | udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); |
b75a7d8f A |
146 | size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ |
147 | /* Now, write the table */ | |
374ca955 A |
148 | if(tableType&TABLE_BASE) { |
149 | size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); | |
150 | } | |
151 | if(tableType&TABLE_EXT) { | |
152 | size += data->extData->write(data->extData, &data->staticData, mem, tableType); | |
153 | } | |
b75a7d8f A |
154 | |
155 | sz2 = udata_finish(mem, status); | |
156 | if(size != sz2) | |
157 | { | |
374ca955 | 158 | fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); |
b75a7d8f A |
159 | *status=U_INTERNAL_PROGRAM_ERROR; |
160 | } | |
161 | if(VERBOSE) | |
162 | { | |
374ca955 | 163 | fprintf(stderr, "- Wrote %u bytes to the udata.\n", (int)sz2); |
b75a7d8f A |
164 | } |
165 | } | |
166 | ||
167 | static UOption options[]={ | |
168 | UOPTION_HELP_H, /* 0 Numbers for those who*/ | |
169 | UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */ | |
170 | UOPTION_COPYRIGHT, /* 2 */ | |
171 | UOPTION_VERSION, /* 3 */ | |
172 | UOPTION_DESTDIR, /* 4 */ | |
173 | UOPTION_VERBOSE, /* 5 */ | |
b75a7d8f A |
174 | }; |
175 | ||
176 | int main(int argc, char* argv[]) | |
177 | { | |
374ca955 A |
178 | ConvData data; |
179 | UErrorCode err = U_ZERO_ERROR, localError; | |
b75a7d8f | 180 | char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
b75a7d8f | 181 | const char* destdir, *arg; |
b75a7d8f A |
182 | size_t destdirlen; |
183 | char* dot = NULL, *outBasename; | |
184 | char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
185 | char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
186 | UVersionInfo icuVersion; | |
374ca955 A |
187 | UBool printFilename; |
188 | ||
189 | err = U_ZERO_ERROR; | |
b75a7d8f A |
190 | |
191 | U_MAIN_INIT_ARGS(argc, argv); | |
192 | ||
193 | /* Set up the ICU version number */ | |
194 | u_getVersion(icuVersion); | |
195 | uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); | |
196 | ||
197 | /* preset then read command line options */ | |
198 | options[4].value=u_getDataDirectory(); | |
199 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); | |
200 | ||
201 | /* error handling, printing usage message */ | |
202 | if(argc<0) { | |
203 | fprintf(stderr, | |
204 | "error in command line argument \"%s\"\n", | |
205 | argv[-argc]); | |
206 | } else if(argc<2) { | |
207 | argc=-1; | |
208 | } | |
209 | if(argc<0 || options[0].doesOccur || options[1].doesOccur) { | |
210 | fprintf(stderr, | |
211 | "usage: %s [-options] files...\n" | |
212 | "\tread .ucm codepage mapping files and write .cnv files\n" | |
213 | "options:\n" | |
214 | "\t-h or -? or --help this usage text\n" | |
215 | "\t-V or --version show a version message\n" | |
216 | "\t-c or --copyright include a copyright notice\n" | |
217 | "\t-d or --destdir destination directory, followed by the path\n" | |
218 | "\t-v or --verbose Turn on verbose output\n", | |
219 | argv[0]); | |
b75a7d8f A |
220 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
221 | } | |
222 | ||
223 | if(options[3].doesOccur) { | |
73c04bcf | 224 | fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", |
b75a7d8f | 225 | dataInfo.formatVersion[0], dataInfo.formatVersion[1]); |
73c04bcf | 226 | fprintf(stderr, U_COPYRIGHT_STRING "\n"); |
b75a7d8f A |
227 | exit(0); |
228 | } | |
229 | ||
b75a7d8f A |
230 | /* get the options values */ |
231 | haveCopyright = options[2].doesOccur; | |
232 | destdir = options[4].value; | |
233 | VERBOSE = options[5].doesOccur; | |
234 | ||
235 | if (destdir != NULL && *destdir != 0) { | |
236 | uprv_strcpy(outFileName, destdir); | |
237 | destdirlen = uprv_strlen(destdir); | |
238 | outBasename = outFileName + destdirlen; | |
239 | if (*(outBasename - 1) != U_FILE_SEP_CHAR) { | |
240 | *outBasename++ = U_FILE_SEP_CHAR; | |
241 | ++destdirlen; | |
242 | } | |
243 | } else { | |
244 | destdirlen = 0; | |
245 | outBasename = outFileName; | |
246 | } | |
247 | ||
248 | #if DEBUG | |
249 | { | |
250 | int i; | |
251 | printf("makeconv: processing %d files...\n", argc - 1); | |
252 | for(i=1; i<argc; ++i) { | |
253 | printf("%s ", argv[i]); | |
254 | } | |
255 | printf("\n"); | |
256 | fflush(stdout); | |
257 | } | |
258 | #endif | |
259 | ||
374ca955 A |
260 | err = U_ZERO_ERROR; |
261 | printFilename = (UBool) (argc > 2 || VERBOSE); | |
262 | for (++argv; --argc; ++argv) | |
b75a7d8f | 263 | { |
374ca955 | 264 | arg = getLongPathname(*argv); |
b75a7d8f | 265 | |
374ca955 A |
266 | /*produces the right destination path for display*/ |
267 | if (destdirlen != 0) | |
b75a7d8f | 268 | { |
374ca955 | 269 | const char *basename; |
b75a7d8f | 270 | |
374ca955 A |
271 | /* find the last file sepator */ |
272 | basename = findBasename(arg); | |
273 | uprv_strcpy(outBasename, basename); | |
b75a7d8f | 274 | } |
374ca955 | 275 | else |
b75a7d8f | 276 | { |
374ca955 | 277 | uprv_strcpy(outFileName, arg); |
b75a7d8f A |
278 | } |
279 | ||
374ca955 A |
280 | /*removes the extension if any is found*/ |
281 | dot = uprv_strrchr(outBasename, '.'); | |
282 | if (dot) | |
b75a7d8f | 283 | { |
374ca955 | 284 | *dot = '\0'; |
b75a7d8f A |
285 | } |
286 | ||
374ca955 A |
287 | /* the basename without extension is the converter name */ |
288 | uprv_strcpy(cnvName, outBasename); | |
b75a7d8f | 289 | |
374ca955 A |
290 | /*Adds the target extension*/ |
291 | uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); | |
b75a7d8f A |
292 | |
293 | #if DEBUG | |
294 | printf("makeconv: processing %s ...\n", arg); | |
295 | fflush(stdout); | |
296 | #endif | |
374ca955 A |
297 | localError = U_ZERO_ERROR; |
298 | initConvData(&data); | |
299 | createConverter(&data, arg, &localError); | |
b75a7d8f | 300 | |
374ca955 | 301 | if (U_FAILURE(localError)) |
b75a7d8f | 302 | { |
374ca955 A |
303 | /* if an error is found, print out an error msg and keep going */ |
304 | fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, | |
305 | u_errorName(localError)); | |
306 | if(U_SUCCESS(err)) { | |
307 | err = localError; | |
308 | } | |
b75a7d8f | 309 | } |
374ca955 | 310 | else |
b75a7d8f | 311 | { |
374ca955 A |
312 | /* Make the static data name equal to the file name */ |
313 | if( /*VERBOSE && */ uprv_stricmp(cnvName,data.staticData.name)) | |
314 | { | |
315 | fprintf(stderr, "Warning: %s%s claims to be '%s'\n", | |
b75a7d8f A |
316 | cnvName, |
317 | CONVERTER_FILE_EXTENSION, | |
374ca955 A |
318 | data.staticData.name); |
319 | } | |
320 | ||
321 | uprv_strcpy((char*)data.staticData.name, cnvName); | |
322 | ||
323 | if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { | |
324 | fprintf(stderr, | |
325 | "Error: A converter name must contain only invariant characters.\n" | |
326 | "%s is not a valid converter name.\n", | |
327 | data.staticData.name); | |
328 | if(U_SUCCESS(err)) { | |
329 | err = U_INVALID_TABLE_FORMAT; | |
330 | } | |
331 | } | |
332 | ||
73c04bcf | 333 | uprv_strcpy(cnvNameWithPkg, cnvName); |
374ca955 A |
334 | |
335 | localError = U_ZERO_ERROR; | |
336 | writeConverterData(&data, cnvNameWithPkg, destdir, &localError); | |
374ca955 A |
337 | |
338 | if(U_FAILURE(localError)) | |
339 | { | |
340 | /* if an error is found, print out an error msg and keep going*/ | |
341 | fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, | |
342 | u_errorName(localError)); | |
343 | if(U_SUCCESS(err)) { | |
344 | err = localError; | |
345 | } | |
346 | } | |
347 | else if (printFilename) | |
348 | { | |
349 | puts(outFileName); | |
350 | } | |
b75a7d8f | 351 | } |
374ca955 A |
352 | fflush(stdout); |
353 | fflush(stderr); | |
354 | ||
355 | cleanupConvData(&data); | |
b75a7d8f A |
356 | } |
357 | ||
374ca955 | 358 | return err; |
b75a7d8f A |
359 | } |
360 | ||
361 | static void | |
362 | getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { | |
363 | if( (name[0]=='i' || name[0]=='I') && | |
364 | (name[1]=='b' || name[1]=='B') && | |
365 | (name[2]=='m' || name[2]=='M') | |
366 | ) { | |
367 | name+=3; | |
368 | if(*name=='-') { | |
369 | ++name; | |
370 | } | |
371 | *pPlatform=UCNV_IBM; | |
372 | *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); | |
373 | } else { | |
374 | *pPlatform=UCNV_UNKNOWN; | |
375 | *pCCSID=0; | |
376 | } | |
377 | } | |
378 | ||
374ca955 A |
379 | static void |
380 | readHeader(ConvData *data, | |
381 | FileStream* convFile, | |
382 | const char* converterName, | |
383 | UErrorCode *pErrorCode) { | |
b75a7d8f | 384 | char line[200]; |
374ca955 A |
385 | char *s, *key, *value; |
386 | const UConverterStaticData *prototype; | |
b75a7d8f | 387 | UConverterStaticData *staticData; |
b75a7d8f A |
388 | |
389 | if(U_FAILURE(*pErrorCode)) { | |
390 | return; | |
391 | } | |
392 | ||
374ca955 | 393 | staticData=&data->staticData; |
b75a7d8f A |
394 | staticData->platform=UCNV_IBM; |
395 | staticData->subCharLen=0; | |
396 | ||
397 | while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
374ca955 A |
398 | /* basic parsing and handling of state-related items */ |
399 | if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { | |
b75a7d8f A |
400 | continue; |
401 | } | |
402 | ||
403 | /* stop at the beginning of the mapping section */ | |
374ca955 | 404 | if(uprv_strcmp(line, "CHARMAP")==0) { |
b75a7d8f A |
405 | break; |
406 | } | |
407 | ||
b75a7d8f A |
408 | /* collect the information from the header field, ignore unknown keys */ |
409 | if(uprv_strcmp(key, "code_set_name")==0) { | |
410 | if(*value!=0) { | |
374ca955 | 411 | uprv_strcpy((char *)staticData->name, value); |
b75a7d8f A |
412 | getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); |
413 | } | |
b75a7d8f | 414 | } else if(uprv_strcmp(key, "subchar")==0) { |
374ca955 A |
415 | uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
416 | int8_t length; | |
417 | ||
418 | s=value; | |
419 | length=ucm_parseBytes(bytes, line, (const char **)&s); | |
420 | if(1<=length && length<=4 && *s==0) { | |
421 | staticData->subCharLen=length; | |
422 | uprv_memcpy(staticData->subChar, bytes, length); | |
b75a7d8f A |
423 | } else { |
424 | fprintf(stderr, "error: illegal <subchar> %s\n", value); | |
425 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
426 | return; | |
427 | } | |
428 | } else if(uprv_strcmp(key, "subchar1")==0) { | |
374ca955 | 429 | uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
b75a7d8f | 430 | |
374ca955 A |
431 | s=value; |
432 | if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { | |
433 | staticData->subChar1=bytes[0]; | |
b75a7d8f A |
434 | } else { |
435 | fprintf(stderr, "error: illegal <subchar1> %s\n", value); | |
436 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
437 | return; | |
438 | } | |
374ca955 A |
439 | } |
440 | } | |
441 | ||
442 | /* copy values from the UCMFile to the static data */ | |
443 | staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; | |
444 | staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; | |
445 | staticData->conversionType=data->ucm->states.conversionType; | |
446 | ||
447 | if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { | |
448 | fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); | |
449 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
450 | return; | |
451 | } | |
452 | ||
453 | /* | |
454 | * Now that we know the type, copy any 'default' values from the table. | |
455 | * We need not check the type any further because the parser only | |
456 | * recognizes what we have prototypes for. | |
457 | * | |
458 | * For delta (extension-only) tables, copy values from the base file | |
459 | * instead, see createConverter(). | |
460 | */ | |
461 | if(data->ucm->baseName[0]==0) { | |
462 | prototype=ucnv_converterStaticData[staticData->conversionType]; | |
463 | if(prototype!=NULL) { | |
464 | if(staticData->name[0]==0) { | |
465 | uprv_strcpy((char *)staticData->name, prototype->name); | |
466 | } | |
467 | ||
468 | if(staticData->codepage==0) { | |
469 | staticData->codepage=prototype->codepage; | |
470 | } | |
471 | ||
472 | if(staticData->platform==0) { | |
473 | staticData->platform=prototype->platform; | |
474 | } | |
475 | ||
476 | if(staticData->minBytesPerChar==0) { | |
477 | staticData->minBytesPerChar=prototype->minBytesPerChar; | |
b75a7d8f A |
478 | } |
479 | ||
480 | if(staticData->maxBytesPerChar==0) { | |
374ca955 | 481 | staticData->maxBytesPerChar=prototype->maxBytesPerChar; |
b75a7d8f | 482 | } |
374ca955 A |
483 | |
484 | if(staticData->subCharLen==0) { | |
485 | staticData->subCharLen=prototype->subCharLen; | |
486 | if(prototype->subCharLen>0) { | |
487 | uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); | |
b75a7d8f A |
488 | } |
489 | } | |
b75a7d8f A |
490 | } |
491 | } | |
492 | ||
374ca955 A |
493 | if(data->ucm->states.outputType<0) { |
494 | data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; | |
495 | } | |
496 | ||
497 | if( staticData->subChar1!=0 && | |
498 | (staticData->minBytesPerChar>1 || | |
499 | (staticData->conversionType!=UCNV_MBCS && | |
500 | staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) | |
b75a7d8f A |
501 | ) { |
502 | fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); | |
503 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
504 | } | |
505 | } | |
506 | ||
374ca955 A |
507 | /* return TRUE if a base table was read, FALSE for an extension table */ |
508 | static UBool | |
509 | readFile(ConvData *data, const char* converterName, | |
510 | UErrorCode *pErrorCode) { | |
511 | char line[200]; | |
512 | char *end; | |
513 | FileStream *convFile; | |
b75a7d8f | 514 | |
374ca955 A |
515 | UCMStates *baseStates; |
516 | UBool dataIsBase; | |
b75a7d8f | 517 | |
374ca955 A |
518 | if(U_FAILURE(*pErrorCode)) { |
519 | return FALSE; | |
520 | } | |
b75a7d8f | 521 | |
374ca955 | 522 | data->ucm=ucm_open(); |
b75a7d8f | 523 | |
374ca955 A |
524 | convFile=T_FileStream_open(converterName, "r"); |
525 | if(convFile==NULL) { | |
526 | *pErrorCode=U_FILE_ACCESS_ERROR; | |
527 | return FALSE; | |
528 | } | |
b75a7d8f | 529 | |
374ca955 A |
530 | readHeader(data, convFile, converterName, pErrorCode); |
531 | if(U_FAILURE(*pErrorCode)) { | |
532 | return FALSE; | |
b75a7d8f A |
533 | } |
534 | ||
374ca955 A |
535 | if(data->ucm->baseName[0]==0) { |
536 | dataIsBase=TRUE; | |
537 | baseStates=&data->ucm->states; | |
538 | ucm_processStates(baseStates); | |
539 | } else { | |
540 | dataIsBase=FALSE; | |
541 | baseStates=NULL; | |
b75a7d8f | 542 | } |
b75a7d8f | 543 | |
374ca955 A |
544 | /* read the base table */ |
545 | ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); | |
546 | if(U_FAILURE(*pErrorCode)) { | |
547 | return FALSE; | |
b75a7d8f A |
548 | } |
549 | ||
374ca955 A |
550 | /* read an extension table if there is one */ |
551 | while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
552 | end=uprv_strchr(line, 0); | |
553 | while(line<end && | |
554 | (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { | |
555 | --end; | |
556 | } | |
557 | *end=0; | |
558 | ||
559 | if(line[0]=='#' || u_skipWhitespace(line)==end) { | |
560 | continue; /* ignore empty and comment lines */ | |
561 | } | |
562 | ||
563 | if(0==uprv_strcmp(line, "CHARMAP")) { | |
564 | /* read the extension table */ | |
565 | ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); | |
566 | } else { | |
567 | fprintf(stderr, "unexpected text after the base mapping table\n"); | |
568 | } | |
569 | break; | |
b75a7d8f | 570 | } |
374ca955 A |
571 | |
572 | T_FileStream_close(convFile); | |
573 | ||
574 | if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { | |
b75a7d8f | 575 | fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); |
374ca955 | 576 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
b75a7d8f | 577 | } |
374ca955 A |
578 | |
579 | return dataIsBase; | |
b75a7d8f A |
580 | } |
581 | ||
374ca955 A |
582 | static void |
583 | createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { | |
584 | ConvData baseData; | |
585 | UBool dataIsBase; | |
b75a7d8f | 586 | |
374ca955 A |
587 | UConverterStaticData *staticData; |
588 | UCMStates *states, *baseStates; | |
b75a7d8f | 589 | |
374ca955 A |
590 | if(U_FAILURE(*pErrorCode)) { |
591 | return; | |
b75a7d8f A |
592 | } |
593 | ||
374ca955 | 594 | initConvData(data); |
b75a7d8f | 595 | |
374ca955 A |
596 | dataIsBase=readFile(data, converterName, pErrorCode); |
597 | if(U_FAILURE(*pErrorCode)) { | |
598 | return; | |
b75a7d8f A |
599 | } |
600 | ||
374ca955 A |
601 | staticData=&data->staticData; |
602 | states=&data->ucm->states; | |
b75a7d8f | 603 | |
374ca955 A |
604 | if(dataIsBase) { |
605 | data->cnvData=MBCSOpen(data->ucm); | |
606 | if(data->cnvData==NULL) { | |
607 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
b75a7d8f | 608 | |
374ca955 A |
609 | } else if(!data->cnvData->isValid(data->cnvData, |
610 | staticData->subChar, staticData->subCharLen) | |
611 | ) { | |
612 | fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); | |
613 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f | 614 | |
374ca955 A |
615 | } else if(staticData->subChar1!=0 && |
616 | !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) | |
617 | ) { | |
618 | fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); | |
619 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f | 620 | |
374ca955 A |
621 | } else if(data->ucm->ext->mappingsLength>0) { |
622 | /* prepare the extension table, if there is one */ | |
623 | data->extData=CnvExtOpen(data->ucm); | |
624 | if(data->extData==NULL) { | |
625 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
b75a7d8f | 626 | |
374ca955 A |
627 | } else if( |
628 | !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) || | |
629 | !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) | |
630 | ) { | |
631 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f A |
632 | } |
633 | } | |
374ca955 A |
634 | |
635 | /* add the base table after ucm_checkBaseExt()! */ | |
636 | if( U_SUCCESS(*pErrorCode) && | |
637 | !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) | |
638 | ) { | |
639 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f | 640 | } |
374ca955 A |
641 | } else { |
642 | char baseFilename[500]; | |
643 | char *basename; | |
644 | ||
645 | initConvData(&baseData); | |
646 | ||
647 | /* assemble a path/filename for data->ucm->baseName */ | |
648 | uprv_strcpy(baseFilename, converterName); | |
649 | basename=(char *)findBasename(baseFilename); | |
650 | uprv_strcpy(basename, data->ucm->baseName); | |
651 | uprv_strcat(basename, ".ucm"); | |
652 | ||
653 | /* read the base table */ | |
654 | dataIsBase=readFile(&baseData, baseFilename, pErrorCode); | |
655 | if(U_FAILURE(*pErrorCode)) { | |
656 | return; | |
657 | } else if(!dataIsBase) { | |
658 | fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); | |
659 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
660 | } else { | |
661 | /* prepare the extension table */ | |
662 | data->extData=CnvExtOpen(data->ucm); | |
663 | if(data->extData==NULL) { | |
664 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
665 | ||
666 | } else { | |
667 | /* fill in gaps in extension file header fields */ | |
668 | UCMapping *m, *mLimit; | |
669 | uint8_t fallbackFlags; | |
670 | ||
671 | baseStates=&baseData.ucm->states; | |
672 | if(states->conversionType==UCNV_DBCS) { | |
673 | staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); | |
674 | } else if(states->minCharLength==0) { | |
675 | staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); | |
676 | } | |
677 | if(states->maxCharLength<states->minCharLength) { | |
678 | staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); | |
679 | } | |
680 | ||
681 | if(staticData->subCharLen==0) { | |
682 | uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); | |
683 | staticData->subCharLen=baseData.staticData.subCharLen; | |
684 | } | |
685 | /* | |
686 | * do not copy subChar1 - | |
687 | * only use what is explicitly specified | |
688 | * because it cannot be unset in the extension file header | |
689 | */ | |
690 | ||
691 | /* get the fallback flags */ | |
692 | fallbackFlags=0; | |
693 | for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; | |
694 | m<mLimit && fallbackFlags!=3; | |
695 | ++m | |
b75a7d8f | 696 | ) { |
374ca955 A |
697 | if(m->f==1) { |
698 | fallbackFlags|=1; | |
699 | } else if(m->f==3) { | |
700 | fallbackFlags|=2; | |
701 | } | |
b75a7d8f | 702 | } |
374ca955 A |
703 | for(m=data->ucm->base->mappings, mLimit=m+data->ucm->base->mappingsLength; |
704 | m<mLimit && fallbackFlags!=3; | |
705 | ++m | |
b75a7d8f | 706 | ) { |
374ca955 A |
707 | if(m->f==1) { |
708 | fallbackFlags|=1; | |
709 | } else if(m->f==3) { | |
710 | fallbackFlags|=2; | |
711 | } | |
b75a7d8f | 712 | } |
b75a7d8f | 713 | |
374ca955 A |
714 | if(fallbackFlags&1) { |
715 | staticData->hasFromUnicodeFallback=TRUE; | |
716 | } | |
717 | if(fallbackFlags&2) { | |
718 | staticData->hasToUnicodeFallback=TRUE; | |
719 | } | |
b75a7d8f | 720 | |
374ca955 A |
721 | if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { |
722 | fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); | |
723 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f | 724 | |
374ca955 A |
725 | } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { |
726 | fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); | |
727 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
b75a7d8f | 728 | |
374ca955 A |
729 | } else if( |
730 | !ucm_checkValidity(data->ucm->ext, baseStates) || | |
731 | !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) || | |
732 | !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) | |
733 | ) { | |
734 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
735 | } | |
736 | } | |
737 | } | |
738 | ||
739 | cleanupConvData(&baseData); | |
740 | } | |
b75a7d8f A |
741 | } |
742 | ||
743 | /* | |
744 | * Hey, Emacs, please set the following: | |
745 | * | |
746 | * Local Variables: | |
747 | * indent-tabs-mode: nil | |
748 | * End: | |
749 | * | |
750 | */ |