]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
CommitLineData
b75a7d8f
A
1/*
2 ********************************************************************************
3 *
374ca955 4 * Copyright (C) 1998-2004, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
20#include "ucnv_io.h"
21#include "unicode/ucnv_err.h"
22#include "ucnv_bld.h"
23#include "ucnv_imp.h"
24#include "ucnv_cnv.h"
25#include "cstring.h"
26#include "cmemory.h"
374ca955 27#include "uinvchar.h"
b75a7d8f
A
28#include "filestrm.h"
29#include "toolutil.h"
30#include "uoptions.h"
31#include "unicode/udata.h"
32#include "unewdata.h"
374ca955
A
33#include "uparse.h"
34#include "ucm.h"
b75a7d8f
A
35#include "makeconv.h"
36#include "genmbcs.h"
37
38#define DEBUG 0
39
374ca955
A
40typedef struct ConvData {
41 UCMFile *ucm;
42 NewConverter *cnvData, *extData;
43 UConverterSharedData sharedData;
44 UConverterStaticData staticData;
45} ConvData;
46
47static void
48initConvData(ConvData *data) {
49 uprv_memset(data, 0, sizeof(ConvData));
50 data->sharedData.structSize=sizeof(UConverterSharedData);
51 data->staticData.structSize=sizeof(UConverterStaticData);
52 data->sharedData.staticData=&data->staticData;
53}
54
55static void
56cleanupConvData(ConvData *data) {
57 if(data!=NULL) {
58 if(data->cnvData!=NULL) {
59 data->cnvData->close(data->cnvData);
60 data->cnvData=NULL;
61 }
62 if(data->extData!=NULL) {
63 data->extData->close(data->extData);
64 data->extData=NULL;
65 }
66 ucm_close(data->ucm);
67 data->ucm=NULL;
68 }
69}
70
b75a7d8f
A
71/*
72 * from ucnvstat.c - static prototypes of data-based converters
73 */
74extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
75
76/*
77 * Global - verbosity
78 */
79UBool VERBOSE = FALSE;
80UBool TOUCHFILE = FALSE;
81
374ca955
A
82static void
83createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
84
85/*
86 * Set up the UNewData and write the converter..
87 */
374ca955
A
88static void
89writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
90
91UBool haveCopyright=TRUE;
92
93static UDataInfo dataInfo={
94 sizeof(UDataInfo),
95 0,
96
97 U_IS_BIG_ENDIAN,
98 U_CHARSET_FAMILY,
99 sizeof(UChar),
100 0,
101
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
103 {6, 2, 0, 0}, /* formatVersion */
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
105};
106
374ca955
A
107static void
108writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
109{
110 UNewDataMemory *mem = NULL;
111 uint32_t sz2;
112 uint32_t size = 0;
374ca955 113 int32_t tableType;
b75a7d8f
A
114
115 if(U_FAILURE(*status))
116 {
117 return;
118 }
119
374ca955
A
120 tableType=TABLE_NONE;
121 if(data->cnvData!=NULL) {
122 tableType|=TABLE_BASE;
123 }
124 if(data->extData!=NULL) {
125 tableType|=TABLE_EXT;
126 }
127
b75a7d8f
A
128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
129
130 if(U_FAILURE(*status))
131 {
132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
133 cnvName,
134 "cnv",
135 u_errorName(*status));
136 return;
137 }
138
139 if(VERBOSE)
140 {
141 fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv");
142 }
143
374ca955 144
b75a7d8f 145 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
148 /* Now, write the table */
374ca955
A
149 if(tableType&TABLE_BASE) {
150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
151 }
152 if(tableType&TABLE_EXT) {
153 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
154 }
b75a7d8f
A
155
156 sz2 = udata_finish(mem, status);
157 if(size != sz2)
158 {
374ca955 159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
160 *status=U_INTERNAL_PROGRAM_ERROR;
161 }
162 if(VERBOSE)
163 {
374ca955 164 fprintf(stderr, "- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
165 }
166}
167
168static UOption options[]={
169 UOPTION_HELP_H, /* 0 Numbers for those who*/
170 UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
171 UOPTION_COPYRIGHT, /* 2 */
172 UOPTION_VERSION, /* 3 */
173 UOPTION_DESTDIR, /* 4 */
174 UOPTION_VERBOSE, /* 5 */
175 UOPTION_PACKAGE_NAME, /* 6 */
374ca955 176 UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */
b75a7d8f
A
177};
178
179int main(int argc, char* argv[])
180{
374ca955
A
181 ConvData data;
182 UErrorCode err = U_ZERO_ERROR, localError;
b75a7d8f
A
183 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
184 char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
185 const char* destdir, *arg;
186 const char *pkgName = NULL;
187 size_t destdirlen;
188 char* dot = NULL, *outBasename;
189 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
190 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
191 UVersionInfo icuVersion;
374ca955
A
192 UBool printFilename;
193
194 err = U_ZERO_ERROR;
b75a7d8f
A
195
196 U_MAIN_INIT_ARGS(argc, argv);
197
198 /* Set up the ICU version number */
199 u_getVersion(icuVersion);
200 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
201
202 /* preset then read command line options */
203 options[4].value=u_getDataDirectory();
204 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
205
206 /* error handling, printing usage message */
207 if(argc<0) {
208 fprintf(stderr,
209 "error in command line argument \"%s\"\n",
210 argv[-argc]);
211 } else if(argc<2) {
212 argc=-1;
213 }
214 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
215 fprintf(stderr,
216 "usage: %s [-options] files...\n"
217 "\tread .ucm codepage mapping files and write .cnv files\n"
218 "options:\n"
219 "\t-h or -? or --help this usage text\n"
220 "\t-V or --version show a version message\n"
221 "\t-c or --copyright include a copyright notice\n"
222 "\t-d or --destdir destination directory, followed by the path\n"
223 "\t-v or --verbose Turn on verbose output\n",
224 argv[0]);
225 fprintf(stderr,
226 "\t-p or --pkgname sets the 'package' name for output files.\n"
227 "\t If name is ICUDATA, then the default icu package\n"
228 "\t name will be used.\n"
229 "\t-t or --touchfile Generate additional small file without packagename, for nmake\n");
230 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
231 }
232
233 if(options[3].doesOccur) {
234 fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
235 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
236 fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n");
237 fprintf(stderr,"Corporation and others. All Rights Reserved.\n");
238 exit(0);
239 }
240
241 TOUCHFILE = options[7].doesOccur;
242
243 if(!options[6].doesOccur)
244 {
374ca955 245 pkgName=NULL;
b75a7d8f
A
246 }
247 else
248 {
249 pkgName =options[6].value;
250 if(!strcmp(pkgName, "ICUDATA"))
251 {
252 pkgName = U_ICUDATA_NAME;
253 }
254 if(pkgName[0] == 0)
255 {
256 pkgName = NULL;
257
258 if(TOUCHFILE)
259 {
260 fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n",
261 argv[0]);
262 exit(1);
263 }
264 }
265 }
266
267 /* get the options values */
268 haveCopyright = options[2].doesOccur;
269 destdir = options[4].value;
270 VERBOSE = options[5].doesOccur;
271
272 if (destdir != NULL && *destdir != 0) {
273 uprv_strcpy(outFileName, destdir);
274 destdirlen = uprv_strlen(destdir);
275 outBasename = outFileName + destdirlen;
276 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
277 *outBasename++ = U_FILE_SEP_CHAR;
278 ++destdirlen;
279 }
280 } else {
281 destdirlen = 0;
282 outBasename = outFileName;
283 }
284
285#if DEBUG
286 {
287 int i;
288 printf("makeconv: processing %d files...\n", argc - 1);
289 for(i=1; i<argc; ++i) {
290 printf("%s ", argv[i]);
291 }
292 printf("\n");
293 fflush(stdout);
294 }
295#endif
296
374ca955
A
297 err = U_ZERO_ERROR;
298 printFilename = (UBool) (argc > 2 || VERBOSE);
299 for (++argv; --argc; ++argv)
b75a7d8f 300 {
374ca955 301 arg = getLongPathname(*argv);
b75a7d8f 302
374ca955
A
303 /*produces the right destination path for display*/
304 if (destdirlen != 0)
b75a7d8f 305 {
374ca955 306 const char *basename;
b75a7d8f 307
374ca955
A
308 /* find the last file sepator */
309 basename = findBasename(arg);
310 uprv_strcpy(outBasename, basename);
b75a7d8f 311 }
374ca955 312 else
b75a7d8f 313 {
374ca955 314 uprv_strcpy(outFileName, arg);
b75a7d8f
A
315 }
316
374ca955
A
317 /*removes the extension if any is found*/
318 dot = uprv_strrchr(outBasename, '.');
319 if (dot)
b75a7d8f 320 {
374ca955 321 *dot = '\0';
b75a7d8f
A
322 }
323
374ca955
A
324 /* the basename without extension is the converter name */
325 uprv_strcpy(cnvName, outBasename);
b75a7d8f 326
374ca955
A
327 if(TOUCHFILE)
328 {
329 uprv_strcpy(touchFileName, outBasename);
330 uprv_strcat(touchFileName, ".cnv");
331 }
b75a7d8f 332
374ca955
A
333 if(pkgName != NULL)
334 {
335 /* changes both basename and filename */
336 uprv_strcpy(outBasename, pkgName);
337 uprv_strcat(outBasename, "_");
338 uprv_strcat(outBasename, cnvName);
339 }
b75a7d8f
A
340
341
374ca955
A
342 /*Adds the target extension*/
343 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
b75a7d8f
A
344
345#if DEBUG
346 printf("makeconv: processing %s ...\n", arg);
347 fflush(stdout);
348#endif
374ca955
A
349 localError = U_ZERO_ERROR;
350 initConvData(&data);
351 createConverter(&data, arg, &localError);
b75a7d8f 352
374ca955 353 if (U_FAILURE(localError))
b75a7d8f 354 {
374ca955
A
355 /* if an error is found, print out an error msg and keep going */
356 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
357 u_errorName(localError));
358 if(U_SUCCESS(err)) {
359 err = localError;
360 }
b75a7d8f 361 }
374ca955 362 else
b75a7d8f 363 {
374ca955
A
364 /* Make the static data name equal to the file name */
365 if( /*VERBOSE && */ uprv_stricmp(cnvName,data.staticData.name))
366 {
367 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
b75a7d8f
A
368 cnvName,
369 CONVERTER_FILE_EXTENSION,
374ca955
A
370 data.staticData.name);
371 }
372
373 uprv_strcpy((char*)data.staticData.name, cnvName);
374
375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
376 fprintf(stderr,
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data.staticData.name);
380 if(U_SUCCESS(err)) {
381 err = U_INVALID_TABLE_FORMAT;
382 }
383 }
384
385 if(pkgName == NULL)
386 {
387 uprv_strcpy(cnvNameWithPkg, cnvName);
388 }
389 else
390 {
391 uprv_strcpy(cnvNameWithPkg, pkgName);
392 uprv_strcat(cnvNameWithPkg, "_");
393 uprv_strcat(cnvNameWithPkg, cnvName);
394 }
395
396 localError = U_ZERO_ERROR;
397 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
398 if(TOUCHFILE)
399 {
400 FileStream *q;
401 char msg[1024];
402
403 sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n",
404 cnvName, pkgName);
405
406 q = T_FileStream_open(touchFileName, "w");
407 if(q == NULL)
408 {
409 fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName);
410 localError = U_FILE_ACCESS_ERROR;
411 }
412
413 else
414 {
415 T_FileStream_write(q, msg, (int32_t)uprv_strlen(msg));
416 T_FileStream_close(q);
417 }
418 }
419
420 if(U_FAILURE(localError))
421 {
422 /* if an error is found, print out an error msg and keep going*/
423 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
424 u_errorName(localError));
425 if(U_SUCCESS(err)) {
426 err = localError;
427 }
428 }
429 else if (printFilename)
430 {
431 puts(outFileName);
432 }
b75a7d8f 433 }
374ca955
A
434 fflush(stdout);
435 fflush(stderr);
436
437 cleanupConvData(&data);
b75a7d8f
A
438 }
439
374ca955 440 return err;
b75a7d8f
A
441}
442
443static void
444getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
445 if( (name[0]=='i' || name[0]=='I') &&
446 (name[1]=='b' || name[1]=='B') &&
447 (name[2]=='m' || name[2]=='M')
448 ) {
449 name+=3;
450 if(*name=='-') {
451 ++name;
452 }
453 *pPlatform=UCNV_IBM;
454 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
455 } else {
456 *pPlatform=UCNV_UNKNOWN;
457 *pCCSID=0;
458 }
459}
460
374ca955
A
461static void
462readHeader(ConvData *data,
463 FileStream* convFile,
464 const char* converterName,
465 UErrorCode *pErrorCode) {
b75a7d8f 466 char line[200];
374ca955
A
467 char *s, *key, *value;
468 const UConverterStaticData *prototype;
b75a7d8f 469 UConverterStaticData *staticData;
b75a7d8f
A
470
471 if(U_FAILURE(*pErrorCode)) {
472 return;
473 }
474
374ca955 475 staticData=&data->staticData;
b75a7d8f
A
476 staticData->platform=UCNV_IBM;
477 staticData->subCharLen=0;
478
479 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
480 /* basic parsing and handling of state-related items */
481 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
482 continue;
483 }
484
485 /* stop at the beginning of the mapping section */
374ca955 486 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
487 break;
488 }
489
b75a7d8f
A
490 /* collect the information from the header field, ignore unknown keys */
491 if(uprv_strcmp(key, "code_set_name")==0) {
492 if(*value!=0) {
374ca955 493 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
494 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
495 }
b75a7d8f 496 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
497 uint8_t bytes[UCNV_EXT_MAX_BYTES];
498 int8_t length;
499
500 s=value;
501 length=ucm_parseBytes(bytes, line, (const char **)&s);
502 if(1<=length && length<=4 && *s==0) {
503 staticData->subCharLen=length;
504 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
505 } else {
506 fprintf(stderr, "error: illegal <subchar> %s\n", value);
507 *pErrorCode=U_INVALID_TABLE_FORMAT;
508 return;
509 }
510 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 511 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 512
374ca955
A
513 s=value;
514 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
515 staticData->subChar1=bytes[0];
b75a7d8f
A
516 } else {
517 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
518 *pErrorCode=U_INVALID_TABLE_FORMAT;
519 return;
520 }
374ca955
A
521 }
522 }
523
524 /* copy values from the UCMFile to the static data */
525 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
526 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
527 staticData->conversionType=data->ucm->states.conversionType;
528
529 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
530 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
531 *pErrorCode=U_INVALID_TABLE_FORMAT;
532 return;
533 }
534
535 /*
536 * Now that we know the type, copy any 'default' values from the table.
537 * We need not check the type any further because the parser only
538 * recognizes what we have prototypes for.
539 *
540 * For delta (extension-only) tables, copy values from the base file
541 * instead, see createConverter().
542 */
543 if(data->ucm->baseName[0]==0) {
544 prototype=ucnv_converterStaticData[staticData->conversionType];
545 if(prototype!=NULL) {
546 if(staticData->name[0]==0) {
547 uprv_strcpy((char *)staticData->name, prototype->name);
548 }
549
550 if(staticData->codepage==0) {
551 staticData->codepage=prototype->codepage;
552 }
553
554 if(staticData->platform==0) {
555 staticData->platform=prototype->platform;
556 }
557
558 if(staticData->minBytesPerChar==0) {
559 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
560 }
561
562 if(staticData->maxBytesPerChar==0) {
374ca955 563 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 564 }
374ca955
A
565
566 if(staticData->subCharLen==0) {
567 staticData->subCharLen=prototype->subCharLen;
568 if(prototype->subCharLen>0) {
569 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
570 }
571 }
b75a7d8f
A
572 }
573 }
574
374ca955
A
575 if(data->ucm->states.outputType<0) {
576 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
577 }
578
579 if( staticData->subChar1!=0 &&
580 (staticData->minBytesPerChar>1 ||
581 (staticData->conversionType!=UCNV_MBCS &&
582 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
583 ) {
584 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
585 *pErrorCode=U_INVALID_TABLE_FORMAT;
586 }
587}
588
374ca955
A
589/* return TRUE if a base table was read, FALSE for an extension table */
590static UBool
591readFile(ConvData *data, const char* converterName,
592 UErrorCode *pErrorCode) {
593 char line[200];
594 char *end;
595 FileStream *convFile;
b75a7d8f 596
374ca955
A
597 UCMStates *baseStates;
598 UBool dataIsBase;
b75a7d8f 599
374ca955
A
600 if(U_FAILURE(*pErrorCode)) {
601 return FALSE;
602 }
b75a7d8f 603
374ca955 604 data->ucm=ucm_open();
b75a7d8f 605
374ca955
A
606 convFile=T_FileStream_open(converterName, "r");
607 if(convFile==NULL) {
608 *pErrorCode=U_FILE_ACCESS_ERROR;
609 return FALSE;
610 }
b75a7d8f 611
374ca955
A
612 readHeader(data, convFile, converterName, pErrorCode);
613 if(U_FAILURE(*pErrorCode)) {
614 return FALSE;
b75a7d8f
A
615 }
616
374ca955
A
617 if(data->ucm->baseName[0]==0) {
618 dataIsBase=TRUE;
619 baseStates=&data->ucm->states;
620 ucm_processStates(baseStates);
621 } else {
622 dataIsBase=FALSE;
623 baseStates=NULL;
b75a7d8f 624 }
b75a7d8f 625
374ca955
A
626 /* read the base table */
627 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
628 if(U_FAILURE(*pErrorCode)) {
629 return FALSE;
b75a7d8f
A
630 }
631
374ca955
A
632 /* read an extension table if there is one */
633 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
634 end=uprv_strchr(line, 0);
635 while(line<end &&
636 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
637 --end;
638 }
639 *end=0;
640
641 if(line[0]=='#' || u_skipWhitespace(line)==end) {
642 continue; /* ignore empty and comment lines */
643 }
644
645 if(0==uprv_strcmp(line, "CHARMAP")) {
646 /* read the extension table */
647 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
648 } else {
649 fprintf(stderr, "unexpected text after the base mapping table\n");
650 }
651 break;
b75a7d8f 652 }
374ca955
A
653
654 T_FileStream_close(convFile);
655
656 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 657 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 658 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 659 }
374ca955
A
660
661 return dataIsBase;
b75a7d8f
A
662}
663
374ca955
A
664static void
665createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
666 ConvData baseData;
667 UBool dataIsBase;
b75a7d8f 668
374ca955
A
669 UConverterStaticData *staticData;
670 UCMStates *states, *baseStates;
b75a7d8f 671
374ca955
A
672 if(U_FAILURE(*pErrorCode)) {
673 return;
b75a7d8f
A
674 }
675
374ca955 676 initConvData(data);
b75a7d8f 677
374ca955
A
678 dataIsBase=readFile(data, converterName, pErrorCode);
679 if(U_FAILURE(*pErrorCode)) {
680 return;
b75a7d8f
A
681 }
682
374ca955
A
683 staticData=&data->staticData;
684 states=&data->ucm->states;
b75a7d8f 685
374ca955
A
686 if(dataIsBase) {
687 data->cnvData=MBCSOpen(data->ucm);
688 if(data->cnvData==NULL) {
689 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 690
374ca955
A
691 } else if(!data->cnvData->isValid(data->cnvData,
692 staticData->subChar, staticData->subCharLen)
693 ) {
694 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
695 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 696
374ca955
A
697 } else if(staticData->subChar1!=0 &&
698 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
699 ) {
700 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
701 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 702
374ca955
A
703 } else if(data->ucm->ext->mappingsLength>0) {
704 /* prepare the extension table, if there is one */
705 data->extData=CnvExtOpen(data->ucm);
706 if(data->extData==NULL) {
707 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 708
374ca955
A
709 } else if(
710 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
711 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
712 ) {
713 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f
A
714 }
715 }
374ca955
A
716
717 /* add the base table after ucm_checkBaseExt()! */
718 if( U_SUCCESS(*pErrorCode) &&
719 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
720 ) {
721 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 722 }
374ca955
A
723 } else {
724 char baseFilename[500];
725 char *basename;
726
727 initConvData(&baseData);
728
729 /* assemble a path/filename for data->ucm->baseName */
730 uprv_strcpy(baseFilename, converterName);
731 basename=(char *)findBasename(baseFilename);
732 uprv_strcpy(basename, data->ucm->baseName);
733 uprv_strcat(basename, ".ucm");
734
735 /* read the base table */
736 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
737 if(U_FAILURE(*pErrorCode)) {
738 return;
739 } else if(!dataIsBase) {
740 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
741 *pErrorCode=U_INVALID_TABLE_FORMAT;
742 } else {
743 /* prepare the extension table */
744 data->extData=CnvExtOpen(data->ucm);
745 if(data->extData==NULL) {
746 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
747
748 } else {
749 /* fill in gaps in extension file header fields */
750 UCMapping *m, *mLimit;
751 uint8_t fallbackFlags;
752
753 baseStates=&baseData.ucm->states;
754 if(states->conversionType==UCNV_DBCS) {
755 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
756 } else if(states->minCharLength==0) {
757 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
758 }
759 if(states->maxCharLength<states->minCharLength) {
760 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
761 }
762
763 if(staticData->subCharLen==0) {
764 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
765 staticData->subCharLen=baseData.staticData.subCharLen;
766 }
767 /*
768 * do not copy subChar1 -
769 * only use what is explicitly specified
770 * because it cannot be unset in the extension file header
771 */
772
773 /* get the fallback flags */
774 fallbackFlags=0;
775 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
776 m<mLimit && fallbackFlags!=3;
777 ++m
b75a7d8f 778 ) {
374ca955
A
779 if(m->f==1) {
780 fallbackFlags|=1;
781 } else if(m->f==3) {
782 fallbackFlags|=2;
783 }
b75a7d8f 784 }
374ca955
A
785 for(m=data->ucm->base->mappings, mLimit=m+data->ucm->base->mappingsLength;
786 m<mLimit && fallbackFlags!=3;
787 ++m
b75a7d8f 788 ) {
374ca955
A
789 if(m->f==1) {
790 fallbackFlags|=1;
791 } else if(m->f==3) {
792 fallbackFlags|=2;
793 }
b75a7d8f 794 }
b75a7d8f 795
374ca955
A
796 if(fallbackFlags&1) {
797 staticData->hasFromUnicodeFallback=TRUE;
798 }
799 if(fallbackFlags&2) {
800 staticData->hasToUnicodeFallback=TRUE;
801 }
b75a7d8f 802
374ca955
A
803 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
804 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
805 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 806
374ca955
A
807 } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
808 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
809 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 810
374ca955
A
811 } else if(
812 !ucm_checkValidity(data->ucm->ext, baseStates) ||
813 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
814 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
815 ) {
816 *pErrorCode=U_INVALID_TABLE_FORMAT;
817 }
818 }
819 }
820
821 cleanupConvData(&baseData);
822 }
b75a7d8f
A
823}
824
825/*
826 * Hey, Emacs, please set the following:
827 *
828 * Local Variables:
829 * indent-tabs-mode: nil
830 * End:
831 *
832 */