]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.c
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
CommitLineData
b75a7d8f
A
1/*
2 ********************************************************************************
3 *
73c04bcf 4 * Copyright (C) 1998-2006, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
b75a7d8f
A
20#include "unicode/ucnv_err.h"
21#include "ucnv_bld.h"
22#include "ucnv_imp.h"
23#include "ucnv_cnv.h"
24#include "cstring.h"
25#include "cmemory.h"
374ca955 26#include "uinvchar.h"
b75a7d8f
A
27#include "filestrm.h"
28#include "toolutil.h"
29#include "uoptions.h"
30#include "unicode/udata.h"
31#include "unewdata.h"
374ca955
A
32#include "uparse.h"
33#include "ucm.h"
b75a7d8f
A
34#include "makeconv.h"
35#include "genmbcs.h"
36
37#define DEBUG 0
38
73c04bcf 39
374ca955
A
40typedef struct ConvData {
41 UCMFile *ucm;
42 NewConverter *cnvData, *extData;
43 UConverterSharedData sharedData;
44 UConverterStaticData staticData;
45} ConvData;
46
47static void
48initConvData(ConvData *data) {
49 uprv_memset(data, 0, sizeof(ConvData));
50 data->sharedData.structSize=sizeof(UConverterSharedData);
51 data->staticData.structSize=sizeof(UConverterStaticData);
52 data->sharedData.staticData=&data->staticData;
53}
54
55static void
56cleanupConvData(ConvData *data) {
57 if(data!=NULL) {
58 if(data->cnvData!=NULL) {
59 data->cnvData->close(data->cnvData);
60 data->cnvData=NULL;
61 }
62 if(data->extData!=NULL) {
63 data->extData->close(data->extData);
64 data->extData=NULL;
65 }
66 ucm_close(data->ucm);
67 data->ucm=NULL;
68 }
69}
70
b75a7d8f
A
71/*
72 * from ucnvstat.c - static prototypes of data-based converters
73 */
74extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
75
76/*
77 * Global - verbosity
78 */
79UBool VERBOSE = FALSE;
b75a7d8f 80
374ca955
A
81static void
82createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
83
84/*
85 * Set up the UNewData and write the converter..
86 */
374ca955
A
87static void
88writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
89
90UBool haveCopyright=TRUE;
91
92static UDataInfo dataInfo={
93 sizeof(UDataInfo),
94 0,
95
96 U_IS_BIG_ENDIAN,
97 U_CHARSET_FAMILY,
98 sizeof(UChar),
99 0,
100
101 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
102 {6, 2, 0, 0}, /* formatVersion */
103 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
104};
105
374ca955
A
106static void
107writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
108{
109 UNewDataMemory *mem = NULL;
110 uint32_t sz2;
111 uint32_t size = 0;
374ca955 112 int32_t tableType;
b75a7d8f
A
113
114 if(U_FAILURE(*status))
115 {
116 return;
117 }
118
374ca955
A
119 tableType=TABLE_NONE;
120 if(data->cnvData!=NULL) {
121 tableType|=TABLE_BASE;
122 }
123 if(data->extData!=NULL) {
124 tableType|=TABLE_EXT;
125 }
126
b75a7d8f
A
127 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
128
129 if(U_FAILURE(*status))
130 {
131 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
132 cnvName,
133 "cnv",
134 u_errorName(*status));
135 return;
136 }
137
138 if(VERBOSE)
139 {
140 fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv");
141 }
142
374ca955 143
b75a7d8f 144 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 145 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
146 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
147 /* Now, write the table */
374ca955
A
148 if(tableType&TABLE_BASE) {
149 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
150 }
151 if(tableType&TABLE_EXT) {
152 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
153 }
b75a7d8f
A
154
155 sz2 = udata_finish(mem, status);
156 if(size != sz2)
157 {
374ca955 158 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
159 *status=U_INTERNAL_PROGRAM_ERROR;
160 }
161 if(VERBOSE)
162 {
374ca955 163 fprintf(stderr, "- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
164 }
165}
166
167static UOption options[]={
168 UOPTION_HELP_H, /* 0 Numbers for those who*/
169 UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
170 UOPTION_COPYRIGHT, /* 2 */
171 UOPTION_VERSION, /* 3 */
172 UOPTION_DESTDIR, /* 4 */
173 UOPTION_VERBOSE, /* 5 */
b75a7d8f
A
174};
175
176int main(int argc, char* argv[])
177{
374ca955
A
178 ConvData data;
179 UErrorCode err = U_ZERO_ERROR, localError;
b75a7d8f 180 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f 181 const char* destdir, *arg;
b75a7d8f
A
182 size_t destdirlen;
183 char* dot = NULL, *outBasename;
184 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
185 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
186 UVersionInfo icuVersion;
374ca955
A
187 UBool printFilename;
188
189 err = U_ZERO_ERROR;
b75a7d8f
A
190
191 U_MAIN_INIT_ARGS(argc, argv);
192
193 /* Set up the ICU version number */
194 u_getVersion(icuVersion);
195 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
196
197 /* preset then read command line options */
198 options[4].value=u_getDataDirectory();
199 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
200
201 /* error handling, printing usage message */
202 if(argc<0) {
203 fprintf(stderr,
204 "error in command line argument \"%s\"\n",
205 argv[-argc]);
206 } else if(argc<2) {
207 argc=-1;
208 }
209 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
210 fprintf(stderr,
211 "usage: %s [-options] files...\n"
212 "\tread .ucm codepage mapping files and write .cnv files\n"
213 "options:\n"
214 "\t-h or -? or --help this usage text\n"
215 "\t-V or --version show a version message\n"
216 "\t-c or --copyright include a copyright notice\n"
217 "\t-d or --destdir destination directory, followed by the path\n"
218 "\t-v or --verbose Turn on verbose output\n",
219 argv[0]);
b75a7d8f
A
220 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
221 }
222
223 if(options[3].doesOccur) {
73c04bcf 224 fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
b75a7d8f 225 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
73c04bcf 226 fprintf(stderr, U_COPYRIGHT_STRING "\n");
b75a7d8f
A
227 exit(0);
228 }
229
b75a7d8f
A
230 /* get the options values */
231 haveCopyright = options[2].doesOccur;
232 destdir = options[4].value;
233 VERBOSE = options[5].doesOccur;
234
235 if (destdir != NULL && *destdir != 0) {
236 uprv_strcpy(outFileName, destdir);
237 destdirlen = uprv_strlen(destdir);
238 outBasename = outFileName + destdirlen;
239 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
240 *outBasename++ = U_FILE_SEP_CHAR;
241 ++destdirlen;
242 }
243 } else {
244 destdirlen = 0;
245 outBasename = outFileName;
246 }
247
248#if DEBUG
249 {
250 int i;
251 printf("makeconv: processing %d files...\n", argc - 1);
252 for(i=1; i<argc; ++i) {
253 printf("%s ", argv[i]);
254 }
255 printf("\n");
256 fflush(stdout);
257 }
258#endif
259
374ca955
A
260 err = U_ZERO_ERROR;
261 printFilename = (UBool) (argc > 2 || VERBOSE);
262 for (++argv; --argc; ++argv)
b75a7d8f 263 {
374ca955 264 arg = getLongPathname(*argv);
b75a7d8f 265
374ca955
A
266 /*produces the right destination path for display*/
267 if (destdirlen != 0)
b75a7d8f 268 {
374ca955 269 const char *basename;
b75a7d8f 270
374ca955
A
271 /* find the last file sepator */
272 basename = findBasename(arg);
273 uprv_strcpy(outBasename, basename);
b75a7d8f 274 }
374ca955 275 else
b75a7d8f 276 {
374ca955 277 uprv_strcpy(outFileName, arg);
b75a7d8f
A
278 }
279
374ca955
A
280 /*removes the extension if any is found*/
281 dot = uprv_strrchr(outBasename, '.');
282 if (dot)
b75a7d8f 283 {
374ca955 284 *dot = '\0';
b75a7d8f
A
285 }
286
374ca955
A
287 /* the basename without extension is the converter name */
288 uprv_strcpy(cnvName, outBasename);
b75a7d8f 289
374ca955
A
290 /*Adds the target extension*/
291 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
b75a7d8f
A
292
293#if DEBUG
294 printf("makeconv: processing %s ...\n", arg);
295 fflush(stdout);
296#endif
374ca955
A
297 localError = U_ZERO_ERROR;
298 initConvData(&data);
299 createConverter(&data, arg, &localError);
b75a7d8f 300
374ca955 301 if (U_FAILURE(localError))
b75a7d8f 302 {
374ca955
A
303 /* if an error is found, print out an error msg and keep going */
304 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
305 u_errorName(localError));
306 if(U_SUCCESS(err)) {
307 err = localError;
308 }
b75a7d8f 309 }
374ca955 310 else
b75a7d8f 311 {
374ca955
A
312 /* Make the static data name equal to the file name */
313 if( /*VERBOSE && */ uprv_stricmp(cnvName,data.staticData.name))
314 {
315 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
b75a7d8f
A
316 cnvName,
317 CONVERTER_FILE_EXTENSION,
374ca955
A
318 data.staticData.name);
319 }
320
321 uprv_strcpy((char*)data.staticData.name, cnvName);
322
323 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
324 fprintf(stderr,
325 "Error: A converter name must contain only invariant characters.\n"
326 "%s is not a valid converter name.\n",
327 data.staticData.name);
328 if(U_SUCCESS(err)) {
329 err = U_INVALID_TABLE_FORMAT;
330 }
331 }
332
73c04bcf 333 uprv_strcpy(cnvNameWithPkg, cnvName);
374ca955
A
334
335 localError = U_ZERO_ERROR;
336 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
374ca955
A
337
338 if(U_FAILURE(localError))
339 {
340 /* if an error is found, print out an error msg and keep going*/
341 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
342 u_errorName(localError));
343 if(U_SUCCESS(err)) {
344 err = localError;
345 }
346 }
347 else if (printFilename)
348 {
349 puts(outFileName);
350 }
b75a7d8f 351 }
374ca955
A
352 fflush(stdout);
353 fflush(stderr);
354
355 cleanupConvData(&data);
b75a7d8f
A
356 }
357
374ca955 358 return err;
b75a7d8f
A
359}
360
361static void
362getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
363 if( (name[0]=='i' || name[0]=='I') &&
364 (name[1]=='b' || name[1]=='B') &&
365 (name[2]=='m' || name[2]=='M')
366 ) {
367 name+=3;
368 if(*name=='-') {
369 ++name;
370 }
371 *pPlatform=UCNV_IBM;
372 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
373 } else {
374 *pPlatform=UCNV_UNKNOWN;
375 *pCCSID=0;
376 }
377}
378
374ca955
A
379static void
380readHeader(ConvData *data,
381 FileStream* convFile,
382 const char* converterName,
383 UErrorCode *pErrorCode) {
b75a7d8f 384 char line[200];
374ca955
A
385 char *s, *key, *value;
386 const UConverterStaticData *prototype;
b75a7d8f 387 UConverterStaticData *staticData;
b75a7d8f
A
388
389 if(U_FAILURE(*pErrorCode)) {
390 return;
391 }
392
374ca955 393 staticData=&data->staticData;
b75a7d8f
A
394 staticData->platform=UCNV_IBM;
395 staticData->subCharLen=0;
396
397 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
398 /* basic parsing and handling of state-related items */
399 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
400 continue;
401 }
402
403 /* stop at the beginning of the mapping section */
374ca955 404 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
405 break;
406 }
407
b75a7d8f
A
408 /* collect the information from the header field, ignore unknown keys */
409 if(uprv_strcmp(key, "code_set_name")==0) {
410 if(*value!=0) {
374ca955 411 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
412 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
413 }
b75a7d8f 414 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
415 uint8_t bytes[UCNV_EXT_MAX_BYTES];
416 int8_t length;
417
418 s=value;
419 length=ucm_parseBytes(bytes, line, (const char **)&s);
420 if(1<=length && length<=4 && *s==0) {
421 staticData->subCharLen=length;
422 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
423 } else {
424 fprintf(stderr, "error: illegal <subchar> %s\n", value);
425 *pErrorCode=U_INVALID_TABLE_FORMAT;
426 return;
427 }
428 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 429 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 430
374ca955
A
431 s=value;
432 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
433 staticData->subChar1=bytes[0];
b75a7d8f
A
434 } else {
435 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
436 *pErrorCode=U_INVALID_TABLE_FORMAT;
437 return;
438 }
374ca955
A
439 }
440 }
441
442 /* copy values from the UCMFile to the static data */
443 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
444 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
445 staticData->conversionType=data->ucm->states.conversionType;
446
447 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
448 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
449 *pErrorCode=U_INVALID_TABLE_FORMAT;
450 return;
451 }
452
453 /*
454 * Now that we know the type, copy any 'default' values from the table.
455 * We need not check the type any further because the parser only
456 * recognizes what we have prototypes for.
457 *
458 * For delta (extension-only) tables, copy values from the base file
459 * instead, see createConverter().
460 */
461 if(data->ucm->baseName[0]==0) {
462 prototype=ucnv_converterStaticData[staticData->conversionType];
463 if(prototype!=NULL) {
464 if(staticData->name[0]==0) {
465 uprv_strcpy((char *)staticData->name, prototype->name);
466 }
467
468 if(staticData->codepage==0) {
469 staticData->codepage=prototype->codepage;
470 }
471
472 if(staticData->platform==0) {
473 staticData->platform=prototype->platform;
474 }
475
476 if(staticData->minBytesPerChar==0) {
477 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
478 }
479
480 if(staticData->maxBytesPerChar==0) {
374ca955 481 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 482 }
374ca955
A
483
484 if(staticData->subCharLen==0) {
485 staticData->subCharLen=prototype->subCharLen;
486 if(prototype->subCharLen>0) {
487 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
488 }
489 }
b75a7d8f
A
490 }
491 }
492
374ca955
A
493 if(data->ucm->states.outputType<0) {
494 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
495 }
496
497 if( staticData->subChar1!=0 &&
498 (staticData->minBytesPerChar>1 ||
499 (staticData->conversionType!=UCNV_MBCS &&
500 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
501 ) {
502 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
503 *pErrorCode=U_INVALID_TABLE_FORMAT;
504 }
505}
506
374ca955
A
507/* return TRUE if a base table was read, FALSE for an extension table */
508static UBool
509readFile(ConvData *data, const char* converterName,
510 UErrorCode *pErrorCode) {
511 char line[200];
512 char *end;
513 FileStream *convFile;
b75a7d8f 514
374ca955
A
515 UCMStates *baseStates;
516 UBool dataIsBase;
b75a7d8f 517
374ca955
A
518 if(U_FAILURE(*pErrorCode)) {
519 return FALSE;
520 }
b75a7d8f 521
374ca955 522 data->ucm=ucm_open();
b75a7d8f 523
374ca955
A
524 convFile=T_FileStream_open(converterName, "r");
525 if(convFile==NULL) {
526 *pErrorCode=U_FILE_ACCESS_ERROR;
527 return FALSE;
528 }
b75a7d8f 529
374ca955
A
530 readHeader(data, convFile, converterName, pErrorCode);
531 if(U_FAILURE(*pErrorCode)) {
532 return FALSE;
b75a7d8f
A
533 }
534
374ca955
A
535 if(data->ucm->baseName[0]==0) {
536 dataIsBase=TRUE;
537 baseStates=&data->ucm->states;
538 ucm_processStates(baseStates);
539 } else {
540 dataIsBase=FALSE;
541 baseStates=NULL;
b75a7d8f 542 }
b75a7d8f 543
374ca955
A
544 /* read the base table */
545 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
546 if(U_FAILURE(*pErrorCode)) {
547 return FALSE;
b75a7d8f
A
548 }
549
374ca955
A
550 /* read an extension table if there is one */
551 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
552 end=uprv_strchr(line, 0);
553 while(line<end &&
554 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
555 --end;
556 }
557 *end=0;
558
559 if(line[0]=='#' || u_skipWhitespace(line)==end) {
560 continue; /* ignore empty and comment lines */
561 }
562
563 if(0==uprv_strcmp(line, "CHARMAP")) {
564 /* read the extension table */
565 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
566 } else {
567 fprintf(stderr, "unexpected text after the base mapping table\n");
568 }
569 break;
b75a7d8f 570 }
374ca955
A
571
572 T_FileStream_close(convFile);
573
574 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 575 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 576 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 577 }
374ca955
A
578
579 return dataIsBase;
b75a7d8f
A
580}
581
374ca955
A
582static void
583createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
584 ConvData baseData;
585 UBool dataIsBase;
b75a7d8f 586
374ca955
A
587 UConverterStaticData *staticData;
588 UCMStates *states, *baseStates;
b75a7d8f 589
374ca955
A
590 if(U_FAILURE(*pErrorCode)) {
591 return;
b75a7d8f
A
592 }
593
374ca955 594 initConvData(data);
b75a7d8f 595
374ca955
A
596 dataIsBase=readFile(data, converterName, pErrorCode);
597 if(U_FAILURE(*pErrorCode)) {
598 return;
b75a7d8f
A
599 }
600
374ca955
A
601 staticData=&data->staticData;
602 states=&data->ucm->states;
b75a7d8f 603
374ca955
A
604 if(dataIsBase) {
605 data->cnvData=MBCSOpen(data->ucm);
606 if(data->cnvData==NULL) {
607 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 608
374ca955
A
609 } else if(!data->cnvData->isValid(data->cnvData,
610 staticData->subChar, staticData->subCharLen)
611 ) {
612 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
613 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 614
374ca955
A
615 } else if(staticData->subChar1!=0 &&
616 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
617 ) {
618 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
619 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 620
374ca955
A
621 } else if(data->ucm->ext->mappingsLength>0) {
622 /* prepare the extension table, if there is one */
623 data->extData=CnvExtOpen(data->ucm);
624 if(data->extData==NULL) {
625 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 626
374ca955
A
627 } else if(
628 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
629 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
630 ) {
631 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f
A
632 }
633 }
374ca955
A
634
635 /* add the base table after ucm_checkBaseExt()! */
636 if( U_SUCCESS(*pErrorCode) &&
637 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
638 ) {
639 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 640 }
374ca955
A
641 } else {
642 char baseFilename[500];
643 char *basename;
644
645 initConvData(&baseData);
646
647 /* assemble a path/filename for data->ucm->baseName */
648 uprv_strcpy(baseFilename, converterName);
649 basename=(char *)findBasename(baseFilename);
650 uprv_strcpy(basename, data->ucm->baseName);
651 uprv_strcat(basename, ".ucm");
652
653 /* read the base table */
654 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
655 if(U_FAILURE(*pErrorCode)) {
656 return;
657 } else if(!dataIsBase) {
658 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
659 *pErrorCode=U_INVALID_TABLE_FORMAT;
660 } else {
661 /* prepare the extension table */
662 data->extData=CnvExtOpen(data->ucm);
663 if(data->extData==NULL) {
664 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
665
666 } else {
667 /* fill in gaps in extension file header fields */
668 UCMapping *m, *mLimit;
669 uint8_t fallbackFlags;
670
671 baseStates=&baseData.ucm->states;
672 if(states->conversionType==UCNV_DBCS) {
673 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
674 } else if(states->minCharLength==0) {
675 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
676 }
677 if(states->maxCharLength<states->minCharLength) {
678 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
679 }
680
681 if(staticData->subCharLen==0) {
682 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
683 staticData->subCharLen=baseData.staticData.subCharLen;
684 }
685 /*
686 * do not copy subChar1 -
687 * only use what is explicitly specified
688 * because it cannot be unset in the extension file header
689 */
690
691 /* get the fallback flags */
692 fallbackFlags=0;
693 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
694 m<mLimit && fallbackFlags!=3;
695 ++m
b75a7d8f 696 ) {
374ca955
A
697 if(m->f==1) {
698 fallbackFlags|=1;
699 } else if(m->f==3) {
700 fallbackFlags|=2;
701 }
b75a7d8f 702 }
374ca955
A
703 for(m=data->ucm->base->mappings, mLimit=m+data->ucm->base->mappingsLength;
704 m<mLimit && fallbackFlags!=3;
705 ++m
b75a7d8f 706 ) {
374ca955
A
707 if(m->f==1) {
708 fallbackFlags|=1;
709 } else if(m->f==3) {
710 fallbackFlags|=2;
711 }
b75a7d8f 712 }
b75a7d8f 713
374ca955
A
714 if(fallbackFlags&1) {
715 staticData->hasFromUnicodeFallback=TRUE;
716 }
717 if(fallbackFlags&2) {
718 staticData->hasToUnicodeFallback=TRUE;
719 }
b75a7d8f 720
374ca955
A
721 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
722 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
723 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 724
374ca955
A
725 } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
726 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
727 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 728
374ca955
A
729 } else if(
730 !ucm_checkValidity(data->ucm->ext, baseStates) ||
731 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ||
732 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
733 ) {
734 *pErrorCode=U_INVALID_TABLE_FORMAT;
735 }
736 }
737 }
738
739 cleanupConvData(&baseData);
740 }
b75a7d8f
A
741}
742
743/*
744 * Hey, Emacs, please set the following:
745 *
746 * Local Variables:
747 * indent-tabs-mode: nil
748 * End:
749 *
750 */