ICU-531.31.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
CommitLineData
b75a7d8f
A
1/*
2 ********************************************************************************
3 *
51004dcb 4 * Copyright (C) 1998-2012, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
b75a7d8f
A
20#include "unicode/ucnv_err.h"
21#include "ucnv_bld.h"
22#include "ucnv_imp.h"
23#include "ucnv_cnv.h"
24#include "cstring.h"
25#include "cmemory.h"
374ca955 26#include "uinvchar.h"
b75a7d8f
A
27#include "filestrm.h"
28#include "toolutil.h"
29#include "uoptions.h"
30#include "unicode/udata.h"
31#include "unewdata.h"
374ca955
A
32#include "uparse.h"
33#include "ucm.h"
b75a7d8f
A
34#include "makeconv.h"
35#include "genmbcs.h"
36
46f4442e 37#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
b75a7d8f 38
46f4442e 39#define DEBUG 0
73c04bcf 40
374ca955
A
41typedef struct ConvData {
42 UCMFile *ucm;
43 NewConverter *cnvData, *extData;
44 UConverterSharedData sharedData;
45 UConverterStaticData staticData;
46} ConvData;
47
48static void
49initConvData(ConvData *data) {
50 uprv_memset(data, 0, sizeof(ConvData));
51 data->sharedData.structSize=sizeof(UConverterSharedData);
52 data->staticData.structSize=sizeof(UConverterStaticData);
53 data->sharedData.staticData=&data->staticData;
54}
55
56static void
57cleanupConvData(ConvData *data) {
58 if(data!=NULL) {
59 if(data->cnvData!=NULL) {
60 data->cnvData->close(data->cnvData);
61 data->cnvData=NULL;
62 }
63 if(data->extData!=NULL) {
64 data->extData->close(data->extData);
65 data->extData=NULL;
66 }
67 ucm_close(data->ucm);
68 data->ucm=NULL;
69 }
70}
71
b75a7d8f
A
72/*
73 * from ucnvstat.c - static prototypes of data-based converters
74 */
75extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
76
77/*
78 * Global - verbosity
79 */
80UBool VERBOSE = FALSE;
46f4442e 81UBool SMALL = FALSE;
729e4ab9 82UBool IGNORE_SISO_CHECK = FALSE;
b75a7d8f 83
374ca955
A
84static void
85createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
86
87/*
88 * Set up the UNewData and write the converter..
89 */
374ca955
A
90static void
91writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
92
93UBool haveCopyright=TRUE;
94
95static UDataInfo dataInfo={
96 sizeof(UDataInfo),
97 0,
98
99 U_IS_BIG_ENDIAN,
100 U_CHARSET_FAMILY,
101 sizeof(UChar),
102 0,
103
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
107};
108
374ca955
A
109static void
110writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
111{
112 UNewDataMemory *mem = NULL;
113 uint32_t sz2;
114 uint32_t size = 0;
374ca955 115 int32_t tableType;
b75a7d8f
A
116
117 if(U_FAILURE(*status))
118 {
119 return;
120 }
121
374ca955
A
122 tableType=TABLE_NONE;
123 if(data->cnvData!=NULL) {
124 tableType|=TABLE_BASE;
125 }
126 if(data->extData!=NULL) {
127 tableType|=TABLE_EXT;
128 }
129
b75a7d8f
A
130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131
132 if(U_FAILURE(*status))
133 {
134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135 cnvName,
136 "cnv",
137 u_errorName(*status));
138 return;
139 }
140
141 if(VERBOSE)
142 {
46f4442e 143 printf("- Opened udata %s.%s\n", cnvName, "cnv");
b75a7d8f
A
144 }
145
374ca955 146
b75a7d8f 147 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
150 /* Now, write the table */
374ca955
A
151 if(tableType&TABLE_BASE) {
152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153 }
154 if(tableType&TABLE_EXT) {
155 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156 }
b75a7d8f
A
157
158 sz2 = udata_finish(mem, status);
159 if(size != sz2)
160 {
374ca955 161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
162 *status=U_INTERNAL_PROGRAM_ERROR;
163 }
164 if(VERBOSE)
165 {
46f4442e 166 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
167 }
168}
169
46f4442e
A
/*
 * Indexes into options[]; the order here must match the order of the
 * entries in that array.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_COUNT   /* number of options, keep last */
};
181
b75a7d8f 182static UOption options[]={
46f4442e
A
183 UOPTION_HELP_H,
184 UOPTION_HELP_QUESTION_MARK,
185 UOPTION_COPYRIGHT,
186 UOPTION_VERSION,
187 UOPTION_DESTDIR,
188 UOPTION_VERBOSE,
729e4ab9
A
189 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
190 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
b75a7d8f
A
191};
192
193int main(int argc, char* argv[])
194{
374ca955
A
195 ConvData data;
196 UErrorCode err = U_ZERO_ERROR, localError;
b75a7d8f 197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f 198 const char* destdir, *arg;
b75a7d8f
A
199 size_t destdirlen;
200 char* dot = NULL, *outBasename;
201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
203 UVersionInfo icuVersion;
374ca955
A
204 UBool printFilename;
205
206 err = U_ZERO_ERROR;
b75a7d8f
A
207
208 U_MAIN_INIT_ARGS(argc, argv);
209
210 /* Set up the ICU version number */
211 u_getVersion(icuVersion);
212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
213
214 /* preset then read command line options */
46f4442e
A
215 options[OPT_DESTDIR].value=u_getDataDirectory();
216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
b75a7d8f
A
217
218 /* error handling, printing usage message */
219 if(argc<0) {
220 fprintf(stderr,
221 "error in command line argument \"%s\"\n",
222 argv[-argc]);
223 } else if(argc<2) {
224 argc=-1;
225 }
46f4442e
A
226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
227 FILE *stdfile=argc<0 ? stderr : stdout;
228 fprintf(stdfile,
b75a7d8f
A
229 "usage: %s [-options] files...\n"
230 "\tread .ucm codepage mapping files and write .cnv files\n"
231 "options:\n"
232 "\t-h or -? or --help this usage text\n"
233 "\t-V or --version show a version message\n"
234 "\t-c or --copyright include a copyright notice\n"
235 "\t-d or --destdir destination directory, followed by the path\n"
236 "\t-v or --verbose Turn on verbose output\n",
237 argv[0]);
46f4442e
A
238 fprintf(stdfile,
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
729e4ab9
A
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
b75a7d8f
A
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245 }
246
46f4442e 247 if(options[OPT_VERSION].doesOccur) {
51004dcb 248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
46f4442e
A
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250 printf("%s\n", U_COPYRIGHT_STRING);
b75a7d8f
A
251 exit(0);
252 }
253
b75a7d8f 254 /* get the options values */
46f4442e
A
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256 destdir = options[OPT_DESTDIR].value;
257 VERBOSE = options[OPT_VERBOSE].doesOccur;
258 SMALL = options[OPT_SMALL].doesOccur;
b75a7d8f 259
729e4ab9
A
260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
261 IGNORE_SISO_CHECK = TRUE;
262 }
263
b75a7d8f
A
264 if (destdir != NULL && *destdir != 0) {
265 uprv_strcpy(outFileName, destdir);
266 destdirlen = uprv_strlen(destdir);
267 outBasename = outFileName + destdirlen;
268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
269 *outBasename++ = U_FILE_SEP_CHAR;
270 ++destdirlen;
271 }
272 } else {
273 destdirlen = 0;
274 outBasename = outFileName;
275 }
276
277#if DEBUG
278 {
279 int i;
280 printf("makeconv: processing %d files...\n", argc - 1);
281 for(i=1; i<argc; ++i) {
282 printf("%s ", argv[i]);
283 }
284 printf("\n");
285 fflush(stdout);
286 }
287#endif
288
374ca955
A
289 err = U_ZERO_ERROR;
290 printFilename = (UBool) (argc > 2 || VERBOSE);
291 for (++argv; --argc; ++argv)
b75a7d8f 292 {
374ca955 293 arg = getLongPathname(*argv);
b75a7d8f 294
46f4442e 295 /* Check for potential buffer overflow */
51004dcb 296 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
46f4442e
A
297 {
298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
299 return U_BUFFER_OVERFLOW_ERROR;
300 }
301
374ca955
A
302 /*produces the right destination path for display*/
303 if (destdirlen != 0)
b75a7d8f 304 {
374ca955 305 const char *basename;
b75a7d8f 306
374ca955
A
307 /* find the last file sepator */
308 basename = findBasename(arg);
309 uprv_strcpy(outBasename, basename);
b75a7d8f 310 }
374ca955 311 else
b75a7d8f 312 {
374ca955 313 uprv_strcpy(outFileName, arg);
b75a7d8f
A
314 }
315
374ca955
A
316 /*removes the extension if any is found*/
317 dot = uprv_strrchr(outBasename, '.');
318 if (dot)
b75a7d8f 319 {
374ca955 320 *dot = '\0';
b75a7d8f
A
321 }
322
374ca955
A
323 /* the basename without extension is the converter name */
324 uprv_strcpy(cnvName, outBasename);
b75a7d8f 325
374ca955
A
326 /*Adds the target extension*/
327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
b75a7d8f
A
328
329#if DEBUG
330 printf("makeconv: processing %s ...\n", arg);
331 fflush(stdout);
332#endif
374ca955
A
333 localError = U_ZERO_ERROR;
334 initConvData(&data);
335 createConverter(&data, arg, &localError);
b75a7d8f 336
374ca955 337 if (U_FAILURE(localError))
b75a7d8f 338 {
374ca955
A
339 /* if an error is found, print out an error msg and keep going */
340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
341 u_errorName(localError));
342 if(U_SUCCESS(err)) {
343 err = localError;
344 }
b75a7d8f 345 }
374ca955 346 else
b75a7d8f 347 {
46f4442e
A
348 /* Insure the static data name matches the file name */
349 /* Changed to ignore directory and only compare base name
350 LDH 1/2/08*/
351 char *p;
352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
353
354 if(p == NULL) /* OK, try alternate */
355 {
356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
357 if(p == NULL)
358 {
359 p=cnvName; /* If no separators, no problem */
360 }
361 }
362 else
363 {
364 p++; /* If found separtor, don't include it in compare */
365 }
366 if(uprv_stricmp(p,data.staticData.name))
374ca955
A
367 {
368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
46f4442e 369 cnvName, CONVERTER_FILE_EXTENSION,
374ca955
A
370 data.staticData.name);
371 }
372
373 uprv_strcpy((char*)data.staticData.name, cnvName);
374
375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
376 fprintf(stderr,
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data.staticData.name);
380 if(U_SUCCESS(err)) {
381 err = U_INVALID_TABLE_FORMAT;
382 }
383 }
384
73c04bcf 385 uprv_strcpy(cnvNameWithPkg, cnvName);
374ca955
A
386
387 localError = U_ZERO_ERROR;
388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
374ca955
A
389
390 if(U_FAILURE(localError))
391 {
392 /* if an error is found, print out an error msg and keep going*/
393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
394 u_errorName(localError));
395 if(U_SUCCESS(err)) {
396 err = localError;
397 }
398 }
399 else if (printFilename)
400 {
46f4442e 401 puts(outBasename);
374ca955 402 }
b75a7d8f 403 }
374ca955
A
404 fflush(stdout);
405 fflush(stderr);
406
407 cleanupConvData(&data);
b75a7d8f
A
408 }
409
374ca955 410 return err;
b75a7d8f
A
411}
412
413static void
414getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
415 if( (name[0]=='i' || name[0]=='I') &&
416 (name[1]=='b' || name[1]=='B') &&
417 (name[2]=='m' || name[2]=='M')
418 ) {
419 name+=3;
420 if(*name=='-') {
421 ++name;
422 }
423 *pPlatform=UCNV_IBM;
424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
425 } else {
426 *pPlatform=UCNV_UNKNOWN;
427 *pCCSID=0;
428 }
429}
430
374ca955
A
431static void
432readHeader(ConvData *data,
433 FileStream* convFile,
434 const char* converterName,
435 UErrorCode *pErrorCode) {
4388f060 436 char line[1024];
374ca955
A
437 char *s, *key, *value;
438 const UConverterStaticData *prototype;
b75a7d8f 439 UConverterStaticData *staticData;
b75a7d8f
A
440
441 if(U_FAILURE(*pErrorCode)) {
442 return;
443 }
444
374ca955 445 staticData=&data->staticData;
b75a7d8f
A
446 staticData->platform=UCNV_IBM;
447 staticData->subCharLen=0;
448
449 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
450 /* basic parsing and handling of state-related items */
451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
452 continue;
453 }
454
455 /* stop at the beginning of the mapping section */
374ca955 456 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
457 break;
458 }
459
b75a7d8f
A
460 /* collect the information from the header field, ignore unknown keys */
461 if(uprv_strcmp(key, "code_set_name")==0) {
462 if(*value!=0) {
374ca955 463 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
464 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
465 }
b75a7d8f 466 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
467 uint8_t bytes[UCNV_EXT_MAX_BYTES];
468 int8_t length;
469
470 s=value;
471 length=ucm_parseBytes(bytes, line, (const char **)&s);
472 if(1<=length && length<=4 && *s==0) {
473 staticData->subCharLen=length;
474 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
475 } else {
476 fprintf(stderr, "error: illegal <subchar> %s\n", value);
477 *pErrorCode=U_INVALID_TABLE_FORMAT;
478 return;
479 }
480 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 481 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 482
374ca955
A
483 s=value;
484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
485 staticData->subChar1=bytes[0];
b75a7d8f
A
486 } else {
487 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
488 *pErrorCode=U_INVALID_TABLE_FORMAT;
489 return;
490 }
374ca955
A
491 }
492 }
493
494 /* copy values from the UCMFile to the static data */
495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
497 staticData->conversionType=data->ucm->states.conversionType;
498
499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
501 *pErrorCode=U_INVALID_TABLE_FORMAT;
502 return;
503 }
504
505 /*
506 * Now that we know the type, copy any 'default' values from the table.
507 * We need not check the type any further because the parser only
508 * recognizes what we have prototypes for.
509 *
510 * For delta (extension-only) tables, copy values from the base file
511 * instead, see createConverter().
512 */
513 if(data->ucm->baseName[0]==0) {
514 prototype=ucnv_converterStaticData[staticData->conversionType];
515 if(prototype!=NULL) {
516 if(staticData->name[0]==0) {
517 uprv_strcpy((char *)staticData->name, prototype->name);
518 }
519
520 if(staticData->codepage==0) {
521 staticData->codepage=prototype->codepage;
522 }
523
524 if(staticData->platform==0) {
525 staticData->platform=prototype->platform;
526 }
527
528 if(staticData->minBytesPerChar==0) {
529 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
530 }
531
532 if(staticData->maxBytesPerChar==0) {
374ca955 533 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 534 }
374ca955
A
535
536 if(staticData->subCharLen==0) {
537 staticData->subCharLen=prototype->subCharLen;
538 if(prototype->subCharLen>0) {
539 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
540 }
541 }
b75a7d8f
A
542 }
543 }
544
374ca955
A
545 if(data->ucm->states.outputType<0) {
546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
547 }
548
549 if( staticData->subChar1!=0 &&
550 (staticData->minBytesPerChar>1 ||
551 (staticData->conversionType!=UCNV_MBCS &&
552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
553 ) {
554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
555 *pErrorCode=U_INVALID_TABLE_FORMAT;
556 }
557}
558
374ca955
A
559/* return TRUE if a base table was read, FALSE for an extension table */
560static UBool
561readFile(ConvData *data, const char* converterName,
562 UErrorCode *pErrorCode) {
4388f060 563 char line[1024];
374ca955
A
564 char *end;
565 FileStream *convFile;
b75a7d8f 566
374ca955
A
567 UCMStates *baseStates;
568 UBool dataIsBase;
b75a7d8f 569
374ca955
A
570 if(U_FAILURE(*pErrorCode)) {
571 return FALSE;
572 }
b75a7d8f 573
374ca955 574 data->ucm=ucm_open();
b75a7d8f 575
374ca955
A
576 convFile=T_FileStream_open(converterName, "r");
577 if(convFile==NULL) {
578 *pErrorCode=U_FILE_ACCESS_ERROR;
579 return FALSE;
580 }
b75a7d8f 581
374ca955
A
582 readHeader(data, convFile, converterName, pErrorCode);
583 if(U_FAILURE(*pErrorCode)) {
584 return FALSE;
b75a7d8f
A
585 }
586
374ca955
A
587 if(data->ucm->baseName[0]==0) {
588 dataIsBase=TRUE;
589 baseStates=&data->ucm->states;
729e4ab9 590 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
374ca955
A
591 } else {
592 dataIsBase=FALSE;
593 baseStates=NULL;
b75a7d8f 594 }
b75a7d8f 595
374ca955
A
596 /* read the base table */
597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
598 if(U_FAILURE(*pErrorCode)) {
599 return FALSE;
b75a7d8f
A
600 }
601
374ca955
A
602 /* read an extension table if there is one */
603 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
604 end=uprv_strchr(line, 0);
605 while(line<end &&
606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
607 --end;
608 }
609 *end=0;
610
611 if(line[0]=='#' || u_skipWhitespace(line)==end) {
612 continue; /* ignore empty and comment lines */
613 }
614
615 if(0==uprv_strcmp(line, "CHARMAP")) {
616 /* read the extension table */
617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
618 } else {
619 fprintf(stderr, "unexpected text after the base mapping table\n");
620 }
621 break;
b75a7d8f 622 }
374ca955
A
623
624 T_FileStream_close(convFile);
625
626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 627 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 628 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 629 }
374ca955
A
630
631 return dataIsBase;
b75a7d8f
A
632}
633
374ca955
A
634static void
635createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
636 ConvData baseData;
637 UBool dataIsBase;
b75a7d8f 638
374ca955
A
639 UConverterStaticData *staticData;
640 UCMStates *states, *baseStates;
b75a7d8f 641
374ca955
A
642 if(U_FAILURE(*pErrorCode)) {
643 return;
b75a7d8f
A
644 }
645
374ca955 646 initConvData(data);
b75a7d8f 647
374ca955
A
648 dataIsBase=readFile(data, converterName, pErrorCode);
649 if(U_FAILURE(*pErrorCode)) {
650 return;
b75a7d8f
A
651 }
652
374ca955
A
653 staticData=&data->staticData;
654 states=&data->ucm->states;
b75a7d8f 655
374ca955 656 if(dataIsBase) {
46f4442e
A
657 /*
658 * Build a normal .cnv file with a base table
659 * and an optional extension table.
660 */
374ca955
A
661 data->cnvData=MBCSOpen(data->ucm);
662 if(data->cnvData==NULL) {
663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 664
374ca955
A
665 } else if(!data->cnvData->isValid(data->cnvData,
666 staticData->subChar, staticData->subCharLen)
667 ) {
668 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
669 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 670
374ca955
A
671 } else if(staticData->subChar1!=0 &&
672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
673 ) {
674 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
675 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 676
46f4442e
A
677 } else if(
678 data->ucm->ext->mappingsLength>0 &&
679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
680 ) {
681 *pErrorCode=U_INVALID_TABLE_FORMAT;
682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
683 /* sort the table so that it can be turned into UTF-8-friendly data */
684 ucm_sortTable(data->ucm->base);
685 }
b75a7d8f 686
46f4442e
A
687 if(U_SUCCESS(*pErrorCode)) {
688 if(
689 /* add the base table after ucm_checkBaseExt()! */
690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
374ca955
A
691 ) {
692 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
693 } else {
694 /*
695 * addTable() may have requested moving more mappings to the extension table
696 * if they fit into the base toUnicode table but not into the
697 * base fromUnicode table.
698 * (Especially for UTF-8-friendly fromUnicode tables.)
699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
700 * to be excluded from the extension toUnicode data.
701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
702 * the base fromUnicode table.
703 */
704 ucm_moveMappings(data->ucm->base, data->ucm->ext);
705 ucm_sortTable(data->ucm->ext);
706 if(data->ucm->ext->mappingsLength>0) {
707 /* prepare the extension table, if there is one */
708 data->extData=CnvExtOpen(data->ucm);
709 if(data->extData==NULL) {
710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
711 } else if(
712 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
713 ) {
714 *pErrorCode=U_INVALID_TABLE_FORMAT;
715 }
716 }
b75a7d8f
A
717 }
718 }
374ca955 719 } else {
46f4442e 720 /* Build an extension-only .cnv file. */
374ca955
A
721 char baseFilename[500];
722 char *basename;
723
724 initConvData(&baseData);
725
726 /* assemble a path/filename for data->ucm->baseName */
727 uprv_strcpy(baseFilename, converterName);
728 basename=(char *)findBasename(baseFilename);
729 uprv_strcpy(basename, data->ucm->baseName);
730 uprv_strcat(basename, ".ucm");
731
732 /* read the base table */
733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
734 if(U_FAILURE(*pErrorCode)) {
735 return;
736 } else if(!dataIsBase) {
737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
738 *pErrorCode=U_INVALID_TABLE_FORMAT;
739 } else {
740 /* prepare the extension table */
741 data->extData=CnvExtOpen(data->ucm);
742 if(data->extData==NULL) {
743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
374ca955
A
744 } else {
745 /* fill in gaps in extension file header fields */
746 UCMapping *m, *mLimit;
747 uint8_t fallbackFlags;
748
749 baseStates=&baseData.ucm->states;
750 if(states->conversionType==UCNV_DBCS) {
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
752 } else if(states->minCharLength==0) {
753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
754 }
755 if(states->maxCharLength<states->minCharLength) {
756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
757 }
758
759 if(staticData->subCharLen==0) {
760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
761 staticData->subCharLen=baseData.staticData.subCharLen;
762 }
763 /*
764 * do not copy subChar1 -
765 * only use what is explicitly specified
766 * because it cannot be unset in the extension file header
767 */
768
769 /* get the fallback flags */
770 fallbackFlags=0;
771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
772 m<mLimit && fallbackFlags!=3;
773 ++m
b75a7d8f 774 ) {
374ca955
A
775 if(m->f==1) {
776 fallbackFlags|=1;
777 } else if(m->f==3) {
778 fallbackFlags|=2;
779 }
b75a7d8f 780 }
b75a7d8f 781
374ca955
A
782 if(fallbackFlags&1) {
783 staticData->hasFromUnicodeFallback=TRUE;
784 }
785 if(fallbackFlags&2) {
786 staticData->hasToUnicodeFallback=TRUE;
787 }
b75a7d8f 788
374ca955
A
789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
790 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
791 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 792
729e4ab9 793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
374ca955
A
794 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
795 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 796
374ca955
A
797 } else if(
798 !ucm_checkValidity(data->ucm->ext, baseStates) ||
46f4442e 799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
374ca955
A
800 ) {
801 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
802 } else {
803 if(states->maxCharLength>1) {
804 /*
805 * When building a normal .cnv file with a base table
806 * for an MBCS (not SBCS) table with explicit precision flags,
807 * the MBCSAddTable() function marks some mappings for moving
808 * to the extension table.
809 * They fit into the base toUnicode table but not into the
810 * base fromUnicode table.
811 * (Note: We do have explicit precision flags because they are
812 * required for extension table generation, and
813 * ucm_checkBaseExt() verified it.)
814 *
815 * We do not call MBCSAddTable() here (we probably could)
816 * so we need to do the analysis before building the extension table.
817 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
818 * Redundant mappings in the extension table are ok except they cost some size.
819 *
820 * Do this after ucm_checkBaseExt().
821 */
822 const MBCSData *mbcsData=MBCSGetDummy();
823 int32_t needsMove=0;
824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
825 m<mLimit;
826 ++m
827 ) {
828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
829 m->f|=MBCS_FROM_U_EXT_FLAG;
830 m->moveFlag=UCM_MOVE_TO_EXT;
831 ++needsMove;
832 }
833 }
834
835 if(needsMove!=0) {
836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
837 ucm_sortTable(data->ucm->ext);
838 }
839 }
840 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
841 *pErrorCode=U_INVALID_TABLE_FORMAT;
842 }
374ca955
A
843 }
844 }
845 }
846
847 cleanupConvData(&baseData);
848 }
b75a7d8f
A
849}
850
851/*
852 * Hey, Emacs, please set the following:
853 *
854 * Local Variables:
855 * indent-tabs-mode: nil
856 * End:
857 *
858 */