]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.cpp
CommitLineData
b75a7d8f
A
1/*
2 ********************************************************************************
3 *
2ca993e8 4 * Copyright (C) 1998-2015, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
2ca993e8 10 * makeconv.cpp:
b75a7d8f
A
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
b75a7d8f 20#include "unicode/ucnv_err.h"
2ca993e8 21#include "charstr.h"
b75a7d8f
A
22#include "ucnv_bld.h"
23#include "ucnv_imp.h"
24#include "ucnv_cnv.h"
25#include "cstring.h"
26#include "cmemory.h"
374ca955 27#include "uinvchar.h"
b75a7d8f
A
28#include "filestrm.h"
29#include "toolutil.h"
30#include "uoptions.h"
31#include "unicode/udata.h"
32#include "unewdata.h"
374ca955
A
33#include "uparse.h"
34#include "ucm.h"
b75a7d8f
A
35#include "makeconv.h"
36#include "genmbcs.h"
37
46f4442e 38#define DEBUG 0
73c04bcf 39
374ca955
A
40typedef struct ConvData {
41 UCMFile *ucm;
42 NewConverter *cnvData, *extData;
43 UConverterSharedData sharedData;
44 UConverterStaticData staticData;
45} ConvData;
46
47static void
48initConvData(ConvData *data) {
49 uprv_memset(data, 0, sizeof(ConvData));
50 data->sharedData.structSize=sizeof(UConverterSharedData);
51 data->staticData.structSize=sizeof(UConverterStaticData);
52 data->sharedData.staticData=&data->staticData;
53}
54
55static void
56cleanupConvData(ConvData *data) {
57 if(data!=NULL) {
58 if(data->cnvData!=NULL) {
59 data->cnvData->close(data->cnvData);
60 data->cnvData=NULL;
61 }
62 if(data->extData!=NULL) {
63 data->extData->close(data->extData);
64 data->extData=NULL;
65 }
66 ucm_close(data->ucm);
67 data->ucm=NULL;
68 }
69}
70
b75a7d8f
A
71/*
72 * from ucnvstat.c - static prototypes of data-based converters
73 */
2ca993e8 74U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
b75a7d8f
A
75
76/*
77 * Global - verbosity
78 */
79UBool VERBOSE = FALSE;
2ca993e8 80UBool QUIET = FALSE;
46f4442e 81UBool SMALL = FALSE;
729e4ab9 82UBool IGNORE_SISO_CHECK = FALSE;
b75a7d8f 83
374ca955
A
84static void
85createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
86
87/*
88 * Set up the UNewData and write the converter..
89 */
374ca955
A
90static void
91writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
92
93UBool haveCopyright=TRUE;
94
95static UDataInfo dataInfo={
96 sizeof(UDataInfo),
97 0,
98
99 U_IS_BIG_ENDIAN,
100 U_CHARSET_FAMILY,
101 sizeof(UChar),
102 0,
103
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
107};
108
374ca955
A
109static void
110writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
111{
112 UNewDataMemory *mem = NULL;
113 uint32_t sz2;
114 uint32_t size = 0;
374ca955 115 int32_t tableType;
b75a7d8f
A
116
117 if(U_FAILURE(*status))
118 {
119 return;
120 }
121
374ca955
A
122 tableType=TABLE_NONE;
123 if(data->cnvData!=NULL) {
124 tableType|=TABLE_BASE;
125 }
126 if(data->extData!=NULL) {
127 tableType|=TABLE_EXT;
128 }
129
b75a7d8f
A
130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131
132 if(U_FAILURE(*status))
133 {
134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135 cnvName,
136 "cnv",
137 u_errorName(*status));
138 return;
139 }
140
141 if(VERBOSE)
142 {
46f4442e 143 printf("- Opened udata %s.%s\n", cnvName, "cnv");
b75a7d8f
A
144 }
145
374ca955 146
b75a7d8f 147 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
150 /* Now, write the table */
374ca955
A
151 if(tableType&TABLE_BASE) {
152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153 }
154 if(tableType&TABLE_EXT) {
155 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156 }
b75a7d8f
A
157
158 sz2 = udata_finish(mem, status);
159 if(size != sz2)
160 {
374ca955 161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
162 *status=U_INTERNAL_PROGRAM_ERROR;
163 }
164 if(VERBOSE)
165 {
46f4442e 166 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
167 }
168}
169
46f4442e
A
170enum {
171 OPT_HELP_H,
172 OPT_HELP_QUESTION_MARK,
173 OPT_COPYRIGHT,
174 OPT_VERSION,
175 OPT_DESTDIR,
176 OPT_VERBOSE,
177 OPT_SMALL,
729e4ab9 178 OPT_IGNORE_SISO_CHECK,
2ca993e8
A
179 OPT_QUIET,
180
46f4442e
A
181 OPT_COUNT
182};
183
b75a7d8f 184static UOption options[]={
46f4442e
A
185 UOPTION_HELP_H,
186 UOPTION_HELP_QUESTION_MARK,
187 UOPTION_COPYRIGHT,
188 UOPTION_VERSION,
189 UOPTION_DESTDIR,
190 UOPTION_VERBOSE,
729e4ab9 191 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
2ca993e8
A
192 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
193 UOPTION_QUIET,
b75a7d8f
A
194};
195
196int main(int argc, char* argv[])
197{
374ca955 198 ConvData data;
b75a7d8f 199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f
A
200
201 U_MAIN_INIT_ARGS(argc, argv);
202
203 /* Set up the ICU version number */
2ca993e8 204 UVersionInfo icuVersion;
b75a7d8f
A
205 u_getVersion(icuVersion);
206 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
207
208 /* preset then read command line options */
46f4442e 209 options[OPT_DESTDIR].value=u_getDataDirectory();
b331163b 210 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
b75a7d8f
A
211
212 /* error handling, printing usage message */
213 if(argc<0) {
214 fprintf(stderr,
215 "error in command line argument \"%s\"\n",
216 argv[-argc]);
217 } else if(argc<2) {
218 argc=-1;
219 }
46f4442e
A
220 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
221 FILE *stdfile=argc<0 ? stderr : stdout;
222 fprintf(stdfile,
b75a7d8f
A
223 "usage: %s [-options] files...\n"
224 "\tread .ucm codepage mapping files and write .cnv files\n"
225 "options:\n"
226 "\t-h or -? or --help this usage text\n"
227 "\t-V or --version show a version message\n"
228 "\t-c or --copyright include a copyright notice\n"
229 "\t-d or --destdir destination directory, followed by the path\n"
2ca993e8
A
230 "\t-v or --verbose Turn on verbose output\n"
231 "\t-q or --quiet do not display warnings and progress\n",
b75a7d8f 232 argv[0]);
46f4442e
A
233 fprintf(stdfile,
234 "\t --small Generate smaller .cnv files. They will be\n"
235 "\t significantly smaller but may not be compatible with\n"
236 "\t older versions of ICU and will require heap memory\n"
729e4ab9
A
237 "\t allocation when loaded.\n"
238 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
b75a7d8f
A
239 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
240 }
241
46f4442e 242 if(options[OPT_VERSION].doesOccur) {
51004dcb 243 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
46f4442e
A
244 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
245 printf("%s\n", U_COPYRIGHT_STRING);
b75a7d8f
A
246 exit(0);
247 }
248
b75a7d8f 249 /* get the options values */
46f4442e 250 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
2ca993e8 251 const char *destdir = options[OPT_DESTDIR].value;
46f4442e 252 VERBOSE = options[OPT_VERBOSE].doesOccur;
2ca993e8 253 QUIET = options[OPT_QUIET].doesOccur;
46f4442e 254 SMALL = options[OPT_SMALL].doesOccur;
b75a7d8f 255
729e4ab9
A
256 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
257 IGNORE_SISO_CHECK = TRUE;
258 }
259
2ca993e8
A
260 icu::CharString outFileName;
261 UErrorCode err = U_ZERO_ERROR;
b75a7d8f 262 if (destdir != NULL && *destdir != 0) {
2ca993e8
A
263 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
264 if (U_FAILURE(err)) {
265 return err;
b75a7d8f 266 }
b75a7d8f 267 }
2ca993e8 268 int32_t outBasenameStart = outFileName.length();
b75a7d8f
A
269
270#if DEBUG
271 {
272 int i;
273 printf("makeconv: processing %d files...\n", argc - 1);
274 for(i=1; i<argc; ++i) {
275 printf("%s ", argv[i]);
276 }
277 printf("\n");
278 fflush(stdout);
279 }
280#endif
281
2ca993e8 282 UBool printFilename = (UBool) (argc > 2 || VERBOSE);
374ca955 283 for (++argv; --argc; ++argv)
b75a7d8f 284 {
2ca993e8
A
285 UErrorCode localError = U_ZERO_ERROR;
286 const char *arg = getLongPathname(*argv);
46f4442e 287
374ca955 288 /*produces the right destination path for display*/
2ca993e8
A
289 outFileName.truncate(outBasenameStart);
290 if (outBasenameStart != 0)
b75a7d8f 291 {
374ca955 292 /* find the last file sepator */
2ca993e8
A
293 const char *basename = findBasename(arg);
294 outFileName.append(basename, localError);
b75a7d8f 295 }
374ca955 296 else
b75a7d8f 297 {
2ca993e8
A
298 outFileName.append(arg, localError);
299 }
300 if (U_FAILURE(localError)) {
301 return localError;
b75a7d8f
A
302 }
303
374ca955 304 /*removes the extension if any is found*/
2ca993e8
A
305 int32_t lastDotIndex = outFileName.lastIndexOf('.');
306 if (lastDotIndex >= outBasenameStart) {
307 outFileName.truncate(lastDotIndex);
b75a7d8f
A
308 }
309
374ca955 310 /* the basename without extension is the converter name */
2ca993e8
A
311 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
312 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
313 return U_BUFFER_OVERFLOW_ERROR;
314 }
315 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
b75a7d8f 316
374ca955 317 /*Adds the target extension*/
2ca993e8
A
318 outFileName.append(CONVERTER_FILE_EXTENSION, localError);
319 if (U_FAILURE(localError)) {
320 return localError;
321 }
b75a7d8f
A
322
323#if DEBUG
324 printf("makeconv: processing %s ...\n", arg);
325 fflush(stdout);
326#endif
374ca955
A
327 initConvData(&data);
328 createConverter(&data, arg, &localError);
b75a7d8f 329
374ca955 330 if (U_FAILURE(localError))
b75a7d8f 331 {
374ca955 332 /* if an error is found, print out an error msg and keep going */
2ca993e8
A
333 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
334 outFileName.data(), arg, u_errorName(localError));
374ca955
A
335 if(U_SUCCESS(err)) {
336 err = localError;
337 }
b75a7d8f 338 }
374ca955 339 else
b75a7d8f 340 {
46f4442e
A
341 /* Insure the static data name matches the file name */
342 /* Changed to ignore directory and only compare base name
343 LDH 1/2/08*/
344 char *p;
345 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
346
347 if(p == NULL) /* OK, try alternate */
348 {
349 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
350 if(p == NULL)
351 {
352 p=cnvName; /* If no separators, no problem */
353 }
354 }
355 else
356 {
2ca993e8 357 p++; /* If found separator, don't include it in compare */
46f4442e 358 }
2ca993e8 359 if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374ca955
A
360 {
361 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
46f4442e 362 cnvName, CONVERTER_FILE_EXTENSION,
374ca955
A
363 data.staticData.name);
364 }
365
366 uprv_strcpy((char*)data.staticData.name, cnvName);
367
368 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
369 fprintf(stderr,
370 "Error: A converter name must contain only invariant characters.\n"
371 "%s is not a valid converter name.\n",
372 data.staticData.name);
373 if(U_SUCCESS(err)) {
374 err = U_INVALID_TABLE_FORMAT;
375 }
376 }
377
374ca955 378 localError = U_ZERO_ERROR;
2ca993e8 379 writeConverterData(&data, cnvName, destdir, &localError);
374ca955
A
380
381 if(U_FAILURE(localError))
382 {
383 /* if an error is found, print out an error msg and keep going*/
2ca993e8 384 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
374ca955
A
385 u_errorName(localError));
386 if(U_SUCCESS(err)) {
387 err = localError;
388 }
389 }
390 else if (printFilename)
391 {
2ca993e8 392 puts(outFileName.data() + outBasenameStart);
374ca955 393 }
b75a7d8f 394 }
374ca955
A
395 fflush(stdout);
396 fflush(stderr);
397
398 cleanupConvData(&data);
b75a7d8f
A
399 }
400
374ca955 401 return err;
b75a7d8f
A
402}
403
404static void
405getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
406 if( (name[0]=='i' || name[0]=='I') &&
407 (name[1]=='b' || name[1]=='B') &&
408 (name[2]=='m' || name[2]=='M')
409 ) {
410 name+=3;
411 if(*name=='-') {
412 ++name;
413 }
414 *pPlatform=UCNV_IBM;
415 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
416 } else {
417 *pPlatform=UCNV_UNKNOWN;
418 *pCCSID=0;
419 }
420}
421
374ca955
A
422static void
423readHeader(ConvData *data,
424 FileStream* convFile,
374ca955 425 UErrorCode *pErrorCode) {
4388f060 426 char line[1024];
374ca955
A
427 char *s, *key, *value;
428 const UConverterStaticData *prototype;
b75a7d8f 429 UConverterStaticData *staticData;
b75a7d8f
A
430
431 if(U_FAILURE(*pErrorCode)) {
432 return;
433 }
434
374ca955 435 staticData=&data->staticData;
b75a7d8f
A
436 staticData->platform=UCNV_IBM;
437 staticData->subCharLen=0;
438
439 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
440 /* basic parsing and handling of state-related items */
441 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
442 continue;
443 }
444
445 /* stop at the beginning of the mapping section */
374ca955 446 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
447 break;
448 }
449
b75a7d8f
A
450 /* collect the information from the header field, ignore unknown keys */
451 if(uprv_strcmp(key, "code_set_name")==0) {
452 if(*value!=0) {
374ca955 453 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
454 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
455 }
b75a7d8f 456 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
457 uint8_t bytes[UCNV_EXT_MAX_BYTES];
458 int8_t length;
459
460 s=value;
461 length=ucm_parseBytes(bytes, line, (const char **)&s);
462 if(1<=length && length<=4 && *s==0) {
463 staticData->subCharLen=length;
464 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
465 } else {
466 fprintf(stderr, "error: illegal <subchar> %s\n", value);
467 *pErrorCode=U_INVALID_TABLE_FORMAT;
468 return;
469 }
470 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 471 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 472
374ca955
A
473 s=value;
474 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
475 staticData->subChar1=bytes[0];
b75a7d8f
A
476 } else {
477 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
478 *pErrorCode=U_INVALID_TABLE_FORMAT;
479 return;
480 }
374ca955
A
481 }
482 }
483
484 /* copy values from the UCMFile to the static data */
485 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
486 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
487 staticData->conversionType=data->ucm->states.conversionType;
488
489 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
490 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
491 *pErrorCode=U_INVALID_TABLE_FORMAT;
492 return;
493 }
494
495 /*
496 * Now that we know the type, copy any 'default' values from the table.
497 * We need not check the type any further because the parser only
498 * recognizes what we have prototypes for.
499 *
500 * For delta (extension-only) tables, copy values from the base file
501 * instead, see createConverter().
502 */
503 if(data->ucm->baseName[0]==0) {
504 prototype=ucnv_converterStaticData[staticData->conversionType];
505 if(prototype!=NULL) {
506 if(staticData->name[0]==0) {
507 uprv_strcpy((char *)staticData->name, prototype->name);
508 }
509
510 if(staticData->codepage==0) {
511 staticData->codepage=prototype->codepage;
512 }
513
514 if(staticData->platform==0) {
515 staticData->platform=prototype->platform;
516 }
517
518 if(staticData->minBytesPerChar==0) {
519 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
520 }
521
522 if(staticData->maxBytesPerChar==0) {
374ca955 523 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 524 }
374ca955
A
525
526 if(staticData->subCharLen==0) {
527 staticData->subCharLen=prototype->subCharLen;
528 if(prototype->subCharLen>0) {
529 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
530 }
531 }
b75a7d8f
A
532 }
533 }
534
374ca955
A
535 if(data->ucm->states.outputType<0) {
536 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
537 }
538
539 if( staticData->subChar1!=0 &&
540 (staticData->minBytesPerChar>1 ||
541 (staticData->conversionType!=UCNV_MBCS &&
542 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
543 ) {
544 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
545 *pErrorCode=U_INVALID_TABLE_FORMAT;
546 }
547}
548
374ca955
A
549/* return TRUE if a base table was read, FALSE for an extension table */
550static UBool
551readFile(ConvData *data, const char* converterName,
552 UErrorCode *pErrorCode) {
4388f060 553 char line[1024];
374ca955
A
554 char *end;
555 FileStream *convFile;
b75a7d8f 556
374ca955
A
557 UCMStates *baseStates;
558 UBool dataIsBase;
b75a7d8f 559
374ca955
A
560 if(U_FAILURE(*pErrorCode)) {
561 return FALSE;
562 }
b75a7d8f 563
374ca955 564 data->ucm=ucm_open();
b75a7d8f 565
374ca955
A
566 convFile=T_FileStream_open(converterName, "r");
567 if(convFile==NULL) {
568 *pErrorCode=U_FILE_ACCESS_ERROR;
569 return FALSE;
570 }
b75a7d8f 571
2ca993e8 572 readHeader(data, convFile, pErrorCode);
374ca955
A
573 if(U_FAILURE(*pErrorCode)) {
574 return FALSE;
b75a7d8f
A
575 }
576
374ca955
A
577 if(data->ucm->baseName[0]==0) {
578 dataIsBase=TRUE;
579 baseStates=&data->ucm->states;
729e4ab9 580 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
374ca955
A
581 } else {
582 dataIsBase=FALSE;
583 baseStates=NULL;
b75a7d8f 584 }
b75a7d8f 585
374ca955
A
586 /* read the base table */
587 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
588 if(U_FAILURE(*pErrorCode)) {
589 return FALSE;
b75a7d8f
A
590 }
591
374ca955
A
592 /* read an extension table if there is one */
593 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
594 end=uprv_strchr(line, 0);
595 while(line<end &&
596 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
597 --end;
598 }
599 *end=0;
600
601 if(line[0]=='#' || u_skipWhitespace(line)==end) {
602 continue; /* ignore empty and comment lines */
603 }
604
605 if(0==uprv_strcmp(line, "CHARMAP")) {
606 /* read the extension table */
607 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
608 } else {
609 fprintf(stderr, "unexpected text after the base mapping table\n");
610 }
611 break;
b75a7d8f 612 }
374ca955
A
613
614 T_FileStream_close(convFile);
615
616 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 617 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 618 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 619 }
374ca955
A
620
621 return dataIsBase;
b75a7d8f
A
622}
623
374ca955
A
624static void
625createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
626 ConvData baseData;
627 UBool dataIsBase;
b75a7d8f 628
374ca955
A
629 UConverterStaticData *staticData;
630 UCMStates *states, *baseStates;
b75a7d8f 631
374ca955
A
632 if(U_FAILURE(*pErrorCode)) {
633 return;
b75a7d8f
A
634 }
635
374ca955 636 initConvData(data);
b75a7d8f 637
374ca955
A
638 dataIsBase=readFile(data, converterName, pErrorCode);
639 if(U_FAILURE(*pErrorCode)) {
640 return;
b75a7d8f
A
641 }
642
374ca955
A
643 staticData=&data->staticData;
644 states=&data->ucm->states;
b75a7d8f 645
374ca955 646 if(dataIsBase) {
46f4442e
A
647 /*
648 * Build a normal .cnv file with a base table
649 * and an optional extension table.
650 */
374ca955
A
651 data->cnvData=MBCSOpen(data->ucm);
652 if(data->cnvData==NULL) {
653 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 654
374ca955
A
655 } else if(!data->cnvData->isValid(data->cnvData,
656 staticData->subChar, staticData->subCharLen)
657 ) {
658 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
659 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 660
374ca955
A
661 } else if(staticData->subChar1!=0 &&
662 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
663 ) {
664 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
665 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 666
46f4442e
A
667 } else if(
668 data->ucm->ext->mappingsLength>0 &&
669 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
670 ) {
671 *pErrorCode=U_INVALID_TABLE_FORMAT;
672 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
673 /* sort the table so that it can be turned into UTF-8-friendly data */
674 ucm_sortTable(data->ucm->base);
675 }
b75a7d8f 676
46f4442e
A
677 if(U_SUCCESS(*pErrorCode)) {
678 if(
679 /* add the base table after ucm_checkBaseExt()! */
680 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
374ca955
A
681 ) {
682 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
683 } else {
684 /*
685 * addTable() may have requested moving more mappings to the extension table
686 * if they fit into the base toUnicode table but not into the
687 * base fromUnicode table.
688 * (Especially for UTF-8-friendly fromUnicode tables.)
689 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
690 * to be excluded from the extension toUnicode data.
691 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
692 * the base fromUnicode table.
693 */
694 ucm_moveMappings(data->ucm->base, data->ucm->ext);
695 ucm_sortTable(data->ucm->ext);
696 if(data->ucm->ext->mappingsLength>0) {
697 /* prepare the extension table, if there is one */
698 data->extData=CnvExtOpen(data->ucm);
699 if(data->extData==NULL) {
700 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
701 } else if(
702 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
703 ) {
704 *pErrorCode=U_INVALID_TABLE_FORMAT;
705 }
706 }
b75a7d8f
A
707 }
708 }
374ca955 709 } else {
46f4442e 710 /* Build an extension-only .cnv file. */
374ca955
A
711 char baseFilename[500];
712 char *basename;
713
714 initConvData(&baseData);
715
716 /* assemble a path/filename for data->ucm->baseName */
717 uprv_strcpy(baseFilename, converterName);
718 basename=(char *)findBasename(baseFilename);
719 uprv_strcpy(basename, data->ucm->baseName);
720 uprv_strcat(basename, ".ucm");
721
722 /* read the base table */
723 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
724 if(U_FAILURE(*pErrorCode)) {
725 return;
726 } else if(!dataIsBase) {
727 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
728 *pErrorCode=U_INVALID_TABLE_FORMAT;
729 } else {
730 /* prepare the extension table */
731 data->extData=CnvExtOpen(data->ucm);
732 if(data->extData==NULL) {
733 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
374ca955
A
734 } else {
735 /* fill in gaps in extension file header fields */
736 UCMapping *m, *mLimit;
737 uint8_t fallbackFlags;
738
739 baseStates=&baseData.ucm->states;
740 if(states->conversionType==UCNV_DBCS) {
741 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
742 } else if(states->minCharLength==0) {
743 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
744 }
745 if(states->maxCharLength<states->minCharLength) {
746 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
747 }
748
749 if(staticData->subCharLen==0) {
750 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
751 staticData->subCharLen=baseData.staticData.subCharLen;
752 }
753 /*
754 * do not copy subChar1 -
755 * only use what is explicitly specified
756 * because it cannot be unset in the extension file header
757 */
758
759 /* get the fallback flags */
760 fallbackFlags=0;
761 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
762 m<mLimit && fallbackFlags!=3;
763 ++m
b75a7d8f 764 ) {
374ca955
A
765 if(m->f==1) {
766 fallbackFlags|=1;
767 } else if(m->f==3) {
768 fallbackFlags|=2;
769 }
b75a7d8f 770 }
b75a7d8f 771
374ca955
A
772 if(fallbackFlags&1) {
773 staticData->hasFromUnicodeFallback=TRUE;
774 }
775 if(fallbackFlags&2) {
776 staticData->hasToUnicodeFallback=TRUE;
777 }
b75a7d8f 778
374ca955
A
779 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
780 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
781 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 782
729e4ab9 783 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
374ca955
A
784 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
785 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 786
374ca955
A
787 } else if(
788 !ucm_checkValidity(data->ucm->ext, baseStates) ||
46f4442e 789 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
374ca955
A
790 ) {
791 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
792 } else {
793 if(states->maxCharLength>1) {
794 /*
795 * When building a normal .cnv file with a base table
796 * for an MBCS (not SBCS) table with explicit precision flags,
797 * the MBCSAddTable() function marks some mappings for moving
798 * to the extension table.
799 * They fit into the base toUnicode table but not into the
800 * base fromUnicode table.
801 * (Note: We do have explicit precision flags because they are
802 * required for extension table generation, and
803 * ucm_checkBaseExt() verified it.)
804 *
805 * We do not call MBCSAddTable() here (we probably could)
806 * so we need to do the analysis before building the extension table.
807 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
808 * Redundant mappings in the extension table are ok except they cost some size.
809 *
810 * Do this after ucm_checkBaseExt().
811 */
812 const MBCSData *mbcsData=MBCSGetDummy();
813 int32_t needsMove=0;
814 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
815 m<mLimit;
816 ++m
817 ) {
818 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
819 m->f|=MBCS_FROM_U_EXT_FLAG;
820 m->moveFlag=UCM_MOVE_TO_EXT;
821 ++needsMove;
822 }
823 }
824
825 if(needsMove!=0) {
826 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
827 ucm_sortTable(data->ucm->ext);
828 }
829 }
830 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
831 *pErrorCode=U_INVALID_TABLE_FORMAT;
832 }
374ca955
A
833 }
834 }
835 }
836
837 cleanupConvData(&baseData);
838 }
b75a7d8f
A
839}
840
841/*
842 * Hey, Emacs, please set the following:
843 *
844 * Local Variables:
845 * indent-tabs-mode: nil
846 * End:
847 *
848 */