]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 ********************************************************************************
5 *
2ca993e8 6 * Copyright (C) 1998-2015, International Business Machines
b75a7d8f
A
7 * Corporation and others. All Rights Reserved.
8 *
9 ********************************************************************************
10 *
11 *
2ca993e8 12 * makeconv.cpp:
b75a7d8f
A
13 * tool creating a binary (compressed) representation of the conversion mapping
14 * table (IBM NLTC ucmap format).
15 *
16 * 05/04/2000 helena Added fallback mapping into the picture...
17 * 06/29/2000 helena Major rewrite of the callback APIs.
18 */
19
20#include <stdio.h>
21#include "unicode/putil.h"
b75a7d8f 22#include "unicode/ucnv_err.h"
2ca993e8 23#include "charstr.h"
b75a7d8f
A
24#include "ucnv_bld.h"
25#include "ucnv_imp.h"
26#include "ucnv_cnv.h"
27#include "cstring.h"
28#include "cmemory.h"
374ca955 29#include "uinvchar.h"
b75a7d8f
A
30#include "filestrm.h"
31#include "toolutil.h"
32#include "uoptions.h"
33#include "unicode/udata.h"
34#include "unewdata.h"
374ca955
A
35#include "uparse.h"
36#include "ucm.h"
b75a7d8f
A
37#include "makeconv.h"
38#include "genmbcs.h"
39
46f4442e 40#define DEBUG 0
73c04bcf 41
374ca955
A
42typedef struct ConvData {
43 UCMFile *ucm;
44 NewConverter *cnvData, *extData;
45 UConverterSharedData sharedData;
46 UConverterStaticData staticData;
47} ConvData;
48
49static void
50initConvData(ConvData *data) {
51 uprv_memset(data, 0, sizeof(ConvData));
52 data->sharedData.structSize=sizeof(UConverterSharedData);
53 data->staticData.structSize=sizeof(UConverterStaticData);
54 data->sharedData.staticData=&data->staticData;
55}
56
57static void
58cleanupConvData(ConvData *data) {
59 if(data!=NULL) {
60 if(data->cnvData!=NULL) {
61 data->cnvData->close(data->cnvData);
62 data->cnvData=NULL;
63 }
64 if(data->extData!=NULL) {
65 data->extData->close(data->extData);
66 data->extData=NULL;
67 }
68 ucm_close(data->ucm);
69 data->ucm=NULL;
70 }
71}
72
b75a7d8f
A
73/*
74 * from ucnvstat.c - static prototypes of data-based converters
75 */
2ca993e8 76U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
b75a7d8f
A
77
78/*
79 * Global - verbosity
80 */
81UBool VERBOSE = FALSE;
2ca993e8 82UBool QUIET = FALSE;
46f4442e 83UBool SMALL = FALSE;
729e4ab9 84UBool IGNORE_SISO_CHECK = FALSE;
b75a7d8f 85
374ca955
A
86static void
87createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
88
89/*
90 * Set up the UNewData and write the converter..
91 */
374ca955
A
92static void
93writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
94
95UBool haveCopyright=TRUE;
96
97static UDataInfo dataInfo={
98 sizeof(UDataInfo),
99 0,
100
101 U_IS_BIG_ENDIAN,
102 U_CHARSET_FAMILY,
103 sizeof(UChar),
104 0,
105
106 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
107 {6, 2, 0, 0}, /* formatVersion */
108 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
109};
110
374ca955
A
111static void
112writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
113{
114 UNewDataMemory *mem = NULL;
115 uint32_t sz2;
116 uint32_t size = 0;
374ca955 117 int32_t tableType;
b75a7d8f
A
118
119 if(U_FAILURE(*status))
120 {
121 return;
122 }
123
374ca955
A
124 tableType=TABLE_NONE;
125 if(data->cnvData!=NULL) {
126 tableType|=TABLE_BASE;
127 }
128 if(data->extData!=NULL) {
129 tableType|=TABLE_EXT;
130 }
131
b75a7d8f
A
132 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133
134 if(U_FAILURE(*status))
135 {
136 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137 cnvName,
138 "cnv",
139 u_errorName(*status));
140 return;
141 }
142
143 if(VERBOSE)
144 {
46f4442e 145 printf("- Opened udata %s.%s\n", cnvName, "cnv");
b75a7d8f
A
146 }
147
374ca955 148
b75a7d8f 149 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 150 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
151 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
152 /* Now, write the table */
374ca955
A
153 if(tableType&TABLE_BASE) {
154 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155 }
156 if(tableType&TABLE_EXT) {
157 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158 }
b75a7d8f
A
159
160 sz2 = udata_finish(mem, status);
161 if(size != sz2)
162 {
374ca955 163 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
164 *status=U_INTERNAL_PROGRAM_ERROR;
165 }
166 if(VERBOSE)
167 {
46f4442e 168 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
169 }
170}
171
46f4442e
A
/*
 * Indexes into options[]. main() uses these to look up option values, so the
 * order here has to stay the same as the order of the entries in options[].
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,
    OPT_SOURCEDIR,

    OPT_COUNT   /* number of options, not an option itself */
};
186
b75a7d8f 187static UOption options[]={
46f4442e
A
188 UOPTION_HELP_H,
189 UOPTION_HELP_QUESTION_MARK,
190 UOPTION_COPYRIGHT,
191 UOPTION_VERSION,
192 UOPTION_DESTDIR,
193 UOPTION_VERBOSE,
729e4ab9 194 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
2ca993e8
A
195 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
196 UOPTION_QUIET,
3d1f044b 197 UOPTION_SOURCEDIR,
b75a7d8f
A
198};
199
200int main(int argc, char* argv[])
201{
374ca955 202 ConvData data;
b75a7d8f 203 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f
A
204
205 U_MAIN_INIT_ARGS(argc, argv);
206
207 /* Set up the ICU version number */
2ca993e8 208 UVersionInfo icuVersion;
b75a7d8f
A
209 u_getVersion(icuVersion);
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211
212 /* preset then read command line options */
46f4442e 213 options[OPT_DESTDIR].value=u_getDataDirectory();
b331163b 214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
b75a7d8f
A
215
216 /* error handling, printing usage message */
217 if(argc<0) {
218 fprintf(stderr,
219 "error in command line argument \"%s\"\n",
220 argv[-argc]);
221 } else if(argc<2) {
222 argc=-1;
223 }
46f4442e
A
224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
225 FILE *stdfile=argc<0 ? stderr : stdout;
226 fprintf(stdfile,
b75a7d8f
A
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
229 "options:\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
2ca993e8 234 "\t-v or --verbose Turn on verbose output\n"
3d1f044b
A
235 "\t-q or --quiet do not display warnings and progress\n"
236 "\t-s or --sourcedir source directory, followed by the path\n",
b75a7d8f 237 argv[0]);
46f4442e
A
238 fprintf(stdfile,
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
729e4ab9
A
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
b75a7d8f
A
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245 }
246
46f4442e 247 if(options[OPT_VERSION].doesOccur) {
51004dcb 248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
46f4442e
A
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250 printf("%s\n", U_COPYRIGHT_STRING);
b75a7d8f
A
251 exit(0);
252 }
253
b75a7d8f 254 /* get the options values */
46f4442e 255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
2ca993e8 256 const char *destdir = options[OPT_DESTDIR].value;
46f4442e 257 VERBOSE = options[OPT_VERBOSE].doesOccur;
2ca993e8 258 QUIET = options[OPT_QUIET].doesOccur;
46f4442e 259 SMALL = options[OPT_SMALL].doesOccur;
b75a7d8f 260
729e4ab9
A
261 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
262 IGNORE_SISO_CHECK = TRUE;
263 }
264
2ca993e8
A
265 icu::CharString outFileName;
266 UErrorCode err = U_ZERO_ERROR;
b75a7d8f 267 if (destdir != NULL && *destdir != 0) {
2ca993e8
A
268 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
269 if (U_FAILURE(err)) {
270 return err;
b75a7d8f 271 }
b75a7d8f 272 }
2ca993e8 273 int32_t outBasenameStart = outFileName.length();
b75a7d8f
A
274
275#if DEBUG
276 {
277 int i;
278 printf("makeconv: processing %d files...\n", argc - 1);
279 for(i=1; i<argc; ++i) {
280 printf("%s ", argv[i]);
281 }
282 printf("\n");
283 fflush(stdout);
284 }
285#endif
286
2ca993e8 287 UBool printFilename = (UBool) (argc > 2 || VERBOSE);
3d1f044b 288 icu::CharString pathBuf;
374ca955 289 for (++argv; --argc; ++argv)
b75a7d8f 290 {
2ca993e8
A
291 UErrorCode localError = U_ZERO_ERROR;
292 const char *arg = getLongPathname(*argv);
46f4442e 293
3d1f044b
A
294 const char* sourcedir = options[OPT_SOURCEDIR].value;
295 if (sourcedir != NULL && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
296 pathBuf.clear();
297 pathBuf.appendPathPart(sourcedir, localError);
298 pathBuf.appendPathPart(arg, localError);
299 arg = pathBuf.data();
300 }
301
374ca955 302 /*produces the right destination path for display*/
2ca993e8
A
303 outFileName.truncate(outBasenameStart);
304 if (outBasenameStart != 0)
b75a7d8f 305 {
374ca955 306 /* find the last file sepator */
2ca993e8
A
307 const char *basename = findBasename(arg);
308 outFileName.append(basename, localError);
b75a7d8f 309 }
374ca955 310 else
b75a7d8f 311 {
2ca993e8
A
312 outFileName.append(arg, localError);
313 }
314 if (U_FAILURE(localError)) {
315 return localError;
b75a7d8f
A
316 }
317
374ca955 318 /*removes the extension if any is found*/
2ca993e8
A
319 int32_t lastDotIndex = outFileName.lastIndexOf('.');
320 if (lastDotIndex >= outBasenameStart) {
321 outFileName.truncate(lastDotIndex);
b75a7d8f
A
322 }
323
374ca955 324 /* the basename without extension is the converter name */
2ca993e8
A
325 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
326 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
327 return U_BUFFER_OVERFLOW_ERROR;
328 }
329 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
b75a7d8f 330
374ca955 331 /*Adds the target extension*/
2ca993e8
A
332 outFileName.append(CONVERTER_FILE_EXTENSION, localError);
333 if (U_FAILURE(localError)) {
334 return localError;
335 }
b75a7d8f
A
336
337#if DEBUG
338 printf("makeconv: processing %s ...\n", arg);
339 fflush(stdout);
340#endif
374ca955
A
341 initConvData(&data);
342 createConverter(&data, arg, &localError);
b75a7d8f 343
374ca955 344 if (U_FAILURE(localError))
b75a7d8f 345 {
374ca955 346 /* if an error is found, print out an error msg and keep going */
2ca993e8
A
347 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348 outFileName.data(), arg, u_errorName(localError));
374ca955
A
349 if(U_SUCCESS(err)) {
350 err = localError;
351 }
b75a7d8f 352 }
374ca955 353 else
b75a7d8f 354 {
46f4442e
A
355 /* Insure the static data name matches the file name */
356 /* Changed to ignore directory and only compare base name
357 LDH 1/2/08*/
358 char *p;
359 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
360
361 if(p == NULL) /* OK, try alternate */
362 {
363 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
364 if(p == NULL)
365 {
366 p=cnvName; /* If no separators, no problem */
367 }
368 }
369 else
370 {
2ca993e8 371 p++; /* If found separator, don't include it in compare */
46f4442e 372 }
2ca993e8 373 if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374ca955
A
374 {
375 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
46f4442e 376 cnvName, CONVERTER_FILE_EXTENSION,
374ca955
A
377 data.staticData.name);
378 }
379
380 uprv_strcpy((char*)data.staticData.name, cnvName);
381
382 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
383 fprintf(stderr,
384 "Error: A converter name must contain only invariant characters.\n"
385 "%s is not a valid converter name.\n",
386 data.staticData.name);
387 if(U_SUCCESS(err)) {
388 err = U_INVALID_TABLE_FORMAT;
389 }
390 }
391
374ca955 392 localError = U_ZERO_ERROR;
2ca993e8 393 writeConverterData(&data, cnvName, destdir, &localError);
374ca955
A
394
395 if(U_FAILURE(localError))
396 {
397 /* if an error is found, print out an error msg and keep going*/
2ca993e8 398 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
374ca955
A
399 u_errorName(localError));
400 if(U_SUCCESS(err)) {
401 err = localError;
402 }
403 }
404 else if (printFilename)
405 {
2ca993e8 406 puts(outFileName.data() + outBasenameStart);
374ca955 407 }
b75a7d8f 408 }
374ca955
A
409 fflush(stdout);
410 fflush(stderr);
411
412 cleanupConvData(&data);
b75a7d8f
A
413 }
414
374ca955 415 return err;
b75a7d8f
A
416}
417
418static void
419getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
420 if( (name[0]=='i' || name[0]=='I') &&
421 (name[1]=='b' || name[1]=='B') &&
422 (name[2]=='m' || name[2]=='M')
423 ) {
424 name+=3;
425 if(*name=='-') {
426 ++name;
427 }
428 *pPlatform=UCNV_IBM;
429 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
430 } else {
431 *pPlatform=UCNV_UNKNOWN;
432 *pCCSID=0;
433 }
434}
435
374ca955
A
436static void
437readHeader(ConvData *data,
438 FileStream* convFile,
374ca955 439 UErrorCode *pErrorCode) {
4388f060 440 char line[1024];
374ca955
A
441 char *s, *key, *value;
442 const UConverterStaticData *prototype;
b75a7d8f 443 UConverterStaticData *staticData;
b75a7d8f
A
444
445 if(U_FAILURE(*pErrorCode)) {
446 return;
447 }
448
374ca955 449 staticData=&data->staticData;
b75a7d8f
A
450 staticData->platform=UCNV_IBM;
451 staticData->subCharLen=0;
452
453 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
454 /* basic parsing and handling of state-related items */
455 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
456 continue;
457 }
458
459 /* stop at the beginning of the mapping section */
374ca955 460 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
461 break;
462 }
463
b75a7d8f
A
464 /* collect the information from the header field, ignore unknown keys */
465 if(uprv_strcmp(key, "code_set_name")==0) {
466 if(*value!=0) {
374ca955 467 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
468 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
469 }
b75a7d8f 470 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
471 uint8_t bytes[UCNV_EXT_MAX_BYTES];
472 int8_t length;
473
474 s=value;
475 length=ucm_parseBytes(bytes, line, (const char **)&s);
476 if(1<=length && length<=4 && *s==0) {
477 staticData->subCharLen=length;
478 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
479 } else {
480 fprintf(stderr, "error: illegal <subchar> %s\n", value);
481 *pErrorCode=U_INVALID_TABLE_FORMAT;
482 return;
483 }
484 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 485 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 486
374ca955
A
487 s=value;
488 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
489 staticData->subChar1=bytes[0];
b75a7d8f
A
490 } else {
491 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
492 *pErrorCode=U_INVALID_TABLE_FORMAT;
493 return;
494 }
374ca955
A
495 }
496 }
497
498 /* copy values from the UCMFile to the static data */
499 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
500 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
501 staticData->conversionType=data->ucm->states.conversionType;
502
503 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
504 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
505 *pErrorCode=U_INVALID_TABLE_FORMAT;
506 return;
507 }
508
509 /*
510 * Now that we know the type, copy any 'default' values from the table.
511 * We need not check the type any further because the parser only
512 * recognizes what we have prototypes for.
513 *
514 * For delta (extension-only) tables, copy values from the base file
515 * instead, see createConverter().
516 */
517 if(data->ucm->baseName[0]==0) {
518 prototype=ucnv_converterStaticData[staticData->conversionType];
519 if(prototype!=NULL) {
520 if(staticData->name[0]==0) {
521 uprv_strcpy((char *)staticData->name, prototype->name);
522 }
523
524 if(staticData->codepage==0) {
525 staticData->codepage=prototype->codepage;
526 }
527
528 if(staticData->platform==0) {
529 staticData->platform=prototype->platform;
530 }
531
532 if(staticData->minBytesPerChar==0) {
533 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
534 }
535
536 if(staticData->maxBytesPerChar==0) {
374ca955 537 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 538 }
374ca955
A
539
540 if(staticData->subCharLen==0) {
541 staticData->subCharLen=prototype->subCharLen;
542 if(prototype->subCharLen>0) {
543 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
544 }
545 }
b75a7d8f
A
546 }
547 }
548
374ca955
A
549 if(data->ucm->states.outputType<0) {
550 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
551 }
552
553 if( staticData->subChar1!=0 &&
554 (staticData->minBytesPerChar>1 ||
555 (staticData->conversionType!=UCNV_MBCS &&
556 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
557 ) {
558 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
559 *pErrorCode=U_INVALID_TABLE_FORMAT;
560 }
561}
562
374ca955
A
563/* return TRUE if a base table was read, FALSE for an extension table */
564static UBool
565readFile(ConvData *data, const char* converterName,
566 UErrorCode *pErrorCode) {
4388f060 567 char line[1024];
374ca955
A
568 char *end;
569 FileStream *convFile;
b75a7d8f 570
374ca955
A
571 UCMStates *baseStates;
572 UBool dataIsBase;
b75a7d8f 573
374ca955
A
574 if(U_FAILURE(*pErrorCode)) {
575 return FALSE;
576 }
b75a7d8f 577
374ca955 578 data->ucm=ucm_open();
b75a7d8f 579
374ca955
A
580 convFile=T_FileStream_open(converterName, "r");
581 if(convFile==NULL) {
582 *pErrorCode=U_FILE_ACCESS_ERROR;
583 return FALSE;
584 }
b75a7d8f 585
2ca993e8 586 readHeader(data, convFile, pErrorCode);
374ca955
A
587 if(U_FAILURE(*pErrorCode)) {
588 return FALSE;
b75a7d8f
A
589 }
590
374ca955
A
591 if(data->ucm->baseName[0]==0) {
592 dataIsBase=TRUE;
593 baseStates=&data->ucm->states;
729e4ab9 594 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
374ca955
A
595 } else {
596 dataIsBase=FALSE;
597 baseStates=NULL;
b75a7d8f 598 }
b75a7d8f 599
374ca955
A
600 /* read the base table */
601 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
602 if(U_FAILURE(*pErrorCode)) {
603 return FALSE;
b75a7d8f
A
604 }
605
374ca955
A
606 /* read an extension table if there is one */
607 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
608 end=uprv_strchr(line, 0);
609 while(line<end &&
610 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
611 --end;
612 }
613 *end=0;
614
615 if(line[0]=='#' || u_skipWhitespace(line)==end) {
616 continue; /* ignore empty and comment lines */
617 }
618
619 if(0==uprv_strcmp(line, "CHARMAP")) {
620 /* read the extension table */
621 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
622 } else {
623 fprintf(stderr, "unexpected text after the base mapping table\n");
624 }
625 break;
b75a7d8f 626 }
374ca955
A
627
628 T_FileStream_close(convFile);
629
630 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 631 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 632 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 633 }
374ca955
A
634
635 return dataIsBase;
b75a7d8f
A
636}
637
374ca955
A
638static void
639createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
640 ConvData baseData;
641 UBool dataIsBase;
b75a7d8f 642
374ca955
A
643 UConverterStaticData *staticData;
644 UCMStates *states, *baseStates;
b75a7d8f 645
374ca955
A
646 if(U_FAILURE(*pErrorCode)) {
647 return;
b75a7d8f
A
648 }
649
374ca955 650 initConvData(data);
b75a7d8f 651
374ca955
A
652 dataIsBase=readFile(data, converterName, pErrorCode);
653 if(U_FAILURE(*pErrorCode)) {
654 return;
b75a7d8f
A
655 }
656
374ca955
A
657 staticData=&data->staticData;
658 states=&data->ucm->states;
b75a7d8f 659
374ca955 660 if(dataIsBase) {
46f4442e
A
661 /*
662 * Build a normal .cnv file with a base table
663 * and an optional extension table.
664 */
374ca955
A
665 data->cnvData=MBCSOpen(data->ucm);
666 if(data->cnvData==NULL) {
667 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 668
374ca955
A
669 } else if(!data->cnvData->isValid(data->cnvData,
670 staticData->subChar, staticData->subCharLen)
671 ) {
672 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 674
374ca955
A
675 } else if(staticData->subChar1!=0 &&
676 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
677 ) {
678 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
679 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 680
46f4442e
A
681 } else if(
682 data->ucm->ext->mappingsLength>0 &&
683 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
684 ) {
685 *pErrorCode=U_INVALID_TABLE_FORMAT;
686 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
687 /* sort the table so that it can be turned into UTF-8-friendly data */
688 ucm_sortTable(data->ucm->base);
689 }
b75a7d8f 690
46f4442e
A
691 if(U_SUCCESS(*pErrorCode)) {
692 if(
693 /* add the base table after ucm_checkBaseExt()! */
694 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
374ca955
A
695 ) {
696 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
697 } else {
698 /*
699 * addTable() may have requested moving more mappings to the extension table
700 * if they fit into the base toUnicode table but not into the
701 * base fromUnicode table.
702 * (Especially for UTF-8-friendly fromUnicode tables.)
703 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
704 * to be excluded from the extension toUnicode data.
705 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
706 * the base fromUnicode table.
707 */
708 ucm_moveMappings(data->ucm->base, data->ucm->ext);
709 ucm_sortTable(data->ucm->ext);
710 if(data->ucm->ext->mappingsLength>0) {
711 /* prepare the extension table, if there is one */
712 data->extData=CnvExtOpen(data->ucm);
713 if(data->extData==NULL) {
714 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
715 } else if(
716 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
717 ) {
718 *pErrorCode=U_INVALID_TABLE_FORMAT;
719 }
720 }
b75a7d8f
A
721 }
722 }
374ca955 723 } else {
46f4442e 724 /* Build an extension-only .cnv file. */
374ca955
A
725 char baseFilename[500];
726 char *basename;
727
728 initConvData(&baseData);
729
730 /* assemble a path/filename for data->ucm->baseName */
731 uprv_strcpy(baseFilename, converterName);
732 basename=(char *)findBasename(baseFilename);
733 uprv_strcpy(basename, data->ucm->baseName);
734 uprv_strcat(basename, ".ucm");
735
736 /* read the base table */
737 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
738 if(U_FAILURE(*pErrorCode)) {
739 return;
740 } else if(!dataIsBase) {
741 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
742 *pErrorCode=U_INVALID_TABLE_FORMAT;
743 } else {
744 /* prepare the extension table */
745 data->extData=CnvExtOpen(data->ucm);
746 if(data->extData==NULL) {
747 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
374ca955
A
748 } else {
749 /* fill in gaps in extension file header fields */
750 UCMapping *m, *mLimit;
751 uint8_t fallbackFlags;
752
753 baseStates=&baseData.ucm->states;
754 if(states->conversionType==UCNV_DBCS) {
755 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
756 } else if(states->minCharLength==0) {
757 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
758 }
759 if(states->maxCharLength<states->minCharLength) {
760 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
761 }
762
763 if(staticData->subCharLen==0) {
764 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
765 staticData->subCharLen=baseData.staticData.subCharLen;
766 }
767 /*
768 * do not copy subChar1 -
769 * only use what is explicitly specified
770 * because it cannot be unset in the extension file header
771 */
772
773 /* get the fallback flags */
774 fallbackFlags=0;
775 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
776 m<mLimit && fallbackFlags!=3;
777 ++m
b75a7d8f 778 ) {
374ca955
A
779 if(m->f==1) {
780 fallbackFlags|=1;
781 } else if(m->f==3) {
782 fallbackFlags|=2;
783 }
b75a7d8f 784 }
b75a7d8f 785
374ca955
A
786 if(fallbackFlags&1) {
787 staticData->hasFromUnicodeFallback=TRUE;
788 }
789 if(fallbackFlags&2) {
790 staticData->hasToUnicodeFallback=TRUE;
791 }
b75a7d8f 792
374ca955
A
793 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
794 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
795 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 796
729e4ab9 797 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
374ca955
A
798 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
799 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 800
374ca955
A
801 } else if(
802 !ucm_checkValidity(data->ucm->ext, baseStates) ||
46f4442e 803 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
374ca955
A
804 ) {
805 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
806 } else {
807 if(states->maxCharLength>1) {
808 /*
809 * When building a normal .cnv file with a base table
810 * for an MBCS (not SBCS) table with explicit precision flags,
811 * the MBCSAddTable() function marks some mappings for moving
812 * to the extension table.
813 * They fit into the base toUnicode table but not into the
814 * base fromUnicode table.
815 * (Note: We do have explicit precision flags because they are
816 * required for extension table generation, and
817 * ucm_checkBaseExt() verified it.)
818 *
819 * We do not call MBCSAddTable() here (we probably could)
820 * so we need to do the analysis before building the extension table.
821 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
822 * Redundant mappings in the extension table are ok except they cost some size.
823 *
824 * Do this after ucm_checkBaseExt().
825 */
826 const MBCSData *mbcsData=MBCSGetDummy();
827 int32_t needsMove=0;
828 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
829 m<mLimit;
830 ++m
831 ) {
832 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
833 m->f|=MBCS_FROM_U_EXT_FLAG;
834 m->moveFlag=UCM_MOVE_TO_EXT;
835 ++needsMove;
836 }
837 }
838
839 if(needsMove!=0) {
840 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
841 ucm_sortTable(data->ucm->ext);
842 }
843 }
844 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
845 *pErrorCode=U_INVALID_TABLE_FORMAT;
846 }
374ca955
A
847 }
848 }
849 }
850
851 cleanupConvData(&baseData);
852 }
b75a7d8f
A
853}
854
855/*
856 * Hey, Emacs, please set the following:
857 *
858 * Local Variables:
859 * indent-tabs-mode: nil
860 * End:
861 *
862 */