]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 ********************************************************************************
5 *
2ca993e8 6 * Copyright (C) 1998-2015, International Business Machines
b75a7d8f
A
7 * Corporation and others. All Rights Reserved.
8 *
9 ********************************************************************************
10 *
11 *
2ca993e8 12 * makeconv.cpp:
b75a7d8f
A
13 * tool creating a binary (compressed) representation of the conversion mapping
14 * table (IBM NLTC ucmap format).
15 *
16 * 05/04/2000 helena Added fallback mapping into the picture...
17 * 06/29/2000 helena Major rewrite of the callback APIs.
18 */
19
20#include <stdio.h>
21#include "unicode/putil.h"
b75a7d8f 22#include "unicode/ucnv_err.h"
2ca993e8 23#include "charstr.h"
b75a7d8f
A
24#include "ucnv_bld.h"
25#include "ucnv_imp.h"
26#include "ucnv_cnv.h"
27#include "cstring.h"
28#include "cmemory.h"
374ca955 29#include "uinvchar.h"
b75a7d8f
A
30#include "filestrm.h"
31#include "toolutil.h"
32#include "uoptions.h"
33#include "unicode/udata.h"
34#include "unewdata.h"
374ca955
A
35#include "uparse.h"
36#include "ucm.h"
b75a7d8f
A
37#include "makeconv.h"
38#include "genmbcs.h"
39
46f4442e 40#define DEBUG 0
73c04bcf 41
374ca955
A
42typedef struct ConvData {
43 UCMFile *ucm;
44 NewConverter *cnvData, *extData;
45 UConverterSharedData sharedData;
46 UConverterStaticData staticData;
47} ConvData;
48
49static void
50initConvData(ConvData *data) {
51 uprv_memset(data, 0, sizeof(ConvData));
52 data->sharedData.structSize=sizeof(UConverterSharedData);
53 data->staticData.structSize=sizeof(UConverterStaticData);
54 data->sharedData.staticData=&data->staticData;
55}
56
57static void
58cleanupConvData(ConvData *data) {
59 if(data!=NULL) {
60 if(data->cnvData!=NULL) {
61 data->cnvData->close(data->cnvData);
62 data->cnvData=NULL;
63 }
64 if(data->extData!=NULL) {
65 data->extData->close(data->extData);
66 data->extData=NULL;
67 }
68 ucm_close(data->ucm);
69 data->ucm=NULL;
70 }
71}
72
b75a7d8f
A
73/*
74 * from ucnvstat.c - static prototypes of data-based converters
75 */
2ca993e8 76U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
b75a7d8f
A
77
78/*
79 * Global - verbosity
80 */
81UBool VERBOSE = FALSE;
2ca993e8 82UBool QUIET = FALSE;
46f4442e 83UBool SMALL = FALSE;
729e4ab9 84UBool IGNORE_SISO_CHECK = FALSE;
b75a7d8f 85
374ca955
A
86static void
87createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
88
89/*
90 * Set up the UNewData and write the converter..
91 */
374ca955
A
92static void
93writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
94
95UBool haveCopyright=TRUE;
96
97static UDataInfo dataInfo={
98 sizeof(UDataInfo),
99 0,
100
101 U_IS_BIG_ENDIAN,
102 U_CHARSET_FAMILY,
103 sizeof(UChar),
104 0,
105
106 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
107 {6, 2, 0, 0}, /* formatVersion */
108 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
109};
110
374ca955
A
111static void
112writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
113{
114 UNewDataMemory *mem = NULL;
115 uint32_t sz2;
116 uint32_t size = 0;
374ca955 117 int32_t tableType;
b75a7d8f
A
118
119 if(U_FAILURE(*status))
120 {
121 return;
122 }
123
374ca955
A
124 tableType=TABLE_NONE;
125 if(data->cnvData!=NULL) {
126 tableType|=TABLE_BASE;
127 }
128 if(data->extData!=NULL) {
129 tableType|=TABLE_EXT;
130 }
131
b75a7d8f
A
132 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133
134 if(U_FAILURE(*status))
135 {
136 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137 cnvName,
138 "cnv",
139 u_errorName(*status));
140 return;
141 }
142
143 if(VERBOSE)
144 {
46f4442e 145 printf("- Opened udata %s.%s\n", cnvName, "cnv");
b75a7d8f
A
146 }
147
374ca955 148
b75a7d8f 149 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 150 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
151 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
152 /* Now, write the table */
374ca955
A
153 if(tableType&TABLE_BASE) {
154 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155 }
156 if(tableType&TABLE_EXT) {
157 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158 }
b75a7d8f
A
159
160 sz2 = udata_finish(mem, status);
161 if(size != sz2)
162 {
374ca955 163 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
164 *status=U_INTERNAL_PROGRAM_ERROR;
165 }
166 if(VERBOSE)
167 {
46f4442e 168 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
169 }
170}
171
46f4442e
A
/*
 * Indexes into options[]; the order here must match the order of the
 * entries in that array.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,

    OPT_COUNT   /* number of options, not an option itself */
};
185
b75a7d8f 186static UOption options[]={
46f4442e
A
187 UOPTION_HELP_H,
188 UOPTION_HELP_QUESTION_MARK,
189 UOPTION_COPYRIGHT,
190 UOPTION_VERSION,
191 UOPTION_DESTDIR,
192 UOPTION_VERBOSE,
729e4ab9 193 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
2ca993e8
A
194 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
195 UOPTION_QUIET,
b75a7d8f
A
196};
197
198int main(int argc, char* argv[])
199{
374ca955 200 ConvData data;
b75a7d8f 201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f
A
202
203 U_MAIN_INIT_ARGS(argc, argv);
204
205 /* Set up the ICU version number */
2ca993e8 206 UVersionInfo icuVersion;
b75a7d8f
A
207 u_getVersion(icuVersion);
208 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
209
210 /* preset then read command line options */
46f4442e 211 options[OPT_DESTDIR].value=u_getDataDirectory();
b331163b 212 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
b75a7d8f
A
213
214 /* error handling, printing usage message */
215 if(argc<0) {
216 fprintf(stderr,
217 "error in command line argument \"%s\"\n",
218 argv[-argc]);
219 } else if(argc<2) {
220 argc=-1;
221 }
46f4442e
A
222 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
223 FILE *stdfile=argc<0 ? stderr : stdout;
224 fprintf(stdfile,
b75a7d8f
A
225 "usage: %s [-options] files...\n"
226 "\tread .ucm codepage mapping files and write .cnv files\n"
227 "options:\n"
228 "\t-h or -? or --help this usage text\n"
229 "\t-V or --version show a version message\n"
230 "\t-c or --copyright include a copyright notice\n"
231 "\t-d or --destdir destination directory, followed by the path\n"
2ca993e8
A
232 "\t-v or --verbose Turn on verbose output\n"
233 "\t-q or --quiet do not display warnings and progress\n",
b75a7d8f 234 argv[0]);
46f4442e
A
235 fprintf(stdfile,
236 "\t --small Generate smaller .cnv files. They will be\n"
237 "\t significantly smaller but may not be compatible with\n"
238 "\t older versions of ICU and will require heap memory\n"
729e4ab9
A
239 "\t allocation when loaded.\n"
240 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
b75a7d8f
A
241 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
242 }
243
46f4442e 244 if(options[OPT_VERSION].doesOccur) {
51004dcb 245 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
46f4442e
A
246 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
247 printf("%s\n", U_COPYRIGHT_STRING);
b75a7d8f
A
248 exit(0);
249 }
250
b75a7d8f 251 /* get the options values */
46f4442e 252 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
2ca993e8 253 const char *destdir = options[OPT_DESTDIR].value;
46f4442e 254 VERBOSE = options[OPT_VERBOSE].doesOccur;
2ca993e8 255 QUIET = options[OPT_QUIET].doesOccur;
46f4442e 256 SMALL = options[OPT_SMALL].doesOccur;
b75a7d8f 257
729e4ab9
A
258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259 IGNORE_SISO_CHECK = TRUE;
260 }
261
2ca993e8
A
262 icu::CharString outFileName;
263 UErrorCode err = U_ZERO_ERROR;
b75a7d8f 264 if (destdir != NULL && *destdir != 0) {
2ca993e8
A
265 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
266 if (U_FAILURE(err)) {
267 return err;
b75a7d8f 268 }
b75a7d8f 269 }
2ca993e8 270 int32_t outBasenameStart = outFileName.length();
b75a7d8f
A
271
272#if DEBUG
273 {
274 int i;
275 printf("makeconv: processing %d files...\n", argc - 1);
276 for(i=1; i<argc; ++i) {
277 printf("%s ", argv[i]);
278 }
279 printf("\n");
280 fflush(stdout);
281 }
282#endif
283
2ca993e8 284 UBool printFilename = (UBool) (argc > 2 || VERBOSE);
374ca955 285 for (++argv; --argc; ++argv)
b75a7d8f 286 {
2ca993e8
A
287 UErrorCode localError = U_ZERO_ERROR;
288 const char *arg = getLongPathname(*argv);
46f4442e 289
374ca955 290 /*produces the right destination path for display*/
2ca993e8
A
291 outFileName.truncate(outBasenameStart);
292 if (outBasenameStart != 0)
b75a7d8f 293 {
374ca955 294 /* find the last file sepator */
2ca993e8
A
295 const char *basename = findBasename(arg);
296 outFileName.append(basename, localError);
b75a7d8f 297 }
374ca955 298 else
b75a7d8f 299 {
2ca993e8
A
300 outFileName.append(arg, localError);
301 }
302 if (U_FAILURE(localError)) {
303 return localError;
b75a7d8f
A
304 }
305
374ca955 306 /*removes the extension if any is found*/
2ca993e8
A
307 int32_t lastDotIndex = outFileName.lastIndexOf('.');
308 if (lastDotIndex >= outBasenameStart) {
309 outFileName.truncate(lastDotIndex);
b75a7d8f
A
310 }
311
374ca955 312 /* the basename without extension is the converter name */
2ca993e8
A
313 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
314 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
315 return U_BUFFER_OVERFLOW_ERROR;
316 }
317 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
b75a7d8f 318
374ca955 319 /*Adds the target extension*/
2ca993e8
A
320 outFileName.append(CONVERTER_FILE_EXTENSION, localError);
321 if (U_FAILURE(localError)) {
322 return localError;
323 }
b75a7d8f
A
324
325#if DEBUG
326 printf("makeconv: processing %s ...\n", arg);
327 fflush(stdout);
328#endif
374ca955
A
329 initConvData(&data);
330 createConverter(&data, arg, &localError);
b75a7d8f 331
374ca955 332 if (U_FAILURE(localError))
b75a7d8f 333 {
374ca955 334 /* if an error is found, print out an error msg and keep going */
2ca993e8
A
335 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
336 outFileName.data(), arg, u_errorName(localError));
374ca955
A
337 if(U_SUCCESS(err)) {
338 err = localError;
339 }
b75a7d8f 340 }
374ca955 341 else
b75a7d8f 342 {
46f4442e
A
343 /* Insure the static data name matches the file name */
344 /* Changed to ignore directory and only compare base name
345 LDH 1/2/08*/
346 char *p;
347 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
348
349 if(p == NULL) /* OK, try alternate */
350 {
351 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
352 if(p == NULL)
353 {
354 p=cnvName; /* If no separators, no problem */
355 }
356 }
357 else
358 {
2ca993e8 359 p++; /* If found separator, don't include it in compare */
46f4442e 360 }
2ca993e8 361 if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374ca955
A
362 {
363 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
46f4442e 364 cnvName, CONVERTER_FILE_EXTENSION,
374ca955
A
365 data.staticData.name);
366 }
367
368 uprv_strcpy((char*)data.staticData.name, cnvName);
369
370 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
371 fprintf(stderr,
372 "Error: A converter name must contain only invariant characters.\n"
373 "%s is not a valid converter name.\n",
374 data.staticData.name);
375 if(U_SUCCESS(err)) {
376 err = U_INVALID_TABLE_FORMAT;
377 }
378 }
379
374ca955 380 localError = U_ZERO_ERROR;
2ca993e8 381 writeConverterData(&data, cnvName, destdir, &localError);
374ca955
A
382
383 if(U_FAILURE(localError))
384 {
385 /* if an error is found, print out an error msg and keep going*/
2ca993e8 386 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
374ca955
A
387 u_errorName(localError));
388 if(U_SUCCESS(err)) {
389 err = localError;
390 }
391 }
392 else if (printFilename)
393 {
2ca993e8 394 puts(outFileName.data() + outBasenameStart);
374ca955 395 }
b75a7d8f 396 }
374ca955
A
397 fflush(stdout);
398 fflush(stderr);
399
400 cleanupConvData(&data);
b75a7d8f
A
401 }
402
374ca955 403 return err;
b75a7d8f
A
404}
405
406static void
407getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
408 if( (name[0]=='i' || name[0]=='I') &&
409 (name[1]=='b' || name[1]=='B') &&
410 (name[2]=='m' || name[2]=='M')
411 ) {
412 name+=3;
413 if(*name=='-') {
414 ++name;
415 }
416 *pPlatform=UCNV_IBM;
417 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
418 } else {
419 *pPlatform=UCNV_UNKNOWN;
420 *pCCSID=0;
421 }
422}
423
374ca955
A
424static void
425readHeader(ConvData *data,
426 FileStream* convFile,
374ca955 427 UErrorCode *pErrorCode) {
4388f060 428 char line[1024];
374ca955
A
429 char *s, *key, *value;
430 const UConverterStaticData *prototype;
b75a7d8f 431 UConverterStaticData *staticData;
b75a7d8f
A
432
433 if(U_FAILURE(*pErrorCode)) {
434 return;
435 }
436
374ca955 437 staticData=&data->staticData;
b75a7d8f
A
438 staticData->platform=UCNV_IBM;
439 staticData->subCharLen=0;
440
441 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
442 /* basic parsing and handling of state-related items */
443 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
444 continue;
445 }
446
447 /* stop at the beginning of the mapping section */
374ca955 448 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
449 break;
450 }
451
b75a7d8f
A
452 /* collect the information from the header field, ignore unknown keys */
453 if(uprv_strcmp(key, "code_set_name")==0) {
454 if(*value!=0) {
374ca955 455 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
456 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
457 }
b75a7d8f 458 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
459 uint8_t bytes[UCNV_EXT_MAX_BYTES];
460 int8_t length;
461
462 s=value;
463 length=ucm_parseBytes(bytes, line, (const char **)&s);
464 if(1<=length && length<=4 && *s==0) {
465 staticData->subCharLen=length;
466 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
467 } else {
468 fprintf(stderr, "error: illegal <subchar> %s\n", value);
469 *pErrorCode=U_INVALID_TABLE_FORMAT;
470 return;
471 }
472 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 473 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 474
374ca955
A
475 s=value;
476 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
477 staticData->subChar1=bytes[0];
b75a7d8f
A
478 } else {
479 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
480 *pErrorCode=U_INVALID_TABLE_FORMAT;
481 return;
482 }
374ca955
A
483 }
484 }
485
486 /* copy values from the UCMFile to the static data */
487 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
488 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
489 staticData->conversionType=data->ucm->states.conversionType;
490
491 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
492 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
493 *pErrorCode=U_INVALID_TABLE_FORMAT;
494 return;
495 }
496
497 /*
498 * Now that we know the type, copy any 'default' values from the table.
499 * We need not check the type any further because the parser only
500 * recognizes what we have prototypes for.
501 *
502 * For delta (extension-only) tables, copy values from the base file
503 * instead, see createConverter().
504 */
505 if(data->ucm->baseName[0]==0) {
506 prototype=ucnv_converterStaticData[staticData->conversionType];
507 if(prototype!=NULL) {
508 if(staticData->name[0]==0) {
509 uprv_strcpy((char *)staticData->name, prototype->name);
510 }
511
512 if(staticData->codepage==0) {
513 staticData->codepage=prototype->codepage;
514 }
515
516 if(staticData->platform==0) {
517 staticData->platform=prototype->platform;
518 }
519
520 if(staticData->minBytesPerChar==0) {
521 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
522 }
523
524 if(staticData->maxBytesPerChar==0) {
374ca955 525 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 526 }
374ca955
A
527
528 if(staticData->subCharLen==0) {
529 staticData->subCharLen=prototype->subCharLen;
530 if(prototype->subCharLen>0) {
531 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
532 }
533 }
b75a7d8f
A
534 }
535 }
536
374ca955
A
537 if(data->ucm->states.outputType<0) {
538 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
539 }
540
541 if( staticData->subChar1!=0 &&
542 (staticData->minBytesPerChar>1 ||
543 (staticData->conversionType!=UCNV_MBCS &&
544 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
545 ) {
546 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
547 *pErrorCode=U_INVALID_TABLE_FORMAT;
548 }
549}
550
374ca955
A
551/* return TRUE if a base table was read, FALSE for an extension table */
552static UBool
553readFile(ConvData *data, const char* converterName,
554 UErrorCode *pErrorCode) {
4388f060 555 char line[1024];
374ca955
A
556 char *end;
557 FileStream *convFile;
b75a7d8f 558
374ca955
A
559 UCMStates *baseStates;
560 UBool dataIsBase;
b75a7d8f 561
374ca955
A
562 if(U_FAILURE(*pErrorCode)) {
563 return FALSE;
564 }
b75a7d8f 565
374ca955 566 data->ucm=ucm_open();
b75a7d8f 567
374ca955
A
568 convFile=T_FileStream_open(converterName, "r");
569 if(convFile==NULL) {
570 *pErrorCode=U_FILE_ACCESS_ERROR;
571 return FALSE;
572 }
b75a7d8f 573
2ca993e8 574 readHeader(data, convFile, pErrorCode);
374ca955
A
575 if(U_FAILURE(*pErrorCode)) {
576 return FALSE;
b75a7d8f
A
577 }
578
374ca955
A
579 if(data->ucm->baseName[0]==0) {
580 dataIsBase=TRUE;
581 baseStates=&data->ucm->states;
729e4ab9 582 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
374ca955
A
583 } else {
584 dataIsBase=FALSE;
585 baseStates=NULL;
b75a7d8f 586 }
b75a7d8f 587
374ca955
A
588 /* read the base table */
589 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
590 if(U_FAILURE(*pErrorCode)) {
591 return FALSE;
b75a7d8f
A
592 }
593
374ca955
A
594 /* read an extension table if there is one */
595 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
596 end=uprv_strchr(line, 0);
597 while(line<end &&
598 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
599 --end;
600 }
601 *end=0;
602
603 if(line[0]=='#' || u_skipWhitespace(line)==end) {
604 continue; /* ignore empty and comment lines */
605 }
606
607 if(0==uprv_strcmp(line, "CHARMAP")) {
608 /* read the extension table */
609 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
610 } else {
611 fprintf(stderr, "unexpected text after the base mapping table\n");
612 }
613 break;
b75a7d8f 614 }
374ca955
A
615
616 T_FileStream_close(convFile);
617
618 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 619 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 620 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 621 }
374ca955
A
622
623 return dataIsBase;
b75a7d8f
A
624}
625
374ca955
A
626static void
627createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
628 ConvData baseData;
629 UBool dataIsBase;
b75a7d8f 630
374ca955
A
631 UConverterStaticData *staticData;
632 UCMStates *states, *baseStates;
b75a7d8f 633
374ca955
A
634 if(U_FAILURE(*pErrorCode)) {
635 return;
b75a7d8f
A
636 }
637
374ca955 638 initConvData(data);
b75a7d8f 639
374ca955
A
640 dataIsBase=readFile(data, converterName, pErrorCode);
641 if(U_FAILURE(*pErrorCode)) {
642 return;
b75a7d8f
A
643 }
644
374ca955
A
645 staticData=&data->staticData;
646 states=&data->ucm->states;
b75a7d8f 647
374ca955 648 if(dataIsBase) {
46f4442e
A
649 /*
650 * Build a normal .cnv file with a base table
651 * and an optional extension table.
652 */
374ca955
A
653 data->cnvData=MBCSOpen(data->ucm);
654 if(data->cnvData==NULL) {
655 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 656
374ca955
A
657 } else if(!data->cnvData->isValid(data->cnvData,
658 staticData->subChar, staticData->subCharLen)
659 ) {
660 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
661 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 662
374ca955
A
663 } else if(staticData->subChar1!=0 &&
664 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
665 ) {
666 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
667 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 668
46f4442e
A
669 } else if(
670 data->ucm->ext->mappingsLength>0 &&
671 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
672 ) {
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
674 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
675 /* sort the table so that it can be turned into UTF-8-friendly data */
676 ucm_sortTable(data->ucm->base);
677 }
b75a7d8f 678
46f4442e
A
679 if(U_SUCCESS(*pErrorCode)) {
680 if(
681 /* add the base table after ucm_checkBaseExt()! */
682 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
374ca955
A
683 ) {
684 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
685 } else {
686 /*
687 * addTable() may have requested moving more mappings to the extension table
688 * if they fit into the base toUnicode table but not into the
689 * base fromUnicode table.
690 * (Especially for UTF-8-friendly fromUnicode tables.)
691 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
692 * to be excluded from the extension toUnicode data.
693 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
694 * the base fromUnicode table.
695 */
696 ucm_moveMappings(data->ucm->base, data->ucm->ext);
697 ucm_sortTable(data->ucm->ext);
698 if(data->ucm->ext->mappingsLength>0) {
699 /* prepare the extension table, if there is one */
700 data->extData=CnvExtOpen(data->ucm);
701 if(data->extData==NULL) {
702 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
703 } else if(
704 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
705 ) {
706 *pErrorCode=U_INVALID_TABLE_FORMAT;
707 }
708 }
b75a7d8f
A
709 }
710 }
374ca955 711 } else {
46f4442e 712 /* Build an extension-only .cnv file. */
374ca955
A
713 char baseFilename[500];
714 char *basename;
715
716 initConvData(&baseData);
717
718 /* assemble a path/filename for data->ucm->baseName */
719 uprv_strcpy(baseFilename, converterName);
720 basename=(char *)findBasename(baseFilename);
721 uprv_strcpy(basename, data->ucm->baseName);
722 uprv_strcat(basename, ".ucm");
723
724 /* read the base table */
725 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
726 if(U_FAILURE(*pErrorCode)) {
727 return;
728 } else if(!dataIsBase) {
729 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
730 *pErrorCode=U_INVALID_TABLE_FORMAT;
731 } else {
732 /* prepare the extension table */
733 data->extData=CnvExtOpen(data->ucm);
734 if(data->extData==NULL) {
735 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
374ca955
A
736 } else {
737 /* fill in gaps in extension file header fields */
738 UCMapping *m, *mLimit;
739 uint8_t fallbackFlags;
740
741 baseStates=&baseData.ucm->states;
742 if(states->conversionType==UCNV_DBCS) {
743 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
744 } else if(states->minCharLength==0) {
745 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
746 }
747 if(states->maxCharLength<states->minCharLength) {
748 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
749 }
750
751 if(staticData->subCharLen==0) {
752 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
753 staticData->subCharLen=baseData.staticData.subCharLen;
754 }
755 /*
756 * do not copy subChar1 -
757 * only use what is explicitly specified
758 * because it cannot be unset in the extension file header
759 */
760
761 /* get the fallback flags */
762 fallbackFlags=0;
763 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
764 m<mLimit && fallbackFlags!=3;
765 ++m
b75a7d8f 766 ) {
374ca955
A
767 if(m->f==1) {
768 fallbackFlags|=1;
769 } else if(m->f==3) {
770 fallbackFlags|=2;
771 }
b75a7d8f 772 }
b75a7d8f 773
374ca955
A
774 if(fallbackFlags&1) {
775 staticData->hasFromUnicodeFallback=TRUE;
776 }
777 if(fallbackFlags&2) {
778 staticData->hasToUnicodeFallback=TRUE;
779 }
b75a7d8f 780
374ca955
A
781 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
782 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
783 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 784
729e4ab9 785 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
374ca955
A
786 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
787 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 788
374ca955
A
789 } else if(
790 !ucm_checkValidity(data->ucm->ext, baseStates) ||
46f4442e 791 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
374ca955
A
792 ) {
793 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
794 } else {
795 if(states->maxCharLength>1) {
796 /*
797 * When building a normal .cnv file with a base table
798 * for an MBCS (not SBCS) table with explicit precision flags,
799 * the MBCSAddTable() function marks some mappings for moving
800 * to the extension table.
801 * They fit into the base toUnicode table but not into the
802 * base fromUnicode table.
803 * (Note: We do have explicit precision flags because they are
804 * required for extension table generation, and
805 * ucm_checkBaseExt() verified it.)
806 *
807 * We do not call MBCSAddTable() here (we probably could)
808 * so we need to do the analysis before building the extension table.
809 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
810 * Redundant mappings in the extension table are ok except they cost some size.
811 *
812 * Do this after ucm_checkBaseExt().
813 */
814 const MBCSData *mbcsData=MBCSGetDummy();
815 int32_t needsMove=0;
816 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
817 m<mLimit;
818 ++m
819 ) {
820 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
821 m->f|=MBCS_FROM_U_EXT_FLAG;
822 m->moveFlag=UCM_MOVE_TO_EXT;
823 ++needsMove;
824 }
825 }
826
827 if(needsMove!=0) {
828 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
829 ucm_sortTable(data->ucm->ext);
830 }
831 }
832 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
833 *pErrorCode=U_INVALID_TABLE_FORMAT;
834 }
374ca955
A
835 }
836 }
837 }
838
839 cleanupConvData(&baseData);
840 }
b75a7d8f
A
841}
842
843/*
844 * Hey, Emacs, please set the following:
845 *
846 * Local Variables:
847 * indent-tabs-mode: nil
848 * End:
849 *
850 */