]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/makeconv/makeconv.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
CommitLineData
/*
 ********************************************************************************
 *
 *   Copyright (C) 1998-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ********************************************************************************
 *
 *
 *  makeconv.c:
 *  tool creating a binary (compressed) representation of the conversion mapping
 *  table (IBM NLTC ucmap format).
 *
 *  05/04/2000    helena     Added fallback mapping into the picture...
 *  06/29/2000    helena     Major rewrite of the callback APIs.
 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
b75a7d8f
A
20#include "unicode/ucnv_err.h"
21#include "ucnv_bld.h"
22#include "ucnv_imp.h"
23#include "ucnv_cnv.h"
24#include "cstring.h"
25#include "cmemory.h"
374ca955 26#include "uinvchar.h"
b75a7d8f
A
27#include "filestrm.h"
28#include "toolutil.h"
29#include "uoptions.h"
30#include "unicode/udata.h"
31#include "unewdata.h"
374ca955
A
32#include "uparse.h"
33#include "ucm.h"
b75a7d8f
A
34#include "makeconv.h"
35#include "genmbcs.h"
36
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves like a
 * single primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Set to 1 to print the files being processed (see the #if DEBUG sections). */
#define DEBUG 0
73c04bcf 40
374ca955
A
41typedef struct ConvData {
42 UCMFile *ucm;
43 NewConverter *cnvData, *extData;
44 UConverterSharedData sharedData;
45 UConverterStaticData staticData;
46} ConvData;
47
48static void
49initConvData(ConvData *data) {
50 uprv_memset(data, 0, sizeof(ConvData));
51 data->sharedData.structSize=sizeof(UConverterSharedData);
52 data->staticData.structSize=sizeof(UConverterStaticData);
53 data->sharedData.staticData=&data->staticData;
54}
55
56static void
57cleanupConvData(ConvData *data) {
58 if(data!=NULL) {
59 if(data->cnvData!=NULL) {
60 data->cnvData->close(data->cnvData);
61 data->cnvData=NULL;
62 }
63 if(data->extData!=NULL) {
64 data->extData->close(data->extData);
65 data->extData=NULL;
66 }
67 ucm_close(data->ucm);
68 data->ucm=NULL;
69 }
70}
71
b75a7d8f
A
72/*
73 * from ucnvstat.c - static prototypes of data-based converters
74 */
75extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
76
77/*
78 * Global - verbosity
79 */
80UBool VERBOSE = FALSE;
46f4442e 81UBool SMALL = FALSE;
b75a7d8f 82
374ca955
A
83static void
84createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
b75a7d8f
A
85
86/*
87 * Set up the UNewData and write the converter..
88 */
374ca955
A
89static void
90writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
b75a7d8f
A
91
92UBool haveCopyright=TRUE;
93
94static UDataInfo dataInfo={
95 sizeof(UDataInfo),
96 0,
97
98 U_IS_BIG_ENDIAN,
99 U_CHARSET_FAMILY,
100 sizeof(UChar),
101 0,
102
103 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
104 {6, 2, 0, 0}, /* formatVersion */
105 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
106};
107
374ca955
A
108static void
109writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
b75a7d8f
A
110{
111 UNewDataMemory *mem = NULL;
112 uint32_t sz2;
113 uint32_t size = 0;
374ca955 114 int32_t tableType;
b75a7d8f
A
115
116 if(U_FAILURE(*status))
117 {
118 return;
119 }
120
374ca955
A
121 tableType=TABLE_NONE;
122 if(data->cnvData!=NULL) {
123 tableType|=TABLE_BASE;
124 }
125 if(data->extData!=NULL) {
126 tableType|=TABLE_EXT;
127 }
128
b75a7d8f
A
129 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
130
131 if(U_FAILURE(*status))
132 {
133 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
134 cnvName,
135 "cnv",
136 u_errorName(*status));
137 return;
138 }
139
140 if(VERBOSE)
141 {
46f4442e 142 printf("- Opened udata %s.%s\n", cnvName, "cnv");
b75a7d8f
A
143 }
144
374ca955 145
b75a7d8f 146 /* all read only, clean, platform independent data. Mmmm. :) */
374ca955 147 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
b75a7d8f
A
148 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
149 /* Now, write the table */
374ca955
A
150 if(tableType&TABLE_BASE) {
151 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
152 }
153 if(tableType&TABLE_EXT) {
154 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
155 }
b75a7d8f
A
156
157 sz2 = udata_finish(mem, status);
158 if(size != sz2)
159 {
374ca955 160 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
b75a7d8f
A
161 *status=U_INTERNAL_PROGRAM_ERROR;
162 }
163 if(VERBOSE)
164 {
46f4442e 165 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
b75a7d8f
A
166 }
167}
168
46f4442e
A
169enum {
170 OPT_HELP_H,
171 OPT_HELP_QUESTION_MARK,
172 OPT_COPYRIGHT,
173 OPT_VERSION,
174 OPT_DESTDIR,
175 OPT_VERBOSE,
176 OPT_SMALL,
177 OPT_COUNT
178};
179
b75a7d8f 180static UOption options[]={
46f4442e
A
181 UOPTION_HELP_H,
182 UOPTION_HELP_QUESTION_MARK,
183 UOPTION_COPYRIGHT,
184 UOPTION_VERSION,
185 UOPTION_DESTDIR,
186 UOPTION_VERBOSE,
187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
b75a7d8f
A
188};
189
190int main(int argc, char* argv[])
191{
374ca955
A
192 ConvData data;
193 UErrorCode err = U_ZERO_ERROR, localError;
b75a7d8f 194 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
b75a7d8f 195 const char* destdir, *arg;
b75a7d8f
A
196 size_t destdirlen;
197 char* dot = NULL, *outBasename;
198 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
199 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200 UVersionInfo icuVersion;
374ca955
A
201 UBool printFilename;
202
203 err = U_ZERO_ERROR;
b75a7d8f
A
204
205 U_MAIN_INIT_ARGS(argc, argv);
206
207 /* Set up the ICU version number */
208 u_getVersion(icuVersion);
209 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
210
211 /* preset then read command line options */
46f4442e
A
212 options[OPT_DESTDIR].value=u_getDataDirectory();
213 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
b75a7d8f
A
214
215 /* error handling, printing usage message */
216 if(argc<0) {
217 fprintf(stderr,
218 "error in command line argument \"%s\"\n",
219 argv[-argc]);
220 } else if(argc<2) {
221 argc=-1;
222 }
46f4442e
A
223 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
224 FILE *stdfile=argc<0 ? stderr : stdout;
225 fprintf(stdfile,
b75a7d8f
A
226 "usage: %s [-options] files...\n"
227 "\tread .ucm codepage mapping files and write .cnv files\n"
228 "options:\n"
229 "\t-h or -? or --help this usage text\n"
230 "\t-V or --version show a version message\n"
231 "\t-c or --copyright include a copyright notice\n"
232 "\t-d or --destdir destination directory, followed by the path\n"
233 "\t-v or --verbose Turn on verbose output\n",
234 argv[0]);
46f4442e
A
235 fprintf(stdfile,
236 "\t --small Generate smaller .cnv files. They will be\n"
237 "\t significantly smaller but may not be compatible with\n"
238 "\t older versions of ICU and will require heap memory\n"
239 "\t allocation when loaded.\n");
b75a7d8f
A
240 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
241 }
242
46f4442e
A
243 if(options[OPT_VERSION].doesOccur) {
244 printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
245 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
246 printf("%s\n", U_COPYRIGHT_STRING);
b75a7d8f
A
247 exit(0);
248 }
249
b75a7d8f 250 /* get the options values */
46f4442e
A
251 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
252 destdir = options[OPT_DESTDIR].value;
253 VERBOSE = options[OPT_VERBOSE].doesOccur;
254 SMALL = options[OPT_SMALL].doesOccur;
b75a7d8f
A
255
256 if (destdir != NULL && *destdir != 0) {
257 uprv_strcpy(outFileName, destdir);
258 destdirlen = uprv_strlen(destdir);
259 outBasename = outFileName + destdirlen;
260 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
261 *outBasename++ = U_FILE_SEP_CHAR;
262 ++destdirlen;
263 }
264 } else {
265 destdirlen = 0;
266 outBasename = outFileName;
267 }
268
269#if DEBUG
270 {
271 int i;
272 printf("makeconv: processing %d files...\n", argc - 1);
273 for(i=1; i<argc; ++i) {
274 printf("%s ", argv[i]);
275 }
276 printf("\n");
277 fflush(stdout);
278 }
279#endif
280
374ca955
A
281 err = U_ZERO_ERROR;
282 printFilename = (UBool) (argc > 2 || VERBOSE);
283 for (++argv; --argc; ++argv)
b75a7d8f 284 {
374ca955 285 arg = getLongPathname(*argv);
b75a7d8f 286
46f4442e
A
287 /* Check for potential buffer overflow */
288 if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
289 {
290 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
291 return U_BUFFER_OVERFLOW_ERROR;
292 }
293
374ca955
A
294 /*produces the right destination path for display*/
295 if (destdirlen != 0)
b75a7d8f 296 {
374ca955 297 const char *basename;
b75a7d8f 298
374ca955
A
299 /* find the last file sepator */
300 basename = findBasename(arg);
301 uprv_strcpy(outBasename, basename);
b75a7d8f 302 }
374ca955 303 else
b75a7d8f 304 {
374ca955 305 uprv_strcpy(outFileName, arg);
b75a7d8f
A
306 }
307
374ca955
A
308 /*removes the extension if any is found*/
309 dot = uprv_strrchr(outBasename, '.');
310 if (dot)
b75a7d8f 311 {
374ca955 312 *dot = '\0';
b75a7d8f
A
313 }
314
374ca955
A
315 /* the basename without extension is the converter name */
316 uprv_strcpy(cnvName, outBasename);
b75a7d8f 317
374ca955
A
318 /*Adds the target extension*/
319 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
b75a7d8f
A
320
321#if DEBUG
322 printf("makeconv: processing %s ...\n", arg);
323 fflush(stdout);
324#endif
374ca955
A
325 localError = U_ZERO_ERROR;
326 initConvData(&data);
327 createConverter(&data, arg, &localError);
b75a7d8f 328
374ca955 329 if (U_FAILURE(localError))
b75a7d8f 330 {
374ca955
A
331 /* if an error is found, print out an error msg and keep going */
332 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
333 u_errorName(localError));
334 if(U_SUCCESS(err)) {
335 err = localError;
336 }
b75a7d8f 337 }
374ca955 338 else
b75a7d8f 339 {
46f4442e
A
340 /* Insure the static data name matches the file name */
341 /* Changed to ignore directory and only compare base name
342 LDH 1/2/08*/
343 char *p;
344 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
345
346 if(p == NULL) /* OK, try alternate */
347 {
348 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
349 if(p == NULL)
350 {
351 p=cnvName; /* If no separators, no problem */
352 }
353 }
354 else
355 {
356 p++; /* If found separtor, don't include it in compare */
357 }
358 if(uprv_stricmp(p,data.staticData.name))
374ca955
A
359 {
360 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
46f4442e 361 cnvName, CONVERTER_FILE_EXTENSION,
374ca955
A
362 data.staticData.name);
363 }
364
365 uprv_strcpy((char*)data.staticData.name, cnvName);
366
367 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
368 fprintf(stderr,
369 "Error: A converter name must contain only invariant characters.\n"
370 "%s is not a valid converter name.\n",
371 data.staticData.name);
372 if(U_SUCCESS(err)) {
373 err = U_INVALID_TABLE_FORMAT;
374 }
375 }
376
73c04bcf 377 uprv_strcpy(cnvNameWithPkg, cnvName);
374ca955
A
378
379 localError = U_ZERO_ERROR;
380 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
374ca955
A
381
382 if(U_FAILURE(localError))
383 {
384 /* if an error is found, print out an error msg and keep going*/
385 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
386 u_errorName(localError));
387 if(U_SUCCESS(err)) {
388 err = localError;
389 }
390 }
391 else if (printFilename)
392 {
46f4442e 393 puts(outBasename);
374ca955 394 }
b75a7d8f 395 }
374ca955
A
396 fflush(stdout);
397 fflush(stderr);
398
399 cleanupConvData(&data);
b75a7d8f
A
400 }
401
374ca955 402 return err;
b75a7d8f
A
403}
404
405static void
406getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
407 if( (name[0]=='i' || name[0]=='I') &&
408 (name[1]=='b' || name[1]=='B') &&
409 (name[2]=='m' || name[2]=='M')
410 ) {
411 name+=3;
412 if(*name=='-') {
413 ++name;
414 }
415 *pPlatform=UCNV_IBM;
416 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
417 } else {
418 *pPlatform=UCNV_UNKNOWN;
419 *pCCSID=0;
420 }
421}
422
374ca955
A
423static void
424readHeader(ConvData *data,
425 FileStream* convFile,
426 const char* converterName,
427 UErrorCode *pErrorCode) {
b75a7d8f 428 char line[200];
374ca955
A
429 char *s, *key, *value;
430 const UConverterStaticData *prototype;
b75a7d8f 431 UConverterStaticData *staticData;
b75a7d8f
A
432
433 if(U_FAILURE(*pErrorCode)) {
434 return;
435 }
436
374ca955 437 staticData=&data->staticData;
b75a7d8f
A
438 staticData->platform=UCNV_IBM;
439 staticData->subCharLen=0;
440
441 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
374ca955
A
442 /* basic parsing and handling of state-related items */
443 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
b75a7d8f
A
444 continue;
445 }
446
447 /* stop at the beginning of the mapping section */
374ca955 448 if(uprv_strcmp(line, "CHARMAP")==0) {
b75a7d8f
A
449 break;
450 }
451
b75a7d8f
A
452 /* collect the information from the header field, ignore unknown keys */
453 if(uprv_strcmp(key, "code_set_name")==0) {
454 if(*value!=0) {
374ca955 455 uprv_strcpy((char *)staticData->name, value);
b75a7d8f
A
456 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
457 }
b75a7d8f 458 } else if(uprv_strcmp(key, "subchar")==0) {
374ca955
A
459 uint8_t bytes[UCNV_EXT_MAX_BYTES];
460 int8_t length;
461
462 s=value;
463 length=ucm_parseBytes(bytes, line, (const char **)&s);
464 if(1<=length && length<=4 && *s==0) {
465 staticData->subCharLen=length;
466 uprv_memcpy(staticData->subChar, bytes, length);
b75a7d8f
A
467 } else {
468 fprintf(stderr, "error: illegal <subchar> %s\n", value);
469 *pErrorCode=U_INVALID_TABLE_FORMAT;
470 return;
471 }
472 } else if(uprv_strcmp(key, "subchar1")==0) {
374ca955 473 uint8_t bytes[UCNV_EXT_MAX_BYTES];
b75a7d8f 474
374ca955
A
475 s=value;
476 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
477 staticData->subChar1=bytes[0];
b75a7d8f
A
478 } else {
479 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
480 *pErrorCode=U_INVALID_TABLE_FORMAT;
481 return;
482 }
374ca955
A
483 }
484 }
485
486 /* copy values from the UCMFile to the static data */
487 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
488 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
489 staticData->conversionType=data->ucm->states.conversionType;
490
491 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
492 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
493 *pErrorCode=U_INVALID_TABLE_FORMAT;
494 return;
495 }
496
497 /*
498 * Now that we know the type, copy any 'default' values from the table.
499 * We need not check the type any further because the parser only
500 * recognizes what we have prototypes for.
501 *
502 * For delta (extension-only) tables, copy values from the base file
503 * instead, see createConverter().
504 */
505 if(data->ucm->baseName[0]==0) {
506 prototype=ucnv_converterStaticData[staticData->conversionType];
507 if(prototype!=NULL) {
508 if(staticData->name[0]==0) {
509 uprv_strcpy((char *)staticData->name, prototype->name);
510 }
511
512 if(staticData->codepage==0) {
513 staticData->codepage=prototype->codepage;
514 }
515
516 if(staticData->platform==0) {
517 staticData->platform=prototype->platform;
518 }
519
520 if(staticData->minBytesPerChar==0) {
521 staticData->minBytesPerChar=prototype->minBytesPerChar;
b75a7d8f
A
522 }
523
524 if(staticData->maxBytesPerChar==0) {
374ca955 525 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
b75a7d8f 526 }
374ca955
A
527
528 if(staticData->subCharLen==0) {
529 staticData->subCharLen=prototype->subCharLen;
530 if(prototype->subCharLen>0) {
531 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
b75a7d8f
A
532 }
533 }
b75a7d8f
A
534 }
535 }
536
374ca955
A
537 if(data->ucm->states.outputType<0) {
538 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
539 }
540
541 if( staticData->subChar1!=0 &&
542 (staticData->minBytesPerChar>1 ||
543 (staticData->conversionType!=UCNV_MBCS &&
544 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
b75a7d8f
A
545 ) {
546 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
547 *pErrorCode=U_INVALID_TABLE_FORMAT;
548 }
549}
550
374ca955
A
551/* return TRUE if a base table was read, FALSE for an extension table */
552static UBool
553readFile(ConvData *data, const char* converterName,
554 UErrorCode *pErrorCode) {
555 char line[200];
556 char *end;
557 FileStream *convFile;
b75a7d8f 558
374ca955
A
559 UCMStates *baseStates;
560 UBool dataIsBase;
b75a7d8f 561
374ca955
A
562 if(U_FAILURE(*pErrorCode)) {
563 return FALSE;
564 }
b75a7d8f 565
374ca955 566 data->ucm=ucm_open();
b75a7d8f 567
374ca955
A
568 convFile=T_FileStream_open(converterName, "r");
569 if(convFile==NULL) {
570 *pErrorCode=U_FILE_ACCESS_ERROR;
571 return FALSE;
572 }
b75a7d8f 573
374ca955
A
574 readHeader(data, convFile, converterName, pErrorCode);
575 if(U_FAILURE(*pErrorCode)) {
576 return FALSE;
b75a7d8f
A
577 }
578
374ca955
A
579 if(data->ucm->baseName[0]==0) {
580 dataIsBase=TRUE;
581 baseStates=&data->ucm->states;
582 ucm_processStates(baseStates);
583 } else {
584 dataIsBase=FALSE;
585 baseStates=NULL;
b75a7d8f 586 }
b75a7d8f 587
374ca955
A
588 /* read the base table */
589 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
590 if(U_FAILURE(*pErrorCode)) {
591 return FALSE;
b75a7d8f
A
592 }
593
374ca955
A
594 /* read an extension table if there is one */
595 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
596 end=uprv_strchr(line, 0);
597 while(line<end &&
598 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
599 --end;
600 }
601 *end=0;
602
603 if(line[0]=='#' || u_skipWhitespace(line)==end) {
604 continue; /* ignore empty and comment lines */
605 }
606
607 if(0==uprv_strcmp(line, "CHARMAP")) {
608 /* read the extension table */
609 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
610 } else {
611 fprintf(stderr, "unexpected text after the base mapping table\n");
612 }
613 break;
b75a7d8f 614 }
374ca955
A
615
616 T_FileStream_close(convFile);
617
618 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
b75a7d8f 619 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
374ca955 620 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 621 }
374ca955
A
622
623 return dataIsBase;
b75a7d8f
A
624}
625
374ca955
A
626static void
627createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
628 ConvData baseData;
629 UBool dataIsBase;
b75a7d8f 630
374ca955
A
631 UConverterStaticData *staticData;
632 UCMStates *states, *baseStates;
b75a7d8f 633
374ca955
A
634 if(U_FAILURE(*pErrorCode)) {
635 return;
b75a7d8f
A
636 }
637
374ca955 638 initConvData(data);
b75a7d8f 639
374ca955
A
640 dataIsBase=readFile(data, converterName, pErrorCode);
641 if(U_FAILURE(*pErrorCode)) {
642 return;
b75a7d8f
A
643 }
644
374ca955
A
645 staticData=&data->staticData;
646 states=&data->ucm->states;
b75a7d8f 647
374ca955 648 if(dataIsBase) {
46f4442e
A
649 /*
650 * Build a normal .cnv file with a base table
651 * and an optional extension table.
652 */
374ca955
A
653 data->cnvData=MBCSOpen(data->ucm);
654 if(data->cnvData==NULL) {
655 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 656
374ca955
A
657 } else if(!data->cnvData->isValid(data->cnvData,
658 staticData->subChar, staticData->subCharLen)
659 ) {
660 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
661 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 662
374ca955
A
663 } else if(staticData->subChar1!=0 &&
664 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
665 ) {
666 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
667 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 668
46f4442e
A
669 } else if(
670 data->ucm->ext->mappingsLength>0 &&
671 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
672 ) {
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
674 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
675 /* sort the table so that it can be turned into UTF-8-friendly data */
676 ucm_sortTable(data->ucm->base);
677 }
b75a7d8f 678
46f4442e
A
679 if(U_SUCCESS(*pErrorCode)) {
680 if(
681 /* add the base table after ucm_checkBaseExt()! */
682 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
374ca955
A
683 ) {
684 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
685 } else {
686 /*
687 * addTable() may have requested moving more mappings to the extension table
688 * if they fit into the base toUnicode table but not into the
689 * base fromUnicode table.
690 * (Especially for UTF-8-friendly fromUnicode tables.)
691 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
692 * to be excluded from the extension toUnicode data.
693 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
694 * the base fromUnicode table.
695 */
696 ucm_moveMappings(data->ucm->base, data->ucm->ext);
697 ucm_sortTable(data->ucm->ext);
698 if(data->ucm->ext->mappingsLength>0) {
699 /* prepare the extension table, if there is one */
700 data->extData=CnvExtOpen(data->ucm);
701 if(data->extData==NULL) {
702 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
703 } else if(
704 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
705 ) {
706 *pErrorCode=U_INVALID_TABLE_FORMAT;
707 }
708 }
b75a7d8f
A
709 }
710 }
374ca955 711 } else {
46f4442e 712 /* Build an extension-only .cnv file. */
374ca955
A
713 char baseFilename[500];
714 char *basename;
715
716 initConvData(&baseData);
717
718 /* assemble a path/filename for data->ucm->baseName */
719 uprv_strcpy(baseFilename, converterName);
720 basename=(char *)findBasename(baseFilename);
721 uprv_strcpy(basename, data->ucm->baseName);
722 uprv_strcat(basename, ".ucm");
723
724 /* read the base table */
725 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
726 if(U_FAILURE(*pErrorCode)) {
727 return;
728 } else if(!dataIsBase) {
729 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
730 *pErrorCode=U_INVALID_TABLE_FORMAT;
731 } else {
732 /* prepare the extension table */
733 data->extData=CnvExtOpen(data->ucm);
734 if(data->extData==NULL) {
735 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
374ca955
A
736 } else {
737 /* fill in gaps in extension file header fields */
738 UCMapping *m, *mLimit;
739 uint8_t fallbackFlags;
740
741 baseStates=&baseData.ucm->states;
742 if(states->conversionType==UCNV_DBCS) {
743 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
744 } else if(states->minCharLength==0) {
745 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
746 }
747 if(states->maxCharLength<states->minCharLength) {
748 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
749 }
750
751 if(staticData->subCharLen==0) {
752 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
753 staticData->subCharLen=baseData.staticData.subCharLen;
754 }
755 /*
756 * do not copy subChar1 -
757 * only use what is explicitly specified
758 * because it cannot be unset in the extension file header
759 */
760
761 /* get the fallback flags */
762 fallbackFlags=0;
763 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
764 m<mLimit && fallbackFlags!=3;
765 ++m
b75a7d8f 766 ) {
374ca955
A
767 if(m->f==1) {
768 fallbackFlags|=1;
769 } else if(m->f==3) {
770 fallbackFlags|=2;
771 }
b75a7d8f 772 }
b75a7d8f 773
374ca955
A
774 if(fallbackFlags&1) {
775 staticData->hasFromUnicodeFallback=TRUE;
776 }
777 if(fallbackFlags&2) {
778 staticData->hasToUnicodeFallback=TRUE;
779 }
b75a7d8f 780
374ca955
A
781 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
782 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
783 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 784
374ca955
A
785 } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
786 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
787 *pErrorCode=U_INVALID_TABLE_FORMAT;
b75a7d8f 788
374ca955
A
789 } else if(
790 !ucm_checkValidity(data->ucm->ext, baseStates) ||
46f4442e 791 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
374ca955
A
792 ) {
793 *pErrorCode=U_INVALID_TABLE_FORMAT;
46f4442e
A
794 } else {
795 if(states->maxCharLength>1) {
796 /*
797 * When building a normal .cnv file with a base table
798 * for an MBCS (not SBCS) table with explicit precision flags,
799 * the MBCSAddTable() function marks some mappings for moving
800 * to the extension table.
801 * They fit into the base toUnicode table but not into the
802 * base fromUnicode table.
803 * (Note: We do have explicit precision flags because they are
804 * required for extension table generation, and
805 * ucm_checkBaseExt() verified it.)
806 *
807 * We do not call MBCSAddTable() here (we probably could)
808 * so we need to do the analysis before building the extension table.
809 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
810 * Redundant mappings in the extension table are ok except they cost some size.
811 *
812 * Do this after ucm_checkBaseExt().
813 */
814 const MBCSData *mbcsData=MBCSGetDummy();
815 int32_t needsMove=0;
816 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
817 m<mLimit;
818 ++m
819 ) {
820 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
821 m->f|=MBCS_FROM_U_EXT_FLAG;
822 m->moveFlag=UCM_MOVE_TO_EXT;
823 ++needsMove;
824 }
825 }
826
827 if(needsMove!=0) {
828 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
829 ucm_sortTable(data->ucm->ext);
830 }
831 }
832 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
833 *pErrorCode=U_INVALID_TABLE_FORMAT;
834 }
374ca955
A
835 }
836 }
837 }
838
839 cleanupConvData(&baseData);
840 }
b75a7d8f
A
841}
842
/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */