]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/makeconv/makeconv.c
ICU-551.30.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
1 /*
2 ********************************************************************************
3 *
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
36
37 #define DEBUG 0
38
39 typedef struct ConvData {
40 UCMFile *ucm;
41 NewConverter *cnvData, *extData;
42 UConverterSharedData sharedData;
43 UConverterStaticData staticData;
44 } ConvData;
45
46 static void
47 initConvData(ConvData *data) {
48 uprv_memset(data, 0, sizeof(ConvData));
49 data->sharedData.structSize=sizeof(UConverterSharedData);
50 data->staticData.structSize=sizeof(UConverterStaticData);
51 data->sharedData.staticData=&data->staticData;
52 }
53
54 static void
55 cleanupConvData(ConvData *data) {
56 if(data!=NULL) {
57 if(data->cnvData!=NULL) {
58 data->cnvData->close(data->cnvData);
59 data->cnvData=NULL;
60 }
61 if(data->extData!=NULL) {
62 data->extData->close(data->extData);
63 data->extData=NULL;
64 }
65 ucm_close(data->ucm);
66 data->ucm=NULL;
67 }
68 }
69
70 /*
71 * from ucnvstat.c - static prototypes of data-based converters
72 */
73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
74
75 /*
76 * Global - verbosity
77 */
78 UBool VERBOSE = FALSE;
79 UBool SMALL = FALSE;
80 UBool IGNORE_SISO_CHECK = FALSE;
81
82 static void
83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
84
85 /*
86 * Set up the UNewData and write the converter..
87 */
88 static void
89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
90
91 UBool haveCopyright=TRUE;
92
93 static UDataInfo dataInfo={
94 sizeof(UDataInfo),
95 0,
96
97 U_IS_BIG_ENDIAN,
98 U_CHARSET_FAMILY,
99 sizeof(UChar),
100 0,
101
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
103 {6, 2, 0, 0}, /* formatVersion */
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
105 };
106
107 static void
108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
109 {
110 UNewDataMemory *mem = NULL;
111 uint32_t sz2;
112 uint32_t size = 0;
113 int32_t tableType;
114
115 if(U_FAILURE(*status))
116 {
117 return;
118 }
119
120 tableType=TABLE_NONE;
121 if(data->cnvData!=NULL) {
122 tableType|=TABLE_BASE;
123 }
124 if(data->extData!=NULL) {
125 tableType|=TABLE_EXT;
126 }
127
128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
129
130 if(U_FAILURE(*status))
131 {
132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
133 cnvName,
134 "cnv",
135 u_errorName(*status));
136 return;
137 }
138
139 if(VERBOSE)
140 {
141 printf("- Opened udata %s.%s\n", cnvName, "cnv");
142 }
143
144
145 /* all read only, clean, platform independent data. Mmmm. :) */
146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
148 /* Now, write the table */
149 if(tableType&TABLE_BASE) {
150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
151 }
152 if(tableType&TABLE_EXT) {
153 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
154 }
155
156 sz2 = udata_finish(mem, status);
157 if(size != sz2)
158 {
159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
160 *status=U_INTERNAL_PROGRAM_ERROR;
161 }
162 if(VERBOSE)
163 {
164 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
165 }
166 }
167
/*
 * Indexes into options[]; the order here must match the order of the
 * initializers of that array.
 */
enum {
    OPT_HELP_H = 0,             /* -h / --help */
    OPT_HELP_QUESTION_MARK,     /* -? */
    OPT_COPYRIGHT,              /* -c / --copyright */
    OPT_VERSION,                /* -V / --version */
    OPT_DESTDIR,                /* -d / --destdir */
    OPT_VERBOSE,                /* -v / --verbose */
    OPT_SMALL,                  /* --small */
    OPT_IGNORE_SISO_CHECK,      /* --ignore-siso-check */
    OPT_COUNT                   /* number of options */
};
179
180 static UOption options[]={
181 UOPTION_HELP_H,
182 UOPTION_HELP_QUESTION_MARK,
183 UOPTION_COPYRIGHT,
184 UOPTION_VERSION,
185 UOPTION_DESTDIR,
186 UOPTION_VERBOSE,
187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
188 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
189 };
190
191 int main(int argc, char* argv[])
192 {
193 ConvData data;
194 UErrorCode err = U_ZERO_ERROR, localError;
195 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
196 const char* destdir, *arg;
197 size_t destdirlen;
198 char* dot = NULL, *outBasename;
199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
201 UVersionInfo icuVersion;
202 UBool printFilename;
203
204 err = U_ZERO_ERROR;
205
206 U_MAIN_INIT_ARGS(argc, argv);
207
208 /* Set up the ICU version number */
209 u_getVersion(icuVersion);
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211
212 /* preset then read command line options */
213 options[OPT_DESTDIR].value=u_getDataDirectory();
214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215
216 /* error handling, printing usage message */
217 if(argc<0) {
218 fprintf(stderr,
219 "error in command line argument \"%s\"\n",
220 argv[-argc]);
221 } else if(argc<2) {
222 argc=-1;
223 }
224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
225 FILE *stdfile=argc<0 ? stderr : stdout;
226 fprintf(stdfile,
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
229 "options:\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
234 "\t-v or --verbose Turn on verbose output\n",
235 argv[0]);
236 fprintf(stdfile,
237 "\t --small Generate smaller .cnv files. They will be\n"
238 "\t significantly smaller but may not be compatible with\n"
239 "\t older versions of ICU and will require heap memory\n"
240 "\t allocation when loaded.\n"
241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
242 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
243 }
244
245 if(options[OPT_VERSION].doesOccur) {
246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
247 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
248 printf("%s\n", U_COPYRIGHT_STRING);
249 exit(0);
250 }
251
252 /* get the options values */
253 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
254 destdir = options[OPT_DESTDIR].value;
255 VERBOSE = options[OPT_VERBOSE].doesOccur;
256 SMALL = options[OPT_SMALL].doesOccur;
257
258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259 IGNORE_SISO_CHECK = TRUE;
260 }
261
262 if (destdir != NULL && *destdir != 0) {
263 uprv_strcpy(outFileName, destdir);
264 destdirlen = uprv_strlen(destdir);
265 outBasename = outFileName + destdirlen;
266 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
267 *outBasename++ = U_FILE_SEP_CHAR;
268 ++destdirlen;
269 }
270 } else {
271 destdirlen = 0;
272 outBasename = outFileName;
273 }
274
275 #if DEBUG
276 {
277 int i;
278 printf("makeconv: processing %d files...\n", argc - 1);
279 for(i=1; i<argc; ++i) {
280 printf("%s ", argv[i]);
281 }
282 printf("\n");
283 fflush(stdout);
284 }
285 #endif
286
287 err = U_ZERO_ERROR;
288 printFilename = (UBool) (argc > 2 || VERBOSE);
289 for (++argv; --argc; ++argv)
290 {
291 arg = getLongPathname(*argv);
292
293 /* Check for potential buffer overflow */
294 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
295 {
296 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
297 return U_BUFFER_OVERFLOW_ERROR;
298 }
299
300 /*produces the right destination path for display*/
301 if (destdirlen != 0)
302 {
303 const char *basename;
304
305 /* find the last file sepator */
306 basename = findBasename(arg);
307 uprv_strcpy(outBasename, basename);
308 }
309 else
310 {
311 uprv_strcpy(outFileName, arg);
312 }
313
314 /*removes the extension if any is found*/
315 dot = uprv_strrchr(outBasename, '.');
316 if (dot)
317 {
318 *dot = '\0';
319 }
320
321 /* the basename without extension is the converter name */
322 uprv_strcpy(cnvName, outBasename);
323
324 /*Adds the target extension*/
325 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
326
327 #if DEBUG
328 printf("makeconv: processing %s ...\n", arg);
329 fflush(stdout);
330 #endif
331 localError = U_ZERO_ERROR;
332 initConvData(&data);
333 createConverter(&data, arg, &localError);
334
335 if (U_FAILURE(localError))
336 {
337 /* if an error is found, print out an error msg and keep going */
338 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
339 u_errorName(localError));
340 if(U_SUCCESS(err)) {
341 err = localError;
342 }
343 }
344 else
345 {
346 /* Insure the static data name matches the file name */
347 /* Changed to ignore directory and only compare base name
348 LDH 1/2/08*/
349 char *p;
350 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
351
352 if(p == NULL) /* OK, try alternate */
353 {
354 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
355 if(p == NULL)
356 {
357 p=cnvName; /* If no separators, no problem */
358 }
359 }
360 else
361 {
362 p++; /* If found separtor, don't include it in compare */
363 }
364 if(uprv_stricmp(p,data.staticData.name))
365 {
366 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
367 cnvName, CONVERTER_FILE_EXTENSION,
368 data.staticData.name);
369 }
370
371 uprv_strcpy((char*)data.staticData.name, cnvName);
372
373 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
374 fprintf(stderr,
375 "Error: A converter name must contain only invariant characters.\n"
376 "%s is not a valid converter name.\n",
377 data.staticData.name);
378 if(U_SUCCESS(err)) {
379 err = U_INVALID_TABLE_FORMAT;
380 }
381 }
382
383 uprv_strcpy(cnvNameWithPkg, cnvName);
384
385 localError = U_ZERO_ERROR;
386 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
387
388 if(U_FAILURE(localError))
389 {
390 /* if an error is found, print out an error msg and keep going*/
391 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
392 u_errorName(localError));
393 if(U_SUCCESS(err)) {
394 err = localError;
395 }
396 }
397 else if (printFilename)
398 {
399 puts(outBasename);
400 }
401 }
402 fflush(stdout);
403 fflush(stderr);
404
405 cleanupConvData(&data);
406 }
407
408 return err;
409 }
410
411 static void
412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
413 if( (name[0]=='i' || name[0]=='I') &&
414 (name[1]=='b' || name[1]=='B') &&
415 (name[2]=='m' || name[2]=='M')
416 ) {
417 name+=3;
418 if(*name=='-') {
419 ++name;
420 }
421 *pPlatform=UCNV_IBM;
422 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
423 } else {
424 *pPlatform=UCNV_UNKNOWN;
425 *pCCSID=0;
426 }
427 }
428
429 static void
430 readHeader(ConvData *data,
431 FileStream* convFile,
432 const char* converterName,
433 UErrorCode *pErrorCode) {
434 char line[1024];
435 char *s, *key, *value;
436 const UConverterStaticData *prototype;
437 UConverterStaticData *staticData;
438
439 if(U_FAILURE(*pErrorCode)) {
440 return;
441 }
442
443 staticData=&data->staticData;
444 staticData->platform=UCNV_IBM;
445 staticData->subCharLen=0;
446
447 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
448 /* basic parsing and handling of state-related items */
449 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
450 continue;
451 }
452
453 /* stop at the beginning of the mapping section */
454 if(uprv_strcmp(line, "CHARMAP")==0) {
455 break;
456 }
457
458 /* collect the information from the header field, ignore unknown keys */
459 if(uprv_strcmp(key, "code_set_name")==0) {
460 if(*value!=0) {
461 uprv_strcpy((char *)staticData->name, value);
462 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
463 }
464 } else if(uprv_strcmp(key, "subchar")==0) {
465 uint8_t bytes[UCNV_EXT_MAX_BYTES];
466 int8_t length;
467
468 s=value;
469 length=ucm_parseBytes(bytes, line, (const char **)&s);
470 if(1<=length && length<=4 && *s==0) {
471 staticData->subCharLen=length;
472 uprv_memcpy(staticData->subChar, bytes, length);
473 } else {
474 fprintf(stderr, "error: illegal <subchar> %s\n", value);
475 *pErrorCode=U_INVALID_TABLE_FORMAT;
476 return;
477 }
478 } else if(uprv_strcmp(key, "subchar1")==0) {
479 uint8_t bytes[UCNV_EXT_MAX_BYTES];
480
481 s=value;
482 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
483 staticData->subChar1=bytes[0];
484 } else {
485 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
486 *pErrorCode=U_INVALID_TABLE_FORMAT;
487 return;
488 }
489 }
490 }
491
492 /* copy values from the UCMFile to the static data */
493 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
494 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
495 staticData->conversionType=data->ucm->states.conversionType;
496
497 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
498 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
499 *pErrorCode=U_INVALID_TABLE_FORMAT;
500 return;
501 }
502
503 /*
504 * Now that we know the type, copy any 'default' values from the table.
505 * We need not check the type any further because the parser only
506 * recognizes what we have prototypes for.
507 *
508 * For delta (extension-only) tables, copy values from the base file
509 * instead, see createConverter().
510 */
511 if(data->ucm->baseName[0]==0) {
512 prototype=ucnv_converterStaticData[staticData->conversionType];
513 if(prototype!=NULL) {
514 if(staticData->name[0]==0) {
515 uprv_strcpy((char *)staticData->name, prototype->name);
516 }
517
518 if(staticData->codepage==0) {
519 staticData->codepage=prototype->codepage;
520 }
521
522 if(staticData->platform==0) {
523 staticData->platform=prototype->platform;
524 }
525
526 if(staticData->minBytesPerChar==0) {
527 staticData->minBytesPerChar=prototype->minBytesPerChar;
528 }
529
530 if(staticData->maxBytesPerChar==0) {
531 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
532 }
533
534 if(staticData->subCharLen==0) {
535 staticData->subCharLen=prototype->subCharLen;
536 if(prototype->subCharLen>0) {
537 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
538 }
539 }
540 }
541 }
542
543 if(data->ucm->states.outputType<0) {
544 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
545 }
546
547 if( staticData->subChar1!=0 &&
548 (staticData->minBytesPerChar>1 ||
549 (staticData->conversionType!=UCNV_MBCS &&
550 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
551 ) {
552 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
553 *pErrorCode=U_INVALID_TABLE_FORMAT;
554 }
555 }
556
557 /* return TRUE if a base table was read, FALSE for an extension table */
558 static UBool
559 readFile(ConvData *data, const char* converterName,
560 UErrorCode *pErrorCode) {
561 char line[1024];
562 char *end;
563 FileStream *convFile;
564
565 UCMStates *baseStates;
566 UBool dataIsBase;
567
568 if(U_FAILURE(*pErrorCode)) {
569 return FALSE;
570 }
571
572 data->ucm=ucm_open();
573
574 convFile=T_FileStream_open(converterName, "r");
575 if(convFile==NULL) {
576 *pErrorCode=U_FILE_ACCESS_ERROR;
577 return FALSE;
578 }
579
580 readHeader(data, convFile, converterName, pErrorCode);
581 if(U_FAILURE(*pErrorCode)) {
582 return FALSE;
583 }
584
585 if(data->ucm->baseName[0]==0) {
586 dataIsBase=TRUE;
587 baseStates=&data->ucm->states;
588 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
589 } else {
590 dataIsBase=FALSE;
591 baseStates=NULL;
592 }
593
594 /* read the base table */
595 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
596 if(U_FAILURE(*pErrorCode)) {
597 return FALSE;
598 }
599
600 /* read an extension table if there is one */
601 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
602 end=uprv_strchr(line, 0);
603 while(line<end &&
604 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
605 --end;
606 }
607 *end=0;
608
609 if(line[0]=='#' || u_skipWhitespace(line)==end) {
610 continue; /* ignore empty and comment lines */
611 }
612
613 if(0==uprv_strcmp(line, "CHARMAP")) {
614 /* read the extension table */
615 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
616 } else {
617 fprintf(stderr, "unexpected text after the base mapping table\n");
618 }
619 break;
620 }
621
622 T_FileStream_close(convFile);
623
624 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
625 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
626 *pErrorCode=U_INVALID_TABLE_FORMAT;
627 }
628
629 return dataIsBase;
630 }
631
632 static void
633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
634 ConvData baseData;
635 UBool dataIsBase;
636
637 UConverterStaticData *staticData;
638 UCMStates *states, *baseStates;
639
640 if(U_FAILURE(*pErrorCode)) {
641 return;
642 }
643
644 initConvData(data);
645
646 dataIsBase=readFile(data, converterName, pErrorCode);
647 if(U_FAILURE(*pErrorCode)) {
648 return;
649 }
650
651 staticData=&data->staticData;
652 states=&data->ucm->states;
653
654 if(dataIsBase) {
655 /*
656 * Build a normal .cnv file with a base table
657 * and an optional extension table.
658 */
659 data->cnvData=MBCSOpen(data->ucm);
660 if(data->cnvData==NULL) {
661 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
662
663 } else if(!data->cnvData->isValid(data->cnvData,
664 staticData->subChar, staticData->subCharLen)
665 ) {
666 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
667 *pErrorCode=U_INVALID_TABLE_FORMAT;
668
669 } else if(staticData->subChar1!=0 &&
670 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
671 ) {
672 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
674
675 } else if(
676 data->ucm->ext->mappingsLength>0 &&
677 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
678 ) {
679 *pErrorCode=U_INVALID_TABLE_FORMAT;
680 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
681 /* sort the table so that it can be turned into UTF-8-friendly data */
682 ucm_sortTable(data->ucm->base);
683 }
684
685 if(U_SUCCESS(*pErrorCode)) {
686 if(
687 /* add the base table after ucm_checkBaseExt()! */
688 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
689 ) {
690 *pErrorCode=U_INVALID_TABLE_FORMAT;
691 } else {
692 /*
693 * addTable() may have requested moving more mappings to the extension table
694 * if they fit into the base toUnicode table but not into the
695 * base fromUnicode table.
696 * (Especially for UTF-8-friendly fromUnicode tables.)
697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
698 * to be excluded from the extension toUnicode data.
699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
700 * the base fromUnicode table.
701 */
702 ucm_moveMappings(data->ucm->base, data->ucm->ext);
703 ucm_sortTable(data->ucm->ext);
704 if(data->ucm->ext->mappingsLength>0) {
705 /* prepare the extension table, if there is one */
706 data->extData=CnvExtOpen(data->ucm);
707 if(data->extData==NULL) {
708 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
709 } else if(
710 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
711 ) {
712 *pErrorCode=U_INVALID_TABLE_FORMAT;
713 }
714 }
715 }
716 }
717 } else {
718 /* Build an extension-only .cnv file. */
719 char baseFilename[500];
720 char *basename;
721
722 initConvData(&baseData);
723
724 /* assemble a path/filename for data->ucm->baseName */
725 uprv_strcpy(baseFilename, converterName);
726 basename=(char *)findBasename(baseFilename);
727 uprv_strcpy(basename, data->ucm->baseName);
728 uprv_strcat(basename, ".ucm");
729
730 /* read the base table */
731 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
732 if(U_FAILURE(*pErrorCode)) {
733 return;
734 } else if(!dataIsBase) {
735 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
736 *pErrorCode=U_INVALID_TABLE_FORMAT;
737 } else {
738 /* prepare the extension table */
739 data->extData=CnvExtOpen(data->ucm);
740 if(data->extData==NULL) {
741 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
742 } else {
743 /* fill in gaps in extension file header fields */
744 UCMapping *m, *mLimit;
745 uint8_t fallbackFlags;
746
747 baseStates=&baseData.ucm->states;
748 if(states->conversionType==UCNV_DBCS) {
749 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
750 } else if(states->minCharLength==0) {
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
752 }
753 if(states->maxCharLength<states->minCharLength) {
754 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
755 }
756
757 if(staticData->subCharLen==0) {
758 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
759 staticData->subCharLen=baseData.staticData.subCharLen;
760 }
761 /*
762 * do not copy subChar1 -
763 * only use what is explicitly specified
764 * because it cannot be unset in the extension file header
765 */
766
767 /* get the fallback flags */
768 fallbackFlags=0;
769 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
770 m<mLimit && fallbackFlags!=3;
771 ++m
772 ) {
773 if(m->f==1) {
774 fallbackFlags|=1;
775 } else if(m->f==3) {
776 fallbackFlags|=2;
777 }
778 }
779
780 if(fallbackFlags&1) {
781 staticData->hasFromUnicodeFallback=TRUE;
782 }
783 if(fallbackFlags&2) {
784 staticData->hasToUnicodeFallback=TRUE;
785 }
786
787 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
788 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
789 *pErrorCode=U_INVALID_TABLE_FORMAT;
790
791 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
792 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
793 *pErrorCode=U_INVALID_TABLE_FORMAT;
794
795 } else if(
796 !ucm_checkValidity(data->ucm->ext, baseStates) ||
797 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
798 ) {
799 *pErrorCode=U_INVALID_TABLE_FORMAT;
800 } else {
801 if(states->maxCharLength>1) {
802 /*
803 * When building a normal .cnv file with a base table
804 * for an MBCS (not SBCS) table with explicit precision flags,
805 * the MBCSAddTable() function marks some mappings for moving
806 * to the extension table.
807 * They fit into the base toUnicode table but not into the
808 * base fromUnicode table.
809 * (Note: We do have explicit precision flags because they are
810 * required for extension table generation, and
811 * ucm_checkBaseExt() verified it.)
812 *
813 * We do not call MBCSAddTable() here (we probably could)
814 * so we need to do the analysis before building the extension table.
815 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
816 * Redundant mappings in the extension table are ok except they cost some size.
817 *
818 * Do this after ucm_checkBaseExt().
819 */
820 const MBCSData *mbcsData=MBCSGetDummy();
821 int32_t needsMove=0;
822 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
823 m<mLimit;
824 ++m
825 ) {
826 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
827 m->f|=MBCS_FROM_U_EXT_FLAG;
828 m->moveFlag=UCM_MOVE_TO_EXT;
829 ++needsMove;
830 }
831 }
832
833 if(needsMove!=0) {
834 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
835 ucm_sortTable(data->ucm->ext);
836 }
837 }
838 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
839 *pErrorCode=U_INVALID_TABLE_FORMAT;
840 }
841 }
842 }
843 }
844
845 cleanupConvData(&baseData);
846 }
847 }
848
849 /*
850 * Hey, Emacs, please set the following:
851 *
852 * Local Variables:
853 * indent-tabs-mode: nil
854 * End:
855 *
856 */