]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/makeconv/makeconv.c
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
1 /*
2 ********************************************************************************
3 *
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
36
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Set to 1 to compile in the progress tracing in main(). */
#define DEBUG 0
40
41 typedef struct ConvData {
42 UCMFile *ucm;
43 NewConverter *cnvData, *extData;
44 UConverterSharedData sharedData;
45 UConverterStaticData staticData;
46 } ConvData;
47
48 static void
49 initConvData(ConvData *data) {
50 uprv_memset(data, 0, sizeof(ConvData));
51 data->sharedData.structSize=sizeof(UConverterSharedData);
52 data->staticData.structSize=sizeof(UConverterStaticData);
53 data->sharedData.staticData=&data->staticData;
54 }
55
56 static void
57 cleanupConvData(ConvData *data) {
58 if(data!=NULL) {
59 if(data->cnvData!=NULL) {
60 data->cnvData->close(data->cnvData);
61 data->cnvData=NULL;
62 }
63 if(data->extData!=NULL) {
64 data->extData->close(data->extData);
65 data->extData=NULL;
66 }
67 ucm_close(data->ucm);
68 data->ucm=NULL;
69 }
70 }
71
72 /*
73 * from ucnvstat.c - static prototypes of data-based converters
74 */
75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
76
77 /*
78 * Global - verbosity
79 */
80 UBool VERBOSE = FALSE;
81 UBool SMALL = FALSE;
82 UBool IGNORE_SISO_CHECK = FALSE;
83
84 static void
85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
86
87 /*
88 * Set up the UNewData and write the converter..
89 */
90 static void
91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
92
93 UBool haveCopyright=TRUE;
94
95 static UDataInfo dataInfo={
96 sizeof(UDataInfo),
97 0,
98
99 U_IS_BIG_ENDIAN,
100 U_CHARSET_FAMILY,
101 sizeof(UChar),
102 0,
103
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
107 };
108
109 static void
110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
111 {
112 UNewDataMemory *mem = NULL;
113 uint32_t sz2;
114 uint32_t size = 0;
115 int32_t tableType;
116
117 if(U_FAILURE(*status))
118 {
119 return;
120 }
121
122 tableType=TABLE_NONE;
123 if(data->cnvData!=NULL) {
124 tableType|=TABLE_BASE;
125 }
126 if(data->extData!=NULL) {
127 tableType|=TABLE_EXT;
128 }
129
130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131
132 if(U_FAILURE(*status))
133 {
134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135 cnvName,
136 "cnv",
137 u_errorName(*status));
138 return;
139 }
140
141 if(VERBOSE)
142 {
143 printf("- Opened udata %s.%s\n", cnvName, "cnv");
144 }
145
146
147 /* all read only, clean, platform independent data. Mmmm. :) */
148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
150 /* Now, write the table */
151 if(tableType&TABLE_BASE) {
152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153 }
154 if(tableType&TABLE_EXT) {
155 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156 }
157
158 sz2 = udata_finish(mem, status);
159 if(size != sz2)
160 {
161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
162 *status=U_INTERNAL_PROGRAM_ERROR;
163 }
164 if(VERBOSE)
165 {
166 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
167 }
168 }
169
170 enum {
171 OPT_HELP_H,
172 OPT_HELP_QUESTION_MARK,
173 OPT_COPYRIGHT,
174 OPT_VERSION,
175 OPT_DESTDIR,
176 OPT_VERBOSE,
177 OPT_SMALL,
178 OPT_IGNORE_SISO_CHECK,
179 OPT_COUNT
180 };
181
182 static UOption options[]={
183 UOPTION_HELP_H,
184 UOPTION_HELP_QUESTION_MARK,
185 UOPTION_COPYRIGHT,
186 UOPTION_VERSION,
187 UOPTION_DESTDIR,
188 UOPTION_VERBOSE,
189 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
190 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
191 };
192
193 int main(int argc, char* argv[])
194 {
195 ConvData data;
196 UErrorCode err = U_ZERO_ERROR, localError;
197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
198 const char* destdir, *arg;
199 size_t destdirlen;
200 char* dot = NULL, *outBasename;
201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
203 UVersionInfo icuVersion;
204 UBool printFilename;
205
206 err = U_ZERO_ERROR;
207
208 U_MAIN_INIT_ARGS(argc, argv);
209
210 /* Set up the ICU version number */
211 u_getVersion(icuVersion);
212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
213
214 /* preset then read command line options */
215 options[OPT_DESTDIR].value=u_getDataDirectory();
216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
217
218 /* error handling, printing usage message */
219 if(argc<0) {
220 fprintf(stderr,
221 "error in command line argument \"%s\"\n",
222 argv[-argc]);
223 } else if(argc<2) {
224 argc=-1;
225 }
226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
227 FILE *stdfile=argc<0 ? stderr : stdout;
228 fprintf(stdfile,
229 "usage: %s [-options] files...\n"
230 "\tread .ucm codepage mapping files and write .cnv files\n"
231 "options:\n"
232 "\t-h or -? or --help this usage text\n"
233 "\t-V or --version show a version message\n"
234 "\t-c or --copyright include a copyright notice\n"
235 "\t-d or --destdir destination directory, followed by the path\n"
236 "\t-v or --verbose Turn on verbose output\n",
237 argv[0]);
238 fprintf(stdfile,
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245 }
246
247 if(options[OPT_VERSION].doesOccur) {
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250 printf("%s\n", U_COPYRIGHT_STRING);
251 exit(0);
252 }
253
254 /* get the options values */
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256 destdir = options[OPT_DESTDIR].value;
257 VERBOSE = options[OPT_VERBOSE].doesOccur;
258 SMALL = options[OPT_SMALL].doesOccur;
259
260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
261 IGNORE_SISO_CHECK = TRUE;
262 }
263
264 if (destdir != NULL && *destdir != 0) {
265 uprv_strcpy(outFileName, destdir);
266 destdirlen = uprv_strlen(destdir);
267 outBasename = outFileName + destdirlen;
268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
269 *outBasename++ = U_FILE_SEP_CHAR;
270 ++destdirlen;
271 }
272 } else {
273 destdirlen = 0;
274 outBasename = outFileName;
275 }
276
277 #if DEBUG
278 {
279 int i;
280 printf("makeconv: processing %d files...\n", argc - 1);
281 for(i=1; i<argc; ++i) {
282 printf("%s ", argv[i]);
283 }
284 printf("\n");
285 fflush(stdout);
286 }
287 #endif
288
289 err = U_ZERO_ERROR;
290 printFilename = (UBool) (argc > 2 || VERBOSE);
291 for (++argv; --argc; ++argv)
292 {
293 arg = getLongPathname(*argv);
294
295 /* Check for potential buffer overflow */
296 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
297 {
298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
299 return U_BUFFER_OVERFLOW_ERROR;
300 }
301
302 /*produces the right destination path for display*/
303 if (destdirlen != 0)
304 {
305 const char *basename;
306
307 /* find the last file sepator */
308 basename = findBasename(arg);
309 uprv_strcpy(outBasename, basename);
310 }
311 else
312 {
313 uprv_strcpy(outFileName, arg);
314 }
315
316 /*removes the extension if any is found*/
317 dot = uprv_strrchr(outBasename, '.');
318 if (dot)
319 {
320 *dot = '\0';
321 }
322
323 /* the basename without extension is the converter name */
324 uprv_strcpy(cnvName, outBasename);
325
326 /*Adds the target extension*/
327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
328
329 #if DEBUG
330 printf("makeconv: processing %s ...\n", arg);
331 fflush(stdout);
332 #endif
333 localError = U_ZERO_ERROR;
334 initConvData(&data);
335 createConverter(&data, arg, &localError);
336
337 if (U_FAILURE(localError))
338 {
339 /* if an error is found, print out an error msg and keep going */
340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
341 u_errorName(localError));
342 if(U_SUCCESS(err)) {
343 err = localError;
344 }
345 }
346 else
347 {
348 /* Insure the static data name matches the file name */
349 /* Changed to ignore directory and only compare base name
350 LDH 1/2/08*/
351 char *p;
352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
353
354 if(p == NULL) /* OK, try alternate */
355 {
356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
357 if(p == NULL)
358 {
359 p=cnvName; /* If no separators, no problem */
360 }
361 }
362 else
363 {
364 p++; /* If found separtor, don't include it in compare */
365 }
366 if(uprv_stricmp(p,data.staticData.name))
367 {
368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
369 cnvName, CONVERTER_FILE_EXTENSION,
370 data.staticData.name);
371 }
372
373 uprv_strcpy((char*)data.staticData.name, cnvName);
374
375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
376 fprintf(stderr,
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data.staticData.name);
380 if(U_SUCCESS(err)) {
381 err = U_INVALID_TABLE_FORMAT;
382 }
383 }
384
385 uprv_strcpy(cnvNameWithPkg, cnvName);
386
387 localError = U_ZERO_ERROR;
388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
389
390 if(U_FAILURE(localError))
391 {
392 /* if an error is found, print out an error msg and keep going*/
393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
394 u_errorName(localError));
395 if(U_SUCCESS(err)) {
396 err = localError;
397 }
398 }
399 else if (printFilename)
400 {
401 puts(outBasename);
402 }
403 }
404 fflush(stdout);
405 fflush(stderr);
406
407 cleanupConvData(&data);
408 }
409
410 return err;
411 }
412
413 static void
414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
415 if( (name[0]=='i' || name[0]=='I') &&
416 (name[1]=='b' || name[1]=='B') &&
417 (name[2]=='m' || name[2]=='M')
418 ) {
419 name+=3;
420 if(*name=='-') {
421 ++name;
422 }
423 *pPlatform=UCNV_IBM;
424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
425 } else {
426 *pPlatform=UCNV_UNKNOWN;
427 *pCCSID=0;
428 }
429 }
430
431 static void
432 readHeader(ConvData *data,
433 FileStream* convFile,
434 const char* converterName,
435 UErrorCode *pErrorCode) {
436 char line[1024];
437 char *s, *key, *value;
438 const UConverterStaticData *prototype;
439 UConverterStaticData *staticData;
440
441 if(U_FAILURE(*pErrorCode)) {
442 return;
443 }
444
445 staticData=&data->staticData;
446 staticData->platform=UCNV_IBM;
447 staticData->subCharLen=0;
448
449 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
450 /* basic parsing and handling of state-related items */
451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
452 continue;
453 }
454
455 /* stop at the beginning of the mapping section */
456 if(uprv_strcmp(line, "CHARMAP")==0) {
457 break;
458 }
459
460 /* collect the information from the header field, ignore unknown keys */
461 if(uprv_strcmp(key, "code_set_name")==0) {
462 if(*value!=0) {
463 uprv_strcpy((char *)staticData->name, value);
464 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
465 }
466 } else if(uprv_strcmp(key, "subchar")==0) {
467 uint8_t bytes[UCNV_EXT_MAX_BYTES];
468 int8_t length;
469
470 s=value;
471 length=ucm_parseBytes(bytes, line, (const char **)&s);
472 if(1<=length && length<=4 && *s==0) {
473 staticData->subCharLen=length;
474 uprv_memcpy(staticData->subChar, bytes, length);
475 } else {
476 fprintf(stderr, "error: illegal <subchar> %s\n", value);
477 *pErrorCode=U_INVALID_TABLE_FORMAT;
478 return;
479 }
480 } else if(uprv_strcmp(key, "subchar1")==0) {
481 uint8_t bytes[UCNV_EXT_MAX_BYTES];
482
483 s=value;
484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
485 staticData->subChar1=bytes[0];
486 } else {
487 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
488 *pErrorCode=U_INVALID_TABLE_FORMAT;
489 return;
490 }
491 }
492 }
493
494 /* copy values from the UCMFile to the static data */
495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
497 staticData->conversionType=data->ucm->states.conversionType;
498
499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
501 *pErrorCode=U_INVALID_TABLE_FORMAT;
502 return;
503 }
504
505 /*
506 * Now that we know the type, copy any 'default' values from the table.
507 * We need not check the type any further because the parser only
508 * recognizes what we have prototypes for.
509 *
510 * For delta (extension-only) tables, copy values from the base file
511 * instead, see createConverter().
512 */
513 if(data->ucm->baseName[0]==0) {
514 prototype=ucnv_converterStaticData[staticData->conversionType];
515 if(prototype!=NULL) {
516 if(staticData->name[0]==0) {
517 uprv_strcpy((char *)staticData->name, prototype->name);
518 }
519
520 if(staticData->codepage==0) {
521 staticData->codepage=prototype->codepage;
522 }
523
524 if(staticData->platform==0) {
525 staticData->platform=prototype->platform;
526 }
527
528 if(staticData->minBytesPerChar==0) {
529 staticData->minBytesPerChar=prototype->minBytesPerChar;
530 }
531
532 if(staticData->maxBytesPerChar==0) {
533 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
534 }
535
536 if(staticData->subCharLen==0) {
537 staticData->subCharLen=prototype->subCharLen;
538 if(prototype->subCharLen>0) {
539 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
540 }
541 }
542 }
543 }
544
545 if(data->ucm->states.outputType<0) {
546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
547 }
548
549 if( staticData->subChar1!=0 &&
550 (staticData->minBytesPerChar>1 ||
551 (staticData->conversionType!=UCNV_MBCS &&
552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
553 ) {
554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
555 *pErrorCode=U_INVALID_TABLE_FORMAT;
556 }
557 }
558
559 /* return TRUE if a base table was read, FALSE for an extension table */
560 static UBool
561 readFile(ConvData *data, const char* converterName,
562 UErrorCode *pErrorCode) {
563 char line[1024];
564 char *end;
565 FileStream *convFile;
566
567 UCMStates *baseStates;
568 UBool dataIsBase;
569
570 if(U_FAILURE(*pErrorCode)) {
571 return FALSE;
572 }
573
574 data->ucm=ucm_open();
575
576 convFile=T_FileStream_open(converterName, "r");
577 if(convFile==NULL) {
578 *pErrorCode=U_FILE_ACCESS_ERROR;
579 return FALSE;
580 }
581
582 readHeader(data, convFile, converterName, pErrorCode);
583 if(U_FAILURE(*pErrorCode)) {
584 return FALSE;
585 }
586
587 if(data->ucm->baseName[0]==0) {
588 dataIsBase=TRUE;
589 baseStates=&data->ucm->states;
590 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
591 } else {
592 dataIsBase=FALSE;
593 baseStates=NULL;
594 }
595
596 /* read the base table */
597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
598 if(U_FAILURE(*pErrorCode)) {
599 return FALSE;
600 }
601
602 /* read an extension table if there is one */
603 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
604 end=uprv_strchr(line, 0);
605 while(line<end &&
606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
607 --end;
608 }
609 *end=0;
610
611 if(line[0]=='#' || u_skipWhitespace(line)==end) {
612 continue; /* ignore empty and comment lines */
613 }
614
615 if(0==uprv_strcmp(line, "CHARMAP")) {
616 /* read the extension table */
617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
618 } else {
619 fprintf(stderr, "unexpected text after the base mapping table\n");
620 }
621 break;
622 }
623
624 T_FileStream_close(convFile);
625
626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
627 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
628 *pErrorCode=U_INVALID_TABLE_FORMAT;
629 }
630
631 return dataIsBase;
632 }
633
634 static void
635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
636 ConvData baseData;
637 UBool dataIsBase;
638
639 UConverterStaticData *staticData;
640 UCMStates *states, *baseStates;
641
642 if(U_FAILURE(*pErrorCode)) {
643 return;
644 }
645
646 initConvData(data);
647
648 dataIsBase=readFile(data, converterName, pErrorCode);
649 if(U_FAILURE(*pErrorCode)) {
650 return;
651 }
652
653 staticData=&data->staticData;
654 states=&data->ucm->states;
655
656 if(dataIsBase) {
657 /*
658 * Build a normal .cnv file with a base table
659 * and an optional extension table.
660 */
661 data->cnvData=MBCSOpen(data->ucm);
662 if(data->cnvData==NULL) {
663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
664
665 } else if(!data->cnvData->isValid(data->cnvData,
666 staticData->subChar, staticData->subCharLen)
667 ) {
668 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
669 *pErrorCode=U_INVALID_TABLE_FORMAT;
670
671 } else if(staticData->subChar1!=0 &&
672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
673 ) {
674 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
675 *pErrorCode=U_INVALID_TABLE_FORMAT;
676
677 } else if(
678 data->ucm->ext->mappingsLength>0 &&
679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
680 ) {
681 *pErrorCode=U_INVALID_TABLE_FORMAT;
682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
683 /* sort the table so that it can be turned into UTF-8-friendly data */
684 ucm_sortTable(data->ucm->base);
685 }
686
687 if(U_SUCCESS(*pErrorCode)) {
688 if(
689 /* add the base table after ucm_checkBaseExt()! */
690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
691 ) {
692 *pErrorCode=U_INVALID_TABLE_FORMAT;
693 } else {
694 /*
695 * addTable() may have requested moving more mappings to the extension table
696 * if they fit into the base toUnicode table but not into the
697 * base fromUnicode table.
698 * (Especially for UTF-8-friendly fromUnicode tables.)
699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
700 * to be excluded from the extension toUnicode data.
701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
702 * the base fromUnicode table.
703 */
704 ucm_moveMappings(data->ucm->base, data->ucm->ext);
705 ucm_sortTable(data->ucm->ext);
706 if(data->ucm->ext->mappingsLength>0) {
707 /* prepare the extension table, if there is one */
708 data->extData=CnvExtOpen(data->ucm);
709 if(data->extData==NULL) {
710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
711 } else if(
712 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
713 ) {
714 *pErrorCode=U_INVALID_TABLE_FORMAT;
715 }
716 }
717 }
718 }
719 } else {
720 /* Build an extension-only .cnv file. */
721 char baseFilename[500];
722 char *basename;
723
724 initConvData(&baseData);
725
726 /* assemble a path/filename for data->ucm->baseName */
727 uprv_strcpy(baseFilename, converterName);
728 basename=(char *)findBasename(baseFilename);
729 uprv_strcpy(basename, data->ucm->baseName);
730 uprv_strcat(basename, ".ucm");
731
732 /* read the base table */
733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
734 if(U_FAILURE(*pErrorCode)) {
735 return;
736 } else if(!dataIsBase) {
737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
738 *pErrorCode=U_INVALID_TABLE_FORMAT;
739 } else {
740 /* prepare the extension table */
741 data->extData=CnvExtOpen(data->ucm);
742 if(data->extData==NULL) {
743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
744 } else {
745 /* fill in gaps in extension file header fields */
746 UCMapping *m, *mLimit;
747 uint8_t fallbackFlags;
748
749 baseStates=&baseData.ucm->states;
750 if(states->conversionType==UCNV_DBCS) {
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
752 } else if(states->minCharLength==0) {
753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
754 }
755 if(states->maxCharLength<states->minCharLength) {
756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
757 }
758
759 if(staticData->subCharLen==0) {
760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
761 staticData->subCharLen=baseData.staticData.subCharLen;
762 }
763 /*
764 * do not copy subChar1 -
765 * only use what is explicitly specified
766 * because it cannot be unset in the extension file header
767 */
768
769 /* get the fallback flags */
770 fallbackFlags=0;
771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
772 m<mLimit && fallbackFlags!=3;
773 ++m
774 ) {
775 if(m->f==1) {
776 fallbackFlags|=1;
777 } else if(m->f==3) {
778 fallbackFlags|=2;
779 }
780 }
781
782 if(fallbackFlags&1) {
783 staticData->hasFromUnicodeFallback=TRUE;
784 }
785 if(fallbackFlags&2) {
786 staticData->hasToUnicodeFallback=TRUE;
787 }
788
789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
790 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
791 *pErrorCode=U_INVALID_TABLE_FORMAT;
792
793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
794 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
795 *pErrorCode=U_INVALID_TABLE_FORMAT;
796
797 } else if(
798 !ucm_checkValidity(data->ucm->ext, baseStates) ||
799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
800 ) {
801 *pErrorCode=U_INVALID_TABLE_FORMAT;
802 } else {
803 if(states->maxCharLength>1) {
804 /*
805 * When building a normal .cnv file with a base table
806 * for an MBCS (not SBCS) table with explicit precision flags,
807 * the MBCSAddTable() function marks some mappings for moving
808 * to the extension table.
809 * They fit into the base toUnicode table but not into the
810 * base fromUnicode table.
811 * (Note: We do have explicit precision flags because they are
812 * required for extension table generation, and
813 * ucm_checkBaseExt() verified it.)
814 *
815 * We do not call MBCSAddTable() here (we probably could)
816 * so we need to do the analysis before building the extension table.
817 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
818 * Redundant mappings in the extension table are ok except they cost some size.
819 *
820 * Do this after ucm_checkBaseExt().
821 */
822 const MBCSData *mbcsData=MBCSGetDummy();
823 int32_t needsMove=0;
824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
825 m<mLimit;
826 ++m
827 ) {
828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
829 m->f|=MBCS_FROM_U_EXT_FLAG;
830 m->moveFlag=UCM_MOVE_TO_EXT;
831 ++needsMove;
832 }
833 }
834
835 if(needsMove!=0) {
836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
837 ucm_sortTable(data->ucm->ext);
838 }
839 }
840 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
841 *pErrorCode=U_INVALID_TABLE_FORMAT;
842 }
843 }
844 }
845 }
846
847 cleanupConvData(&baseData);
848 }
849 }
850
851 /*
852 * Hey, Emacs, please set the following:
853 *
854 * Local Variables:
855 * indent-tabs-mode: nil
856 * End:
857 *
858 */