1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ********************************************************************************
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ********************************************************************************
13 * tool creating a binary (compressed) representation of the conversion mapping
14 * table (IBM NLTC ucmap format).
16 * 05/04/2000 helena Added fallback mapping into the picture...
17 * 06/29/2000 helena Major rewrite of the callback APIs.
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
33 #include "unicode/udata.h"
/*
 * Working state for building one converter file: up to two NewConverter
 * builders (cnvData for the base table, extData for the extension table)
 * plus the shared and static converter data structs.
 */
42 typedef struct ConvData
{
44 NewConverter
*cnvData
, *extData
;
45 UConverterSharedData sharedData
;
46 UConverterStaticData staticData
;
/*
 * Reset a ConvData to a known state: zero the whole struct, record the
 * struct sizes in the shared and static data, and point
 * sharedData.staticData at the embedded staticData member.
 */
50 initConvData(ConvData
*data
) {
51 uprv_memset(data
, 0, sizeof(ConvData
));
52 data
->sharedData
.structSize
=sizeof(UConverterSharedData
);
53 data
->staticData
.structSize
=sizeof(UConverterStaticData
);
/* the shared data refers back to the static data stored in the same struct */
54 data
->sharedData
.staticData
=&data
->staticData
;
/*
 * Release the builders held by a ConvData: calls close() on cnvData and on
 * extData, each only when the pointer is non-NULL.
 */
58 cleanupConvData(ConvData
*data
) {
60 if(data
->cnvData
!=NULL
) {
61 data
->cnvData
->close(data
->cnvData
);
64 if(data
->extData
!=NULL
) {
65 data
->extData
->close(data
->extData
);
74 * from ucnvstat.c - static prototypes of data-based converters
76 U_CAPI
const UConverterStaticData
* ucnv_converterStaticData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
];
81 UBool VERBOSE
= FALSE
;
84 UBool IGNORE_SISO_CHECK
= FALSE
;
87 createConverter(ConvData
*data
, const char* converterName
, UErrorCode
*pErrorCode
);
90 * Set up the UNewData and write the converter.
93 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
);
95 UBool haveCopyright
=TRUE
;
97 static UDataInfo dataInfo
={
106 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
107 {6, 2, 0, 0}, /* formatVersion */
108 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
/*
 * Write one converter to a data item of type "cnv" through the udata writer.
 *
 * data    - converter state; a non-NULL cnvData/extData selects which tables
 *           are written after the static data block
 * cnvName - converter name, passed to udata_create() as the item name
 * cnvDir  - destination directory passed to udata_create()
 * status  - in/out ICU error code; set to U_INTERNAL_PROGRAM_ERROR when the
 *           "wrote ... but counted ..." error is reported
 */
112 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
)
114 UNewDataMemory
*mem
= NULL
;
119 if(U_FAILURE(*status
))
/* tableType is a bit set: TABLE_BASE for a base builder, TABLE_EXT for an extension builder */
124 tableType
=TABLE_NONE
;
125 if(data
->cnvData
!=NULL
) {
126 tableType
|=TABLE_BASE
;
128 if(data
->extData
!=NULL
) {
129 tableType
|=TABLE_EXT
;
/* open the output item; the copyright string is passed only when haveCopyright is set */
132 mem
= udata_create(cnvDir
, "cnv", cnvName
, &dataInfo
, haveCopyright
? U_COPYRIGHT_STRING
: NULL
, status
);
134 if(U_FAILURE(*status
))
136 fprintf(stderr
, "Couldn't create the udata %s.%s: %s\n",
139 u_errorName(*status
));
145 printf("- Opened udata %s.%s\n", cnvName
, "cnv");
149 /* all read only, clean, platform independent data. Mmmm. :) */
150 udata_writeBlock(mem
, &data
->staticData
, sizeof(UConverterStaticData
));
/* size is the running count of bytes handed to the writer */
151 size
+= sizeof(UConverterStaticData
); /* Is 4-aligned - by size */
152 /* Now, write the table */
153 if(tableType
&TABLE_BASE
) {
154 size
+= data
->cnvData
->write(data
->cnvData
, &data
->staticData
, mem
, tableType
);
156 if(tableType
&TABLE_EXT
) {
157 size
+= data
->extData
->write(data
->extData
, &data
->staticData
, mem
, tableType
);
/* sz2 is the byte count reported by udata_finish(); presumably compared with size below - TODO confirm */
160 sz2
= udata_finish(mem
, status
);
/* NOTE(review): %u is paired with (int) arguments here and in the final printf; specifier and cast signedness disagree */
163 fprintf(stderr
, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2
, (int)size
);
164 *status
=U_INTERNAL_PROGRAM_ERROR
;
168 printf("- Wrote %u bytes to the udata.\n", (int)sz2
);
174 OPT_HELP_QUESTION_MARK
,
180 OPT_IGNORE_SISO_CHECK
,
187 static UOption options
[]={
189 UOPTION_HELP_QUESTION_MARK
,
194 { "small", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
195 { "ignore-siso-check", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
/*
 * Command-line entry point.
 * Parses the options, then for each remaining argument derives the converter
 * name and output file name from the input file name, builds the converter
 * with createConverter() and writes it with writeConverterData().
 * Per-file failures are reported on stderr and processing keeps going.
 */
200 int main(int argc
, char* argv
[])
203 char cnvName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
205 U_MAIN_INIT_ARGS(argc
, argv
);
207 /* Set up the ICU version number */
208 UVersionInfo icuVersion
;
209 u_getVersion(icuVersion
);
210 uprv_memcpy(&dataInfo
.dataVersion
, &icuVersion
, sizeof(UVersionInfo
));
212 /* preset then read command line options */
213 options
[OPT_DESTDIR
].value
=u_getDataDirectory();
214 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
216 /* error handling, printing usage message */
219 "error in command line argument \"%s\"\n",
224 if(argc
<0 || options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
) {
/* usage goes to stderr after a parse error (argc<0), otherwise to stdout */
225 FILE *stdfile
=argc
<0 ? stderr
: stdout
;
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
234 "\t-v or --verbose Turn on verbose output\n"
235 "\t-q or --quiet do not display warnings and progress\n"
236 "\t-s or --sourcedir source directory, followed by the path\n",
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
244 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
247 if(options
[OPT_VERSION
].doesOccur
) {
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249 dataInfo
.formatVersion
[0], dataInfo
.formatVersion
[1]);
250 printf("%s\n", U_COPYRIGHT_STRING
);
254 /* get the options values */
255 haveCopyright
= options
[OPT_COPYRIGHT
].doesOccur
;
256 const char *destdir
= options
[OPT_DESTDIR
].value
;
257 VERBOSE
= options
[OPT_VERBOSE
].doesOccur
;
258 QUIET
= options
[OPT_QUIET
].doesOccur
;
259 SMALL
= options
[OPT_SMALL
].doesOccur
;
261 if (options
[OPT_IGNORE_SISO_CHECK
].doesOccur
) {
262 IGNORE_SISO_CHECK
= TRUE
;
265 icu::CharString outFileName
;
266 UErrorCode err
= U_ZERO_ERROR
;
267 if (destdir
!= NULL
&& *destdir
!= 0) {
268 outFileName
.append(destdir
, err
).ensureEndsWithFileSeparator(err
);
269 if (U_FAILURE(err
)) {
/* length of the destination-directory prefix; outFileName is truncated back to it for every input file */
273 int32_t outBasenameStart
= outFileName
.length();
278 printf("makeconv: processing %d files...\n", argc
- 1);
279 for(i
=1; i
<argc
; ++i
) {
280 printf("%s ", argv
[i
]);
/* the output name is printed per file when more than one file is given or VERBOSE is set */
287 UBool printFilename
= (UBool
) (argc
> 2 || VERBOSE
);
288 icu::CharString pathBuf
;
/* walk the remaining arguments; argc counts down to zero */
289 for (++argv
; --argc
; ++argv
)
291 UErrorCode localError
= U_ZERO_ERROR
;
292 const char *arg
= getLongPathname(*argv
);
294 const char* sourcedir
= options
[OPT_SOURCEDIR
].value
;
295 if (sourcedir
!= NULL
&& *sourcedir
!= 0 && uprv_strcmp(sourcedir
, ".") != 0) {
297 pathBuf
.appendPathPart(sourcedir
, localError
);
298 pathBuf
.appendPathPart(arg
, localError
);
299 arg
= pathBuf
.data();
302 /*produces the right destination path for display*/
303 outFileName
.truncate(outBasenameStart
);
304 if (outBasenameStart
!= 0)
306 /* find the last file separator */
307 const char *basename
= findBasename(arg
);
308 outFileName
.append(basename
, localError
);
312 outFileName
.append(arg
, localError
);
314 if (U_FAILURE(localError
)) {
318 /*removes the extension if any is found*/
319 int32_t lastDotIndex
= outFileName
.lastIndexOf('.');
320 if (lastDotIndex
>= outBasenameStart
) {
321 outFileName
.truncate(lastDotIndex
);
324 /* the basename without extension is the converter name */
325 if ((outFileName
.length() - outBasenameStart
) >= UPRV_LENGTHOF(cnvName
)) {
326 fprintf(stderr
, "converter name %s too long\n", outFileName
.data() + outBasenameStart
);
327 return U_BUFFER_OVERFLOW_ERROR
;
/* safe copy: the length was checked against the size of cnvName just above */
329 uprv_strcpy(cnvName
, outFileName
.data() + outBasenameStart
);
331 /*Adds the target extension*/
332 outFileName
.append(CONVERTER_FILE_EXTENSION
, localError
);
333 if (U_FAILURE(localError
)) {
338 printf("makeconv: processing %s ...\n", arg
);
342 createConverter(&data
, arg
, &localError
);
344 if (U_FAILURE(localError
))
346 /* if an error is found, print out an error msg and keep going */
347 fprintf(stderr
, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348 outFileName
.data(), arg
, u_errorName(localError
));
355 /* Ensure the static data name matches the file name */
356 /* Changed to ignore directory and only compare base name
359 p
= strrchr(cnvName
, U_FILE_SEP_CHAR
); /* Find last file separator */
361 if(p
== NULL
) /* OK, try alternate */
363 p
= strrchr(cnvName
, U_FILE_ALT_SEP_CHAR
);
366 p
=cnvName
; /* If no separators, no problem */
371 p
++; /* If found separator, don't include it in compare */
/* case-insensitive compare; the warning is suppressed when QUIET is set */
373 if(uprv_stricmp(p
,data
.staticData
.name
) && !QUIET
)
375 fprintf(stderr
, "Warning: %s%s claims to be '%s'\n",
376 cnvName
, CONVERTER_FILE_EXTENSION
,
377 data
.staticData
.name
);
/* NOTE(review): cnvName is bounded by UCNV_MAX_FULL_FILE_NAME_LENGTH, but the capacity of staticData.name is not visible here - confirm this strcpy cannot overflow it */
380 uprv_strcpy((char*)data
.staticData
.name
, cnvName
);
382 if(!uprv_isInvariantString((char*)data
.staticData
.name
, -1)) {
384 "Error: A converter name must contain only invariant characters.\n"
385 "%s is not a valid converter name.\n",
386 data
.staticData
.name
);
388 err
= U_INVALID_TABLE_FORMAT
;
/* reset the per-file error code before writing */
392 localError
= U_ZERO_ERROR
;
393 writeConverterData(&data
, cnvName
, destdir
, &localError
);
395 if(U_FAILURE(localError
))
397 /* if an error is found, print out an error msg and keep going*/
398 fprintf(stderr
, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName
.data(), arg
,
399 u_errorName(localError
));
404 else if (printFilename
)
406 puts(outFileName
.data() + outBasenameStart
);
412 cleanupConvData(&data
);
/*
 * Derive the platform and CCSID outputs from a converter name.
 * Tests whether name begins with the letters "ibm" in any letter case,
 * stores the result of uprv_strtoul(name, NULL, 10) in *pCCSID, and has a
 * path that sets *pPlatform to UCNV_UNKNOWN.
 *
 * name      - converter name to inspect
 * pPlatform - output: platform code
 * pCCSID    - output: code page number
 */
419 getPlatformAndCCSIDFromName(const char *name
, int8_t *pPlatform
, int32_t *pCCSID
) {
420 if( (name
[0]=='i' || name
[0]=='I') &&
421 (name
[1]=='b' || name
[1]=='B') &&
422 (name
[2]=='m' || name
[2]=='M')
429 *pCCSID
=(int32_t)uprv_strtoul(name
, NULL
, 10);
431 *pPlatform
=UCNV_UNKNOWN
;
/*
 * Read the header section of a .ucm file into data->staticData.
 *
 * Lines are read from convFile up to the "CHARMAP" line. The keys handled
 * here are code_set_name, subchar and subchar1; unknown keys are ignored.
 * Afterwards the byte-length limits and the conversion type are copied from
 * data->ucm->states, and when data->ucm->baseName is empty any unset fields
 * are filled in from the prototype for that conversion type.
 *
 * data       - receives the parsed header values
 * convFile   - open stream positioned at the start of the file header
 * pErrorCode - in/out ICU error code; header errors are printed to stderr
 *              and reported as U_INVALID_TABLE_FORMAT
 */
437 readHeader(ConvData
*data
,
438 FileStream
* convFile
,
439 UErrorCode
*pErrorCode
) {
441 char *s
, *key
, *value
;
442 const UConverterStaticData
*prototype
;
443 UConverterStaticData
*staticData
;
445 if(U_FAILURE(*pErrorCode
)) {
/* defaults before parsing: IBM platform, no substitution character yet */
449 staticData
=&data
->staticData
;
450 staticData
->platform
=UCNV_IBM
;
451 staticData
->subCharLen
=0;
453 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
454 /* basic parsing and handling of state-related items */
455 if(ucm_parseHeaderLine(data
->ucm
, line
, &key
, &value
)) {
459 /* stop at the beginning of the mapping section */
460 if(uprv_strcmp(line
, "CHARMAP")==0) {
464 /* collect the information from the header field, ignore unknown keys */
465 if(uprv_strcmp(key
, "code_set_name")==0) {
/* NOTE(review): value comes from the input file and no length check is visible before this strcpy into staticData->name - confirm it is bounded */
467 uprv_strcpy((char *)staticData
->name
, value
);
468 getPlatformAndCCSIDFromName(value
, &staticData
->platform
, &staticData
->codepage
);
470 } else if(uprv_strcmp(key
, "subchar")==0) {
471 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
475 length
=ucm_parseBytes(bytes
, line
, (const char **)&s
);
/* accept 1..4 bytes with nothing left over after the parsed bytes */
476 if(1<=length
&& length
<=4 && *s
==0) {
477 staticData
->subCharLen
=length
;
478 uprv_memcpy(staticData
->subChar
, bytes
, length
);
480 fprintf(stderr
, "error: illegal <subchar> %s\n", value
);
481 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
484 } else if(uprv_strcmp(key
, "subchar1")==0) {
485 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
/* subchar1 must be exactly one byte with nothing left over */
488 if(1==ucm_parseBytes(bytes
, line
, (const char **)&s
) && *s
==0) {
489 staticData
->subChar1
=bytes
[0];
491 fprintf(stderr
, "error: illegal <subchar1> %s\n", value
);
492 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
498 /* copy values from the UCMFile to the static data */
499 staticData
->maxBytesPerChar
=(int8_t)data
->ucm
->states
.maxCharLength
;
500 staticData
->minBytesPerChar
=(int8_t)data
->ucm
->states
.minCharLength
;
501 staticData
->conversionType
=data
->ucm
->states
.conversionType
;
503 if(staticData
->conversionType
==UCNV_UNSUPPORTED_CONVERTER
) {
504 fprintf(stderr
, "ucm error: missing conversion type (<uconv_class>)\n");
505 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
510 * Now that we know the type, copy any 'default' values from the table.
511 * We need not check the type any further because the parser only
512 * recognizes what we have prototypes for.
514 * For delta (extension-only) tables, copy values from the base file
515 * instead, see createConverter().
517 if(data
->ucm
->baseName
[0]==0) {
518 prototype
=ucnv_converterStaticData
[staticData
->conversionType
];
519 if(prototype
!=NULL
) {
520 if(staticData
->name
[0]==0) {
521 uprv_strcpy((char *)staticData
->name
, prototype
->name
);
524 if(staticData
->codepage
==0) {
525 staticData
->codepage
=prototype
->codepage
;
528 if(staticData
->platform
==0) {
529 staticData
->platform
=prototype
->platform
;
532 if(staticData
->minBytesPerChar
==0) {
533 staticData
->minBytesPerChar
=prototype
->minBytesPerChar
;
536 if(staticData
->maxBytesPerChar
==0) {
537 staticData
->maxBytesPerChar
=prototype
->maxBytesPerChar
;
540 if(staticData
->subCharLen
==0) {
541 staticData
->subCharLen
=prototype
->subCharLen
;
542 if(prototype
->subCharLen
>0) {
543 uprv_memcpy(staticData
->subChar
, prototype
->subChar
, prototype
->subCharLen
);
/* a negative outputType means it was not set; default it to maxCharLength-1 */
549 if(data
->ucm
->states
.outputType
<0) {
550 data
->ucm
->states
.outputType
=(int8_t)data
->ucm
->states
.maxCharLength
-1;
/* subchar1 is rejected when minBytesPerChar>1 or the type is neither MBCS nor EBCDIC_STATEFUL */
553 if( staticData
->subChar1
!=0 &&
554 (staticData
->minBytesPerChar
>1 ||
555 (staticData
->conversionType
!=UCNV_MBCS
&&
556 staticData
->conversionType
!=UCNV_EBCDIC_STATEFUL
))
558 fprintf(stderr
, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
559 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * Open and parse one .ucm file into data->ucm.
 * Reads the header with readHeader(), processes the state table when the
 * file names no base (data->ucm->baseName is empty), reads the first
 * mapping table and, if another "CHARMAP" line follows, the extension
 * table. Other text after the base mapping table is reported as unexpected.
 * A table whose flagsType is UCM_FLAGS_MIXED is rejected with
 * U_INVALID_TABLE_FORMAT.
 *
 * data          - receives the opened UCMFile and the header values
 * converterName - path of the .ucm file to open for reading
 * pErrorCode    - in/out ICU error code; U_FILE_ACCESS_ERROR is assigned
 *                 right after the open call (presumably on failure - TODO confirm)
 */
563 /* return TRUE if a base table was read, FALSE for an extension table */
565 readFile(ConvData
*data
, const char* converterName
,
566 UErrorCode
*pErrorCode
) {
569 FileStream
*convFile
;
571 UCMStates
*baseStates
;
574 if(U_FAILURE(*pErrorCode
)) {
578 data
->ucm
=ucm_open();
580 convFile
=T_FileStream_open(converterName
, "r");
582 *pErrorCode
=U_FILE_ACCESS_ERROR
;
586 readHeader(data
, convFile
, pErrorCode
);
587 if(U_FAILURE(*pErrorCode
)) {
/* an empty baseName means this file carries its own state table */
591 if(data
->ucm
->baseName
[0]==0) {
593 baseStates
=&data
->ucm
->states
;
594 ucm_processStates(baseStates
, IGNORE_SISO_CHECK
);
600 /* read the base table */
601 ucm_readTable(data
->ucm
, convFile
, dataIsBase
, baseStates
, pErrorCode
);
602 if(U_FAILURE(*pErrorCode
)) {
606 /* read an extension table if there is one */
607 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
/* end points at the terminating NUL; trailing line ends, spaces and tabs are examined from there */
608 end
=uprv_strchr(line
, 0);
610 (*(end
-1)=='\n' || *(end
-1)=='\r' || *(end
-1)==' ' || *(end
-1)=='\t')) {
615 if(line
[0]=='#' || u_skipWhitespace(line
)==end
) {
616 continue; /* ignore empty and comment lines */
619 if(0==uprv_strcmp(line
, "CHARMAP")) {
620 /* read the extension table */
621 ucm_readTable(data
->ucm
, convFile
, FALSE
, baseStates
, pErrorCode
);
623 fprintf(stderr
, "unexpected text after the base mapping table\n");
628 T_FileStream_close(convFile
);
/* either all entries carry a precision flag or none do; a mix is an error */
630 if(data
->ucm
->base
->flagsType
==UCM_FLAGS_MIXED
|| data
->ucm
->ext
->flagsType
==UCM_FLAGS_MIXED
) {
631 fprintf(stderr
, "error: some entries have the mapping precision (with '|'), some do not\n");
632 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * Build the in-memory converter tables for one .ucm file.
 *
 * Reads converterName with readFile(). For a file with a base table it opens
 * an MBCS builder, validates the substitution bytes, checks the base
 * mappings against the extension mappings, adds the base table, moves
 * flagged mappings to the extension table and, if any extension mappings
 * exist, opens and fills an extension builder.
 * For an extension-only file it reads the named base .ucm file from the same
 * directory, fills gaps in the header fields from it, validates, and builds
 * only the extension table.
 *
 * data          - ConvData that receives ucm, cnvData and extData
 * converterName - path of the .ucm file to read
 * pErrorCode    - in/out ICU error code
 */
639 createConverter(ConvData
*data
, const char *converterName
, UErrorCode
*pErrorCode
) {
643 UConverterStaticData
*staticData
;
644 UCMStates
*states
, *baseStates
;
646 if(U_FAILURE(*pErrorCode
)) {
/* dataIsBase tells whether the file carried a base table or is extension-only */
652 dataIsBase
=readFile(data
, converterName
, pErrorCode
);
653 if(U_FAILURE(*pErrorCode
)) {
657 staticData
=&data
->staticData
;
658 states
=&data
->ucm
->states
;
662 * Build a normal .cnv file with a base table
663 * and an optional extension table.
665 data
->cnvData
=MBCSOpen(data
->ucm
);
666 if(data
->cnvData
==NULL
) {
667 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
669 } else if(!data
->cnvData
->isValid(data
->cnvData
,
670 staticData
->subChar
, staticData
->subCharLen
)
672 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
673 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
675 } else if(staticData
->subChar1
!=0 &&
676 !data
->cnvData
->isValid(data
->cnvData
, &staticData
->subChar1
, 1)
678 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
679 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
682 data
->ucm
->ext
->mappingsLength
>0 &&
683 !ucm_checkBaseExt(states
, data
->ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
685 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
686 } else if(data
->ucm
->base
->flagsType
&UCM_FLAGS_EXPLICIT
) {
687 /* sort the table so that it can be turned into UTF-8-friendly data */
688 ucm_sortTable(data
->ucm
->base
);
691 if(U_SUCCESS(*pErrorCode
)) {
693 /* add the base table after ucm_checkBaseExt()! */
694 !data
->cnvData
->addTable(data
->cnvData
, data
->ucm
->base
, &data
->staticData
)
696 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
699 * addTable() may have requested moving more mappings to the extension table
700 * if they fit into the base toUnicode table but not into the
701 * base fromUnicode table.
702 * (Especially for UTF-8-friendly fromUnicode tables.)
703 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
704 * to be excluded from the extension toUnicode data.
705 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
706 * the base fromUnicode table.
708 ucm_moveMappings(data
->ucm
->base
, data
->ucm
->ext
);
709 ucm_sortTable(data
->ucm
->ext
);
710 if(data
->ucm
->ext
->mappingsLength
>0) {
711 /* prepare the extension table, if there is one */
712 data
->extData
=CnvExtOpen(data
->ucm
);
713 if(data
->extData
==NULL
) {
714 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
716 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
718 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
724 /* Build an extension-only .cnv file. */
725 char baseFilename
[500];
728 initConvData(&baseData
);
/* NOTE(review): baseFilename is a fixed 500-byte buffer and no length check on converterName or baseName is visible before the strcpy/strcat calls below - confirm a bound is enforced or use a bounded copy */
730 /* assemble a path/filename for data->ucm->baseName */
731 uprv_strcpy(baseFilename
, converterName
);
/* the base name overwrites the file-name part of the copied path, keeping its directory */
732 basename
=(char *)findBasename(baseFilename
);
733 uprv_strcpy(basename
, data
->ucm
->baseName
);
734 uprv_strcat(basename
, ".ucm");
736 /* read the base table */
737 dataIsBase
=readFile(&baseData
, baseFilename
, pErrorCode
);
738 if(U_FAILURE(*pErrorCode
)) {
740 } else if(!dataIsBase
) {
741 fprintf(stderr
, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename
);
742 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
744 /* prepare the extension table */
745 data
->extData
=CnvExtOpen(data
->ucm
);
746 if(data
->extData
==NULL
) {
747 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
749 /* fill in gaps in extension file header fields */
750 UCMapping
*m
, *mLimit
;
751 uint8_t fallbackFlags
;
753 baseStates
=&baseData
.ucm
->states
;
/* DBCS forces a minimum of 2 bytes; otherwise an unset minimum is taken from the base file */
754 if(states
->conversionType
==UCNV_DBCS
) {
755 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=2);
756 } else if(states
->minCharLength
==0) {
757 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=baseStates
->minCharLength
);
759 if(states
->maxCharLength
<states
->minCharLength
) {
760 staticData
->maxBytesPerChar
=(int8_t)(states
->maxCharLength
=baseStates
->maxCharLength
);
763 if(staticData
->subCharLen
==0) {
/* copies a fixed 4 bytes regardless of subCharLen; assumes subChar holds at least 4 bytes - TODO confirm */
764 uprv_memcpy(staticData
->subChar
, baseData
.staticData
.subChar
, 4);
765 staticData
->subCharLen
=baseData
.staticData
.subCharLen
;
768 * do not copy subChar1 -
769 * only use what is explicitly specified
770 * because it cannot be unset in the extension file header
773 /* get the fallback flags */
775 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
776 m
<mLimit
&& fallbackFlags
!=3;
/* fallbackFlags bit 0 -> hasFromUnicodeFallback, bit 1 -> hasToUnicodeFallback; the scan above stops once both are set */
786 if(fallbackFlags
&1) {
787 staticData
->hasFromUnicodeFallback
=TRUE
;
789 if(fallbackFlags
&2) {
790 staticData
->hasToUnicodeFallback
=TRUE
;
/* the substitution bytes must count as exactly one character in the base state table */
793 if(1!=ucm_countChars(baseStates
, staticData
->subChar
, staticData
->subCharLen
)) {
794 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
795 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
797 } else if(staticData
->subChar1
!=0 && 1!=ucm_countChars(baseStates
, &staticData
->subChar1
, 1)) {
798 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
799 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
802 !ucm_checkValidity(data
->ucm
->ext
, baseStates
) ||
803 !ucm_checkBaseExt(baseStates
, baseData
.ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
805 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
807 if(states
->maxCharLength
>1) {
809 * When building a normal .cnv file with a base table
810 * for an MBCS (not SBCS) table with explicit precision flags,
811 * the MBCSAddTable() function marks some mappings for moving
812 * to the extension table.
813 * They fit into the base toUnicode table but not into the
814 * base fromUnicode table.
815 * (Note: We do have explicit precision flags because they are
816 * required for extension table generation, and
817 * ucm_checkBaseExt() verified it.)
819 * We do not call MBCSAddTable() here (we probably could)
820 * so we need to do the analysis before building the extension table.
821 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
822 * Redundant mappings in the extension table are ok except they cost some size.
824 * Do this after ucm_checkBaseExt().
826 const MBCSData
*mbcsData
=MBCSGetDummy();
828 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
/* base mappings that do not fit the base fromUnicode table are flagged and marked for the move below */
832 if(!MBCSOkForBaseFromUnicode(mbcsData
, m
->b
.bytes
, m
->bLen
, m
->u
, m
->f
)) {
833 m
->f
|=MBCS_FROM_U_EXT_FLAG
;
834 m
->moveFlag
=UCM_MOVE_TO_EXT
;
840 ucm_moveMappings(baseData
.ucm
->base
, data
->ucm
->ext
);
841 ucm_sortTable(data
->ucm
->ext
);
844 if(!data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)) {
845 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* the temporary base-file state is released once the extension table is built */
851 cleanupConvData(&baseData
);
856 * Hey, Emacs, please set the following:
859 * indent-tabs-mode: nil