2 ********************************************************************************
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
30 #include "unicode/udata.h"
37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
41 typedef struct ConvData
{
43 NewConverter
*cnvData
, *extData
;
44 UConverterSharedData sharedData
;
45 UConverterStaticData staticData
;
49 initConvData(ConvData
*data
) {
50 uprv_memset(data
, 0, sizeof(ConvData
));
51 data
->sharedData
.structSize
=sizeof(UConverterSharedData
);
52 data
->staticData
.structSize
=sizeof(UConverterStaticData
);
53 data
->sharedData
.staticData
=&data
->staticData
;
57 cleanupConvData(ConvData
*data
) {
59 if(data
->cnvData
!=NULL
) {
60 data
->cnvData
->close(data
->cnvData
);
63 if(data
->extData
!=NULL
) {
64 data
->extData
->close(data
->extData
);
73 * from ucnvstat.c - static prototypes of data-based converters
75 extern const UConverterStaticData
* ucnv_converterStaticData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
];
80 UBool VERBOSE
= FALSE
;
82 UBool IGNORE_SISO_CHECK
= FALSE
;
85 createConverter(ConvData
*data
, const char* converterName
, UErrorCode
*pErrorCode
);
88 * Set up the UNewData and write the converter.
91 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
);
93 UBool haveCopyright
=TRUE
;
95 static UDataInfo dataInfo
={
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
110 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
)
112 UNewDataMemory
*mem
= NULL
;
117 if(U_FAILURE(*status
))
122 tableType
=TABLE_NONE
;
123 if(data
->cnvData
!=NULL
) {
124 tableType
|=TABLE_BASE
;
126 if(data
->extData
!=NULL
) {
127 tableType
|=TABLE_EXT
;
130 mem
= udata_create(cnvDir
, "cnv", cnvName
, &dataInfo
, haveCopyright
? U_COPYRIGHT_STRING
: NULL
, status
);
132 if(U_FAILURE(*status
))
134 fprintf(stderr
, "Couldn't create the udata %s.%s: %s\n",
137 u_errorName(*status
));
143 printf("- Opened udata %s.%s\n", cnvName
, "cnv");
147 /* all read only, clean, platform independent data. Mmmm. :) */
148 udata_writeBlock(mem
, &data
->staticData
, sizeof(UConverterStaticData
));
149 size
+= sizeof(UConverterStaticData
); /* Is 4-aligned - by size */
150 /* Now, write the table */
151 if(tableType
&TABLE_BASE
) {
152 size
+= data
->cnvData
->write(data
->cnvData
, &data
->staticData
, mem
, tableType
);
154 if(tableType
&TABLE_EXT
) {
155 size
+= data
->extData
->write(data
->extData
, &data
->staticData
, mem
, tableType
);
158 sz2
= udata_finish(mem
, status
);
161 fprintf(stderr
, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2
, (int)size
);
162 *status
=U_INTERNAL_PROGRAM_ERROR
;
166 printf("- Wrote %u bytes to the udata.\n", (int)sz2
);
172 OPT_HELP_QUESTION_MARK
,
178 OPT_IGNORE_SISO_CHECK
,
182 static UOption options
[]={
184 UOPTION_HELP_QUESTION_MARK
,
189 { "small", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
190 { "ignore-siso-check", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 }
193 int main(int argc
, char* argv
[])
196 UErrorCode err
= U_ZERO_ERROR
, localError
;
197 char outFileName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
198 const char* destdir
, *arg
;
200 char* dot
= NULL
, *outBasename
;
201 char cnvName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
202 char cnvNameWithPkg
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
203 UVersionInfo icuVersion
;
208 U_MAIN_INIT_ARGS(argc
, argv
);
210 /* Set up the ICU version number */
211 u_getVersion(icuVersion
);
212 uprv_memcpy(&dataInfo
.dataVersion
, &icuVersion
, sizeof(UVersionInfo
));
214 /* preset then read command line options */
215 options
[OPT_DESTDIR
].value
=u_getDataDirectory();
216 argc
=u_parseArgs(argc
, argv
, LENGTHOF(options
), options
);
218 /* error handling, printing usage message */
221 "error in command line argument \"%s\"\n",
226 if(argc
<0 || options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
) {
227 FILE *stdfile
=argc
<0 ? stderr
: stdout
;
229 "usage: %s [-options] files...\n"
230 "\tread .ucm codepage mapping files and write .cnv files\n"
232 "\t-h or -? or --help this usage text\n"
233 "\t-V or --version show a version message\n"
234 "\t-c or --copyright include a copyright notice\n"
235 "\t-d or --destdir destination directory, followed by the path\n"
236 "\t-v or --verbose Turn on verbose output\n",
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
244 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
247 if(options
[OPT_VERSION
].doesOccur
) {
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249 dataInfo
.formatVersion
[0], dataInfo
.formatVersion
[1]);
250 printf("%s\n", U_COPYRIGHT_STRING
);
254 /* get the options values */
255 haveCopyright
= options
[OPT_COPYRIGHT
].doesOccur
;
256 destdir
= options
[OPT_DESTDIR
].value
;
257 VERBOSE
= options
[OPT_VERBOSE
].doesOccur
;
258 SMALL
= options
[OPT_SMALL
].doesOccur
;
260 if (options
[OPT_IGNORE_SISO_CHECK
].doesOccur
) {
261 IGNORE_SISO_CHECK
= TRUE
;
264 if (destdir
!= NULL
&& *destdir
!= 0) {
265 uprv_strcpy(outFileName
, destdir
);
266 destdirlen
= uprv_strlen(destdir
);
267 outBasename
= outFileName
+ destdirlen
;
268 if (*(outBasename
- 1) != U_FILE_SEP_CHAR
) {
269 *outBasename
++ = U_FILE_SEP_CHAR
;
274 outBasename
= outFileName
;
280 printf("makeconv: processing %d files...\n", argc
- 1);
281 for(i
=1; i
<argc
; ++i
) {
282 printf("%s ", argv
[i
]);
290 printFilename
= (UBool
) (argc
> 2 || VERBOSE
);
291 for (++argv
; --argc
; ++argv
)
293 arg
= getLongPathname(*argv
);
295 /* Check for potential buffer overflow */
296 if(strlen(arg
) >= UCNV_MAX_FULL_FILE_NAME_LENGTH
)
298 fprintf(stderr
, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR
));
299 return U_BUFFER_OVERFLOW_ERROR
;
302 /*produces the right destination path for display*/
305 const char *basename
;
307 /* find the last file separator */
308 basename
= findBasename(arg
);
309 uprv_strcpy(outBasename
, basename
);
313 uprv_strcpy(outFileName
, arg
);
316 /*removes the extension if any is found*/
317 dot
= uprv_strrchr(outBasename
, '.');
323 /* the basename without extension is the converter name */
324 uprv_strcpy(cnvName
, outBasename
);
326 /*Adds the target extension*/
327 uprv_strcat(outBasename
, CONVERTER_FILE_EXTENSION
);
330 printf("makeconv: processing %s ...\n", arg
);
333 localError
= U_ZERO_ERROR
;
335 createConverter(&data
, arg
, &localError
);
337 if (U_FAILURE(localError
))
339 /* if an error is found, print out an error msg and keep going */
340 fprintf(stderr
, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
341 u_errorName(localError
));
348 /* Ensure the static data name matches the file name */
349 /* Changed to ignore directory and only compare base name
352 p
= strrchr(cnvName
, U_FILE_SEP_CHAR
); /* Find last file separator */
354 if(p
== NULL
) /* OK, try alternate */
356 p
= strrchr(cnvName
, U_FILE_ALT_SEP_CHAR
);
359 p
=cnvName
; /* If no separators, no problem */
364 p
++; /* If found separator, don't include it in compare */
366 if(uprv_stricmp(p
,data
.staticData
.name
))
368 fprintf(stderr
, "Warning: %s%s claims to be '%s'\n",
369 cnvName
, CONVERTER_FILE_EXTENSION
,
370 data
.staticData
.name
);
373 uprv_strcpy((char*)data
.staticData
.name
, cnvName
);
375 if(!uprv_isInvariantString((char*)data
.staticData
.name
, -1)) {
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data
.staticData
.name
);
381 err
= U_INVALID_TABLE_FORMAT
;
385 uprv_strcpy(cnvNameWithPkg
, cnvName
);
387 localError
= U_ZERO_ERROR
;
388 writeConverterData(&data
, cnvNameWithPkg
, destdir
, &localError
);
390 if(U_FAILURE(localError
))
392 /* if an error is found, print out an error msg and keep going*/
393 fprintf(stderr
, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
394 u_errorName(localError
));
399 else if (printFilename
)
407 cleanupConvData(&data
);
414 getPlatformAndCCSIDFromName(const char *name
, int8_t *pPlatform
, int32_t *pCCSID
) {
415 if( (name
[0]=='i' || name
[0]=='I') &&
416 (name
[1]=='b' || name
[1]=='B') &&
417 (name
[2]=='m' || name
[2]=='M')
424 *pCCSID
=(int32_t)uprv_strtoul(name
, NULL
, 10);
426 *pPlatform
=UCNV_UNKNOWN
;
432 readHeader(ConvData
*data
,
433 FileStream
* convFile
,
434 const char* converterName
,
435 UErrorCode
*pErrorCode
) {
437 char *s
, *key
, *value
;
438 const UConverterStaticData
*prototype
;
439 UConverterStaticData
*staticData
;
441 if(U_FAILURE(*pErrorCode
)) {
445 staticData
=&data
->staticData
;
446 staticData
->platform
=UCNV_IBM
;
447 staticData
->subCharLen
=0;
449 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
450 /* basic parsing and handling of state-related items */
451 if(ucm_parseHeaderLine(data
->ucm
, line
, &key
, &value
)) {
455 /* stop at the beginning of the mapping section */
456 if(uprv_strcmp(line
, "CHARMAP")==0) {
460 /* collect the information from the header field, ignore unknown keys */
461 if(uprv_strcmp(key
, "code_set_name")==0) {
463 uprv_strcpy((char *)staticData
->name
, value
);
464 getPlatformAndCCSIDFromName(value
, &staticData
->platform
, &staticData
->codepage
);
466 } else if(uprv_strcmp(key
, "subchar")==0) {
467 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
471 length
=ucm_parseBytes(bytes
, line
, (const char **)&s
);
472 if(1<=length
&& length
<=4 && *s
==0) {
473 staticData
->subCharLen
=length
;
474 uprv_memcpy(staticData
->subChar
, bytes
, length
);
476 fprintf(stderr
, "error: illegal <subchar> %s\n", value
);
477 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
480 } else if(uprv_strcmp(key
, "subchar1")==0) {
481 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
484 if(1==ucm_parseBytes(bytes
, line
, (const char **)&s
) && *s
==0) {
485 staticData
->subChar1
=bytes
[0];
487 fprintf(stderr
, "error: illegal <subchar1> %s\n", value
);
488 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
494 /* copy values from the UCMFile to the static data */
495 staticData
->maxBytesPerChar
=(int8_t)data
->ucm
->states
.maxCharLength
;
496 staticData
->minBytesPerChar
=(int8_t)data
->ucm
->states
.minCharLength
;
497 staticData
->conversionType
=data
->ucm
->states
.conversionType
;
499 if(staticData
->conversionType
==UCNV_UNSUPPORTED_CONVERTER
) {
500 fprintf(stderr
, "ucm error: missing conversion type (<uconv_class>)\n");
501 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
506 * Now that we know the type, copy any 'default' values from the table.
507 * We need not check the type any further because the parser only
508 * recognizes what we have prototypes for.
510 * For delta (extension-only) tables, copy values from the base file
511 * instead, see createConverter().
513 if(data
->ucm
->baseName
[0]==0) {
514 prototype
=ucnv_converterStaticData
[staticData
->conversionType
];
515 if(prototype
!=NULL
) {
516 if(staticData
->name
[0]==0) {
517 uprv_strcpy((char *)staticData
->name
, prototype
->name
);
520 if(staticData
->codepage
==0) {
521 staticData
->codepage
=prototype
->codepage
;
524 if(staticData
->platform
==0) {
525 staticData
->platform
=prototype
->platform
;
528 if(staticData
->minBytesPerChar
==0) {
529 staticData
->minBytesPerChar
=prototype
->minBytesPerChar
;
532 if(staticData
->maxBytesPerChar
==0) {
533 staticData
->maxBytesPerChar
=prototype
->maxBytesPerChar
;
536 if(staticData
->subCharLen
==0) {
537 staticData
->subCharLen
=prototype
->subCharLen
;
538 if(prototype
->subCharLen
>0) {
539 uprv_memcpy(staticData
->subChar
, prototype
->subChar
, prototype
->subCharLen
);
545 if(data
->ucm
->states
.outputType
<0) {
546 data
->ucm
->states
.outputType
=(int8_t)data
->ucm
->states
.maxCharLength
-1;
549 if( staticData
->subChar1
!=0 &&
550 (staticData
->minBytesPerChar
>1 ||
551 (staticData
->conversionType
!=UCNV_MBCS
&&
552 staticData
->conversionType
!=UCNV_EBCDIC_STATEFUL
))
554 fprintf(stderr
, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
555 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
559 /* return TRUE if a base table was read, FALSE for an extension table */
561 readFile(ConvData
*data
, const char* converterName
,
562 UErrorCode
*pErrorCode
) {
565 FileStream
*convFile
;
567 UCMStates
*baseStates
;
570 if(U_FAILURE(*pErrorCode
)) {
574 data
->ucm
=ucm_open();
576 convFile
=T_FileStream_open(converterName
, "r");
578 *pErrorCode
=U_FILE_ACCESS_ERROR
;
582 readHeader(data
, convFile
, converterName
, pErrorCode
);
583 if(U_FAILURE(*pErrorCode
)) {
587 if(data
->ucm
->baseName
[0]==0) {
589 baseStates
=&data
->ucm
->states
;
590 ucm_processStates(baseStates
, IGNORE_SISO_CHECK
);
596 /* read the base table */
597 ucm_readTable(data
->ucm
, convFile
, dataIsBase
, baseStates
, pErrorCode
);
598 if(U_FAILURE(*pErrorCode
)) {
602 /* read an extension table if there is one */
603 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
604 end
=uprv_strchr(line
, 0);
606 (*(end
-1)=='\n' || *(end
-1)=='\r' || *(end
-1)==' ' || *(end
-1)=='\t')) {
611 if(line
[0]=='#' || u_skipWhitespace(line
)==end
) {
612 continue; /* ignore empty and comment lines */
615 if(0==uprv_strcmp(line
, "CHARMAP")) {
616 /* read the extension table */
617 ucm_readTable(data
->ucm
, convFile
, FALSE
, baseStates
, pErrorCode
);
619 fprintf(stderr
, "unexpected text after the base mapping table\n");
624 T_FileStream_close(convFile
);
626 if(data
->ucm
->base
->flagsType
==UCM_FLAGS_MIXED
|| data
->ucm
->ext
->flagsType
==UCM_FLAGS_MIXED
) {
627 fprintf(stderr
, "error: some entries have the mapping precision (with '|'), some do not\n");
628 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
635 createConverter(ConvData
*data
, const char *converterName
, UErrorCode
*pErrorCode
) {
639 UConverterStaticData
*staticData
;
640 UCMStates
*states
, *baseStates
;
642 if(U_FAILURE(*pErrorCode
)) {
648 dataIsBase
=readFile(data
, converterName
, pErrorCode
);
649 if(U_FAILURE(*pErrorCode
)) {
653 staticData
=&data
->staticData
;
654 states
=&data
->ucm
->states
;
658 * Build a normal .cnv file with a base table
659 * and an optional extension table.
661 data
->cnvData
=MBCSOpen(data
->ucm
);
662 if(data
->cnvData
==NULL
) {
663 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
665 } else if(!data
->cnvData
->isValid(data
->cnvData
,
666 staticData
->subChar
, staticData
->subCharLen
)
668 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
669 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
671 } else if(staticData
->subChar1
!=0 &&
672 !data
->cnvData
->isValid(data
->cnvData
, &staticData
->subChar1
, 1)
674 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
675 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
678 data
->ucm
->ext
->mappingsLength
>0 &&
679 !ucm_checkBaseExt(states
, data
->ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
681 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
682 } else if(data
->ucm
->base
->flagsType
&UCM_FLAGS_EXPLICIT
) {
683 /* sort the table so that it can be turned into UTF-8-friendly data */
684 ucm_sortTable(data
->ucm
->base
);
687 if(U_SUCCESS(*pErrorCode
)) {
689 /* add the base table after ucm_checkBaseExt()! */
690 !data
->cnvData
->addTable(data
->cnvData
, data
->ucm
->base
, &data
->staticData
)
692 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
695 * addTable() may have requested moving more mappings to the extension table
696 * if they fit into the base toUnicode table but not into the
697 * base fromUnicode table.
698 * (Especially for UTF-8-friendly fromUnicode tables.)
699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
700 * to be excluded from the extension toUnicode data.
701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
702 * the base fromUnicode table.
704 ucm_moveMappings(data
->ucm
->base
, data
->ucm
->ext
);
705 ucm_sortTable(data
->ucm
->ext
);
706 if(data
->ucm
->ext
->mappingsLength
>0) {
707 /* prepare the extension table, if there is one */
708 data
->extData
=CnvExtOpen(data
->ucm
);
709 if(data
->extData
==NULL
) {
710 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
712 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
714 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
720 /* Build an extension-only .cnv file. */
721 char baseFilename
[500];
724 initConvData(&baseData
);
726 /* assemble a path/filename for data->ucm->baseName */
727 uprv_strcpy(baseFilename
, converterName
);
728 basename
=(char *)findBasename(baseFilename
);
729 uprv_strcpy(basename
, data
->ucm
->baseName
);
730 uprv_strcat(basename
, ".ucm");
732 /* read the base table */
733 dataIsBase
=readFile(&baseData
, baseFilename
, pErrorCode
);
734 if(U_FAILURE(*pErrorCode
)) {
736 } else if(!dataIsBase
) {
737 fprintf(stderr
, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename
);
738 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
740 /* prepare the extension table */
741 data
->extData
=CnvExtOpen(data
->ucm
);
742 if(data
->extData
==NULL
) {
743 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
745 /* fill in gaps in extension file header fields */
746 UCMapping
*m
, *mLimit
;
747 uint8_t fallbackFlags
;
749 baseStates
=&baseData
.ucm
->states
;
750 if(states
->conversionType
==UCNV_DBCS
) {
751 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=2);
752 } else if(states
->minCharLength
==0) {
753 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=baseStates
->minCharLength
);
755 if(states
->maxCharLength
<states
->minCharLength
) {
756 staticData
->maxBytesPerChar
=(int8_t)(states
->maxCharLength
=baseStates
->maxCharLength
);
759 if(staticData
->subCharLen
==0) {
760 uprv_memcpy(staticData
->subChar
, baseData
.staticData
.subChar
, 4);
761 staticData
->subCharLen
=baseData
.staticData
.subCharLen
;
764 * do not copy subChar1 -
765 * only use what is explicitly specified
766 * because it cannot be unset in the extension file header
769 /* get the fallback flags */
771 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
772 m
<mLimit
&& fallbackFlags
!=3;
782 if(fallbackFlags
&1) {
783 staticData
->hasFromUnicodeFallback
=TRUE
;
785 if(fallbackFlags
&2) {
786 staticData
->hasToUnicodeFallback
=TRUE
;
789 if(1!=ucm_countChars(baseStates
, staticData
->subChar
, staticData
->subCharLen
)) {
790 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
791 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
793 } else if(staticData
->subChar1
!=0 && 1!=ucm_countChars(baseStates
, &staticData
->subChar1
, 1)) {
794 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
795 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
798 !ucm_checkValidity(data
->ucm
->ext
, baseStates
) ||
799 !ucm_checkBaseExt(baseStates
, baseData
.ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
801 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
803 if(states
->maxCharLength
>1) {
805 * When building a normal .cnv file with a base table
806 * for an MBCS (not SBCS) table with explicit precision flags,
807 * the MBCSAddTable() function marks some mappings for moving
808 * to the extension table.
809 * They fit into the base toUnicode table but not into the
810 * base fromUnicode table.
811 * (Note: We do have explicit precision flags because they are
812 * required for extension table generation, and
813 * ucm_checkBaseExt() verified it.)
815 * We do not call MBCSAddTable() here (we probably could)
816 * so we need to do the analysis before building the extension table.
817 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
818 * Redundant mappings in the extension table are ok except they cost some size.
820 * Do this after ucm_checkBaseExt().
822 const MBCSData
*mbcsData
=MBCSGetDummy();
824 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
828 if(!MBCSOkForBaseFromUnicode(mbcsData
, m
->b
.bytes
, m
->bLen
, m
->u
, m
->f
)) {
829 m
->f
|=MBCS_FROM_U_EXT_FLAG
;
830 m
->moveFlag
=UCM_MOVE_TO_EXT
;
836 ucm_moveMappings(baseData
.ucm
->base
, data
->ucm
->ext
);
837 ucm_sortTable(data
->ucm
->ext
);
840 if(!data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)) {
841 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
847 cleanupConvData(&baseData
);
852 * Hey, Emacs, please set the following:
855 * indent-tabs-mode: nil