2 ********************************************************************************
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
30 #include "unicode/udata.h"
39 typedef struct ConvData
{
41 NewConverter
*cnvData
, *extData
;
42 UConverterSharedData sharedData
;
43 UConverterStaticData staticData
;
47 initConvData(ConvData
*data
) {
48 uprv_memset(data
, 0, sizeof(ConvData
));
49 data
->sharedData
.structSize
=sizeof(UConverterSharedData
);
50 data
->staticData
.structSize
=sizeof(UConverterStaticData
);
51 data
->sharedData
.staticData
=&data
->staticData
;
55 cleanupConvData(ConvData
*data
) {
57 if(data
->cnvData
!=NULL
) {
58 data
->cnvData
->close(data
->cnvData
);
61 if(data
->extData
!=NULL
) {
62 data
->extData
->close(data
->extData
);
71 * from ucnvstat.c - static prototypes of data-based converters
73 extern const UConverterStaticData
* ucnv_converterStaticData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
];
78 UBool VERBOSE
= FALSE
;
80 UBool IGNORE_SISO_CHECK
= FALSE
;
83 createConverter(ConvData
*data
, const char* converterName
, UErrorCode
*pErrorCode
);
86 * Set up the UNewData and write the converter.
89 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
);
91 UBool haveCopyright
=TRUE
;
93 static UDataInfo dataInfo
={
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
103 {6, 2, 0, 0}, /* formatVersion */
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
108 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
)
110 UNewDataMemory
*mem
= NULL
;
115 if(U_FAILURE(*status
))
120 tableType
=TABLE_NONE
;
121 if(data
->cnvData
!=NULL
) {
122 tableType
|=TABLE_BASE
;
124 if(data
->extData
!=NULL
) {
125 tableType
|=TABLE_EXT
;
128 mem
= udata_create(cnvDir
, "cnv", cnvName
, &dataInfo
, haveCopyright
? U_COPYRIGHT_STRING
: NULL
, status
);
130 if(U_FAILURE(*status
))
132 fprintf(stderr
, "Couldn't create the udata %s.%s: %s\n",
135 u_errorName(*status
));
141 printf("- Opened udata %s.%s\n", cnvName
, "cnv");
145 /* all read only, clean, platform independent data. Mmmm. :) */
146 udata_writeBlock(mem
, &data
->staticData
, sizeof(UConverterStaticData
));
147 size
+= sizeof(UConverterStaticData
); /* Is 4-aligned - by size */
148 /* Now, write the table */
149 if(tableType
&TABLE_BASE
) {
150 size
+= data
->cnvData
->write(data
->cnvData
, &data
->staticData
, mem
, tableType
);
152 if(tableType
&TABLE_EXT
) {
153 size
+= data
->extData
->write(data
->extData
, &data
->staticData
, mem
, tableType
);
156 sz2
= udata_finish(mem
, status
);
159 fprintf(stderr
, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2
, (int)size
);
160 *status
=U_INTERNAL_PROGRAM_ERROR
;
164 printf("- Wrote %u bytes to the udata.\n", (int)sz2
);
170 OPT_HELP_QUESTION_MARK
,
176 OPT_IGNORE_SISO_CHECK
,
180 static UOption options
[]={
182 UOPTION_HELP_QUESTION_MARK
,
187 { "small", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
188 { "ignore-siso-check", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 }
191 int main(int argc
, char* argv
[])
194 UErrorCode err
= U_ZERO_ERROR
, localError
;
195 char outFileName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
196 const char* destdir
, *arg
;
198 char* dot
= NULL
, *outBasename
;
199 char cnvName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
200 char cnvNameWithPkg
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
201 UVersionInfo icuVersion
;
206 U_MAIN_INIT_ARGS(argc
, argv
);
208 /* Set up the ICU version number */
209 u_getVersion(icuVersion
);
210 uprv_memcpy(&dataInfo
.dataVersion
, &icuVersion
, sizeof(UVersionInfo
));
212 /* preset then read command line options */
213 options
[OPT_DESTDIR
].value
=u_getDataDirectory();
214 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
216 /* error handling, printing usage message */
219 "error in command line argument \"%s\"\n",
224 if(argc
<0 || options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
) {
225 FILE *stdfile
=argc
<0 ? stderr
: stdout
;
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
234 "\t-v or --verbose Turn on verbose output\n",
237 "\t --small Generate smaller .cnv files. They will be\n"
238 "\t significantly smaller but may not be compatible with\n"
239 "\t older versions of ICU and will require heap memory\n"
240 "\t allocation when loaded.\n"
241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
242 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
245 if(options
[OPT_VERSION
].doesOccur
) {
246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
247 dataInfo
.formatVersion
[0], dataInfo
.formatVersion
[1]);
248 printf("%s\n", U_COPYRIGHT_STRING
);
252 /* get the options values */
253 haveCopyright
= options
[OPT_COPYRIGHT
].doesOccur
;
254 destdir
= options
[OPT_DESTDIR
].value
;
255 VERBOSE
= options
[OPT_VERBOSE
].doesOccur
;
256 SMALL
= options
[OPT_SMALL
].doesOccur
;
258 if (options
[OPT_IGNORE_SISO_CHECK
].doesOccur
) {
259 IGNORE_SISO_CHECK
= TRUE
;
262 if (destdir
!= NULL
&& *destdir
!= 0) {
263 uprv_strcpy(outFileName
, destdir
);
264 destdirlen
= uprv_strlen(destdir
);
265 outBasename
= outFileName
+ destdirlen
;
266 if (*(outBasename
- 1) != U_FILE_SEP_CHAR
) {
267 *outBasename
++ = U_FILE_SEP_CHAR
;
272 outBasename
= outFileName
;
278 printf("makeconv: processing %d files...\n", argc
- 1);
279 for(i
=1; i
<argc
; ++i
) {
280 printf("%s ", argv
[i
]);
288 printFilename
= (UBool
) (argc
> 2 || VERBOSE
);
289 for (++argv
; --argc
; ++argv
)
291 arg
= getLongPathname(*argv
);
293 /* Check for potential buffer overflow */
294 if(strlen(arg
) >= UCNV_MAX_FULL_FILE_NAME_LENGTH
)
296 fprintf(stderr
, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR
));
297 return U_BUFFER_OVERFLOW_ERROR
;
300 /*produces the right destination path for display*/
303 const char *basename
;
305 /* find the last file separator */
306 basename
= findBasename(arg
);
307 uprv_strcpy(outBasename
, basename
);
311 uprv_strcpy(outFileName
, arg
);
314 /*removes the extension if any is found*/
315 dot
= uprv_strrchr(outBasename
, '.');
321 /* the basename without extension is the converter name */
322 uprv_strcpy(cnvName
, outBasename
);
324 /*Adds the target extension*/
325 uprv_strcat(outBasename
, CONVERTER_FILE_EXTENSION
);
328 printf("makeconv: processing %s ...\n", arg
);
331 localError
= U_ZERO_ERROR
;
333 createConverter(&data
, arg
, &localError
);
335 if (U_FAILURE(localError
))
337 /* if an error is found, print out an error msg and keep going */
338 fprintf(stderr
, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
339 u_errorName(localError
));
346 /* Ensure the static data name matches the file name */
347 /* Changed to ignore directory and only compare base name
350 p
= strrchr(cnvName
, U_FILE_SEP_CHAR
); /* Find last file separator */
352 if(p
== NULL
) /* OK, try alternate */
354 p
= strrchr(cnvName
, U_FILE_ALT_SEP_CHAR
);
357 p
=cnvName
; /* If no separators, no problem */
362 p
++; /* If a separator was found, don't include it in the compare */
364 if(uprv_stricmp(p
,data
.staticData
.name
))
366 fprintf(stderr
, "Warning: %s%s claims to be '%s'\n",
367 cnvName
, CONVERTER_FILE_EXTENSION
,
368 data
.staticData
.name
);
371 uprv_strcpy((char*)data
.staticData
.name
, cnvName
);
373 if(!uprv_isInvariantString((char*)data
.staticData
.name
, -1)) {
375 "Error: A converter name must contain only invariant characters.\n"
376 "%s is not a valid converter name.\n",
377 data
.staticData
.name
);
379 err
= U_INVALID_TABLE_FORMAT
;
383 uprv_strcpy(cnvNameWithPkg
, cnvName
);
385 localError
= U_ZERO_ERROR
;
386 writeConverterData(&data
, cnvNameWithPkg
, destdir
, &localError
);
388 if(U_FAILURE(localError
))
390 /* if an error is found, print out an error msg and keep going*/
391 fprintf(stderr
, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
392 u_errorName(localError
));
397 else if (printFilename
)
405 cleanupConvData(&data
);
412 getPlatformAndCCSIDFromName(const char *name
, int8_t *pPlatform
, int32_t *pCCSID
) {
413 if( (name
[0]=='i' || name
[0]=='I') &&
414 (name
[1]=='b' || name
[1]=='B') &&
415 (name
[2]=='m' || name
[2]=='M')
422 *pCCSID
=(int32_t)uprv_strtoul(name
, NULL
, 10);
424 *pPlatform
=UCNV_UNKNOWN
;
430 readHeader(ConvData
*data
,
431 FileStream
* convFile
,
432 const char* converterName
,
433 UErrorCode
*pErrorCode
) {
435 char *s
, *key
, *value
;
436 const UConverterStaticData
*prototype
;
437 UConverterStaticData
*staticData
;
439 if(U_FAILURE(*pErrorCode
)) {
443 staticData
=&data
->staticData
;
444 staticData
->platform
=UCNV_IBM
;
445 staticData
->subCharLen
=0;
447 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
448 /* basic parsing and handling of state-related items */
449 if(ucm_parseHeaderLine(data
->ucm
, line
, &key
, &value
)) {
453 /* stop at the beginning of the mapping section */
454 if(uprv_strcmp(line
, "CHARMAP")==0) {
458 /* collect the information from the header field, ignore unknown keys */
459 if(uprv_strcmp(key
, "code_set_name")==0) {
461 uprv_strcpy((char *)staticData
->name
, value
);
462 getPlatformAndCCSIDFromName(value
, &staticData
->platform
, &staticData
->codepage
);
464 } else if(uprv_strcmp(key
, "subchar")==0) {
465 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
469 length
=ucm_parseBytes(bytes
, line
, (const char **)&s
);
470 if(1<=length
&& length
<=4 && *s
==0) {
471 staticData
->subCharLen
=length
;
472 uprv_memcpy(staticData
->subChar
, bytes
, length
);
474 fprintf(stderr
, "error: illegal <subchar> %s\n", value
);
475 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
478 } else if(uprv_strcmp(key
, "subchar1")==0) {
479 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
482 if(1==ucm_parseBytes(bytes
, line
, (const char **)&s
) && *s
==0) {
483 staticData
->subChar1
=bytes
[0];
485 fprintf(stderr
, "error: illegal <subchar1> %s\n", value
);
486 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
492 /* copy values from the UCMFile to the static data */
493 staticData
->maxBytesPerChar
=(int8_t)data
->ucm
->states
.maxCharLength
;
494 staticData
->minBytesPerChar
=(int8_t)data
->ucm
->states
.minCharLength
;
495 staticData
->conversionType
=data
->ucm
->states
.conversionType
;
497 if(staticData
->conversionType
==UCNV_UNSUPPORTED_CONVERTER
) {
498 fprintf(stderr
, "ucm error: missing conversion type (<uconv_class>)\n");
499 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
504 * Now that we know the type, copy any 'default' values from the table.
505 * We need not check the type any further because the parser only
506 * recognizes what we have prototypes for.
508 * For delta (extension-only) tables, copy values from the base file
509 * instead, see createConverter().
511 if(data
->ucm
->baseName
[0]==0) {
512 prototype
=ucnv_converterStaticData
[staticData
->conversionType
];
513 if(prototype
!=NULL
) {
514 if(staticData
->name
[0]==0) {
515 uprv_strcpy((char *)staticData
->name
, prototype
->name
);
518 if(staticData
->codepage
==0) {
519 staticData
->codepage
=prototype
->codepage
;
522 if(staticData
->platform
==0) {
523 staticData
->platform
=prototype
->platform
;
526 if(staticData
->minBytesPerChar
==0) {
527 staticData
->minBytesPerChar
=prototype
->minBytesPerChar
;
530 if(staticData
->maxBytesPerChar
==0) {
531 staticData
->maxBytesPerChar
=prototype
->maxBytesPerChar
;
534 if(staticData
->subCharLen
==0) {
535 staticData
->subCharLen
=prototype
->subCharLen
;
536 if(prototype
->subCharLen
>0) {
537 uprv_memcpy(staticData
->subChar
, prototype
->subChar
, prototype
->subCharLen
);
543 if(data
->ucm
->states
.outputType
<0) {
544 data
->ucm
->states
.outputType
=(int8_t)data
->ucm
->states
.maxCharLength
-1;
547 if( staticData
->subChar1
!=0 &&
548 (staticData
->minBytesPerChar
>1 ||
549 (staticData
->conversionType
!=UCNV_MBCS
&&
550 staticData
->conversionType
!=UCNV_EBCDIC_STATEFUL
))
552 fprintf(stderr
, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
553 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
557 /* return TRUE if a base table was read, FALSE for an extension table */
559 readFile(ConvData
*data
, const char* converterName
,
560 UErrorCode
*pErrorCode
) {
563 FileStream
*convFile
;
565 UCMStates
*baseStates
;
568 if(U_FAILURE(*pErrorCode
)) {
572 data
->ucm
=ucm_open();
574 convFile
=T_FileStream_open(converterName
, "r");
576 *pErrorCode
=U_FILE_ACCESS_ERROR
;
580 readHeader(data
, convFile
, converterName
, pErrorCode
);
581 if(U_FAILURE(*pErrorCode
)) {
585 if(data
->ucm
->baseName
[0]==0) {
587 baseStates
=&data
->ucm
->states
;
588 ucm_processStates(baseStates
, IGNORE_SISO_CHECK
);
594 /* read the base table */
595 ucm_readTable(data
->ucm
, convFile
, dataIsBase
, baseStates
, pErrorCode
);
596 if(U_FAILURE(*pErrorCode
)) {
600 /* read an extension table if there is one */
601 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
602 end
=uprv_strchr(line
, 0);
604 (*(end
-1)=='\n' || *(end
-1)=='\r' || *(end
-1)==' ' || *(end
-1)=='\t')) {
609 if(line
[0]=='#' || u_skipWhitespace(line
)==end
) {
610 continue; /* ignore empty and comment lines */
613 if(0==uprv_strcmp(line
, "CHARMAP")) {
614 /* read the extension table */
615 ucm_readTable(data
->ucm
, convFile
, FALSE
, baseStates
, pErrorCode
);
617 fprintf(stderr
, "unexpected text after the base mapping table\n");
622 T_FileStream_close(convFile
);
624 if(data
->ucm
->base
->flagsType
==UCM_FLAGS_MIXED
|| data
->ucm
->ext
->flagsType
==UCM_FLAGS_MIXED
) {
625 fprintf(stderr
, "error: some entries have the mapping precision (with '|'), some do not\n");
626 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
633 createConverter(ConvData
*data
, const char *converterName
, UErrorCode
*pErrorCode
) {
637 UConverterStaticData
*staticData
;
638 UCMStates
*states
, *baseStates
;
640 if(U_FAILURE(*pErrorCode
)) {
646 dataIsBase
=readFile(data
, converterName
, pErrorCode
);
647 if(U_FAILURE(*pErrorCode
)) {
651 staticData
=&data
->staticData
;
652 states
=&data
->ucm
->states
;
656 * Build a normal .cnv file with a base table
657 * and an optional extension table.
659 data
->cnvData
=MBCSOpen(data
->ucm
);
660 if(data
->cnvData
==NULL
) {
661 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
663 } else if(!data
->cnvData
->isValid(data
->cnvData
,
664 staticData
->subChar
, staticData
->subCharLen
)
666 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
667 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
669 } else if(staticData
->subChar1
!=0 &&
670 !data
->cnvData
->isValid(data
->cnvData
, &staticData
->subChar1
, 1)
672 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
673 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
676 data
->ucm
->ext
->mappingsLength
>0 &&
677 !ucm_checkBaseExt(states
, data
->ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
679 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
680 } else if(data
->ucm
->base
->flagsType
&UCM_FLAGS_EXPLICIT
) {
681 /* sort the table so that it can be turned into UTF-8-friendly data */
682 ucm_sortTable(data
->ucm
->base
);
685 if(U_SUCCESS(*pErrorCode
)) {
687 /* add the base table after ucm_checkBaseExt()! */
688 !data
->cnvData
->addTable(data
->cnvData
, data
->ucm
->base
, &data
->staticData
)
690 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
693 * addTable() may have requested moving more mappings to the extension table
694 * if they fit into the base toUnicode table but not into the
695 * base fromUnicode table.
696 * (Especially for UTF-8-friendly fromUnicode tables.)
697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
698 * to be excluded from the extension toUnicode data.
699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
700 * the base fromUnicode table.
702 ucm_moveMappings(data
->ucm
->base
, data
->ucm
->ext
);
703 ucm_sortTable(data
->ucm
->ext
);
704 if(data
->ucm
->ext
->mappingsLength
>0) {
705 /* prepare the extension table, if there is one */
706 data
->extData
=CnvExtOpen(data
->ucm
);
707 if(data
->extData
==NULL
) {
708 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
710 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
712 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
718 /* Build an extension-only .cnv file. */
719 char baseFilename
[500];
722 initConvData(&baseData
);
724 /* assemble a path/filename for data->ucm->baseName */
725 uprv_strcpy(baseFilename
, converterName
);
726 basename
=(char *)findBasename(baseFilename
);
727 uprv_strcpy(basename
, data
->ucm
->baseName
);
728 uprv_strcat(basename
, ".ucm");
730 /* read the base table */
731 dataIsBase
=readFile(&baseData
, baseFilename
, pErrorCode
);
732 if(U_FAILURE(*pErrorCode
)) {
734 } else if(!dataIsBase
) {
735 fprintf(stderr
, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename
);
736 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
738 /* prepare the extension table */
739 data
->extData
=CnvExtOpen(data
->ucm
);
740 if(data
->extData
==NULL
) {
741 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
743 /* fill in gaps in extension file header fields */
744 UCMapping
*m
, *mLimit
;
745 uint8_t fallbackFlags
;
747 baseStates
=&baseData
.ucm
->states
;
748 if(states
->conversionType
==UCNV_DBCS
) {
749 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=2);
750 } else if(states
->minCharLength
==0) {
751 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=baseStates
->minCharLength
);
753 if(states
->maxCharLength
<states
->minCharLength
) {
754 staticData
->maxBytesPerChar
=(int8_t)(states
->maxCharLength
=baseStates
->maxCharLength
);
757 if(staticData
->subCharLen
==0) {
758 uprv_memcpy(staticData
->subChar
, baseData
.staticData
.subChar
, 4);
759 staticData
->subCharLen
=baseData
.staticData
.subCharLen
;
762 * do not copy subChar1 -
763 * only use what is explicitly specified
764 * because it cannot be unset in the extension file header
767 /* get the fallback flags */
769 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
770 m
<mLimit
&& fallbackFlags
!=3;
780 if(fallbackFlags
&1) {
781 staticData
->hasFromUnicodeFallback
=TRUE
;
783 if(fallbackFlags
&2) {
784 staticData
->hasToUnicodeFallback
=TRUE
;
787 if(1!=ucm_countChars(baseStates
, staticData
->subChar
, staticData
->subCharLen
)) {
788 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
789 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
791 } else if(staticData
->subChar1
!=0 && 1!=ucm_countChars(baseStates
, &staticData
->subChar1
, 1)) {
792 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
793 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
796 !ucm_checkValidity(data
->ucm
->ext
, baseStates
) ||
797 !ucm_checkBaseExt(baseStates
, baseData
.ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
799 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
801 if(states
->maxCharLength
>1) {
803 * When building a normal .cnv file with a base table
804 * for an MBCS (not SBCS) table with explicit precision flags,
805 * the MBCSAddTable() function marks some mappings for moving
806 * to the extension table.
807 * They fit into the base toUnicode table but not into the
808 * base fromUnicode table.
809 * (Note: We do have explicit precision flags because they are
810 * required for extension table generation, and
811 * ucm_checkBaseExt() verified it.)
813 * We do not call MBCSAddTable() here (we probably could)
814 * so we need to do the analysis before building the extension table.
815 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
816 * Redundant mappings in the extension table are ok except they cost some size.
818 * Do this after ucm_checkBaseExt().
820 const MBCSData
*mbcsData
=MBCSGetDummy();
822 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
826 if(!MBCSOkForBaseFromUnicode(mbcsData
, m
->b
.bytes
, m
->bLen
, m
->u
, m
->f
)) {
827 m
->f
|=MBCS_FROM_U_EXT_FLAG
;
828 m
->moveFlag
=UCM_MOVE_TO_EXT
;
834 ucm_moveMappings(baseData
.ucm
->base
, data
->ucm
->ext
);
835 ucm_sortTable(data
->ucm
->ext
);
838 if(!data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)) {
839 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
845 cleanupConvData(&baseData
);
850 * Hey, Emacs, please set the following:
853 * indent-tabs-mode: nil