2 ********************************************************************************
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
31 #include "unicode/udata.h"
/*
 * Build state for one converter: two NewConverter handles (cnvData for the
 * base table, extData for the extension table) and the shared/static data
 * records.
 * NOTE(review): other code in this file also reads data->ucm; that member's
 * declaration is not visible in this excerpt.
 */
40 typedef struct ConvData
{
42 NewConverter
*cnvData
, *extData
;
43 UConverterSharedData sharedData
;
44 UConverterStaticData staticData
;
/*
 * Reset a ConvData: clear every byte, record the structure sizes of the
 * shared and static records, and link the shared record to the static
 * record embedded in the same ConvData.
 */
48 initConvData(ConvData
*data
) {
49 uprv_memset(data
, 0, sizeof(ConvData
));
50 data
->sharedData
.structSize
=sizeof(UConverterSharedData
);
51 data
->staticData
.structSize
=sizeof(UConverterStaticData
);
/* sharedData points at the staticData stored inside this same struct */
52 data
->sharedData
.staticData
=&data
->staticData
;
/*
 * Release the table builders held by a ConvData. Each NewConverter is
 * closed through its own close() callback, and only when it was opened.
 * NOTE(review): lines are missing from this excerpt; any further cleanup
 * (e.g. of data->ucm) cannot be confirmed from here.
 */
56 cleanupConvData(ConvData
*data
) {
58 if(data
->cnvData
!=NULL
) {
59 data
->cnvData
->close(data
->cnvData
);
62 if(data
->extData
!=NULL
) {
63 data
->extData
->close(data
->extData
);
72 * from ucnvstat.c - static prototypes of data-based converters
/*
 * Table of static-data prototypes with one slot per supported converter
 * type; readHeader() indexes it by conversionType to fetch default values.
 */
74 U_CAPI
const UConverterStaticData
* ucnv_converterStaticData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
];
/* Verbose-output flag; main() sets it from the OPT_VERBOSE option. */
79 UBool VERBOSE
= FALSE
;
/*
 * Passed to ucm_processStates() by readFile(); main() sets it to TRUE when
 * the --ignore-siso-check option occurs.
 */
82 UBool IGNORE_SISO_CHECK
= FALSE
;
/*
 * Forward declaration: read the mapping file named by converterName into
 * *data, reporting failures through *pErrorCode.
 */
85 createConverter(ConvData
*data
, const char* converterName
, UErrorCode
*pErrorCode
);
88 * Set up the UNewData and write the converter.
/*
 * Forward declaration: write the converter held in *data as cnvName with
 * type "cnv" under cnvDir, reporting failures through *status.
 */
91 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
);
/*
 * Whether the output gets U_COPYRIGHT_STRING; main() overwrites this from
 * the OPT_COPYRIGHT option before any file is written.
 */
93 UBool haveCopyright
=TRUE
;
/*
 * Header description passed to udata_create() for every .cnv file.
 * dataVersion starts as zeros and is filled by main() from u_getVersion().
 */
95 static UDataInfo dataInfo
={
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
/*
 * Write one converter to a udata file.
 *
 * data    - converter state; cnvData/extData decide which tables are written
 * cnvName - name passed to udata_create() (the file type is "cnv")
 * cnvDir  - destination directory passed to udata_create()
 * status  - in/out error code; checked on entry and after udata_create()
 *
 * The function keeps its own byte count in `size` and compares it with the
 * count returned by udata_finish().
 */
110 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
)
112 UNewDataMemory
*mem
= NULL
;
117 if(U_FAILURE(*status
))
/* tableType is a bit set: TABLE_BASE and/or TABLE_EXT, per opened builder */
122 tableType
=TABLE_NONE
;
123 if(data
->cnvData
!=NULL
) {
124 tableType
|=TABLE_BASE
;
126 if(data
->extData
!=NULL
) {
127 tableType
|=TABLE_EXT
;
/* the copyright string is included only when haveCopyright is set */
130 mem
= udata_create(cnvDir
, "cnv", cnvName
, &dataInfo
, haveCopyright
? U_COPYRIGHT_STRING
: NULL
, status
);
132 if(U_FAILURE(*status
))
134 fprintf(stderr
, "Couldn't create the udata %s.%s: %s\n",
137 u_errorName(*status
));
143 printf("- Opened udata %s.%s\n", cnvName
, "cnv");
147 /* all read only, clean, platform independent data. Mmmm. :) */
148 udata_writeBlock(mem
, &data
->staticData
, sizeof(UConverterStaticData
));
149 size
+= sizeof(UConverterStaticData
); /* Is 4-aligned - by size */
150 /* Now, write the table */
/* each write() callback returns a byte count that is added to size */
151 if(tableType
&TABLE_BASE
) {
152 size
+= data
->cnvData
->write(data
->cnvData
, &data
->staticData
, mem
, tableType
);
154 if(tableType
&TABLE_EXT
) {
155 size
+= data
->extData
->write(data
->extData
, &data
->staticData
, mem
, tableType
);
158 sz2
= udata_finish(mem
, status
);
/*
 * NOTE(review): the two messages below use %u but pass arguments cast to
 * (int); the conversion specifier and argument type do not match.
 */
161 fprintf(stderr
, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2
, (int)size
);
162 *status
=U_INTERNAL_PROGRAM_ERROR
;
166 printf("- Wrote %u bytes to the udata.\n", (int)sz2
);
/*
 * Option indexes used to subscript options[] in main().
 * NOTE(review): only two enumerators are visible in this excerpt; main()
 * uses more (OPT_HELP_H, OPT_VERSION, OPT_DESTDIR, ...).
 */
172 OPT_HELP_QUESTION_MARK
,
178 OPT_IGNORE_SISO_CHECK
,
/* Command-line option table handed to u_parseArgs(). */
184 static UOption options
[]={
186 UOPTION_HELP_QUESTION_MARK
,
/* "small" and "ignore-siso-check" are declared with UOPT_NO_ARG */
191 { "small", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
192 { "ignore-siso-check", NULL
, NULL
, NULL
, '\1', UOPT_NO_ARG
, 0 },
/*
 * Program entry point: parse the command line, then for each remaining
 * argument read a .ucm mapping file and write the matching .cnv file.
 * Per-file failures are reported on stderr and, per the comments below,
 * processing keeps going with the next file.
 */
196 int main(int argc
, char* argv
[])
199 char cnvName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
201 U_MAIN_INIT_ARGS(argc
, argv
);
203 /* Set up the ICU version number */
204 UVersionInfo icuVersion
;
205 u_getVersion(icuVersion
);
206 uprv_memcpy(&dataInfo
.dataVersion
, &icuVersion
, sizeof(UVersionInfo
));
208 /* preset then read command line options */
/* the destination directory defaults to the ICU data directory */
209 options
[OPT_DESTDIR
].value
=u_getDataDirectory();
210 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
212 /* error handling, printing usage message */
215 "error in command line argument \"%s\"\n",
/* usage goes to stderr after a parse error (argc<0), otherwise to stdout */
220 if(argc
<0 || options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
) {
221 FILE *stdfile
=argc
<0 ? stderr
: stdout
;
223 "usage: %s [-options] files...\n"
224 "\tread .ucm codepage mapping files and write .cnv files\n"
226 "\t-h or -? or --help this usage text\n"
227 "\t-V or --version show a version message\n"
228 "\t-c or --copyright include a copyright notice\n"
229 "\t-d or --destdir destination directory, followed by the path\n"
230 "\t-v or --verbose Turn on verbose output\n"
231 "\t-q or --quiet do not display warnings and progress\n",
234 "\t --small Generate smaller .cnv files. They will be\n"
235 "\t significantly smaller but may not be compatible with\n"
236 "\t older versions of ICU and will require heap memory\n"
237 "\t allocation when loaded.\n"
238 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
239 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
/* the version message reports the first two bytes of dataInfo.formatVersion */
242 if(options
[OPT_VERSION
].doesOccur
) {
243 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
244 dataInfo
.formatVersion
[0], dataInfo
.formatVersion
[1]);
245 printf("%s\n", U_COPYRIGHT_STRING
);
249 /* get the options values */
250 haveCopyright
= options
[OPT_COPYRIGHT
].doesOccur
;
251 const char *destdir
= options
[OPT_DESTDIR
].value
;
252 VERBOSE
= options
[OPT_VERBOSE
].doesOccur
;
253 QUIET
= options
[OPT_QUIET
].doesOccur
;
254 SMALL
= options
[OPT_SMALL
].doesOccur
;
256 if (options
[OPT_IGNORE_SISO_CHECK
].doesOccur
) {
257 IGNORE_SISO_CHECK
= TRUE
;
/*
 * outFileName starts as "<destdir><separator>" when a destination directory
 * is given; outBasenameStart remembers where the per-file part begins.
 */
260 icu::CharString outFileName
;
261 UErrorCode err
= U_ZERO_ERROR
;
262 if (destdir
!= NULL
&& *destdir
!= 0) {
263 outFileName
.append(destdir
, err
).ensureEndsWithFileSeparator(err
);
264 if (U_FAILURE(err
)) {
268 int32_t outBasenameStart
= outFileName
.length();
273 printf("makeconv: processing %d files...\n", argc
- 1);
274 for(i
=1; i
<argc
; ++i
) {
275 printf("%s ", argv
[i
]);
/* file names are echoed when several files are given or VERBOSE is set */
282 UBool printFilename
= (UBool
) (argc
> 2 || VERBOSE
);
/* argv[0] is skipped; each remaining argument is one input file */
283 for (++argv
; --argc
; ++argv
)
285 UErrorCode localError
= U_ZERO_ERROR
;
286 const char *arg
= getLongPathname(*argv
);
288 /*produces the right destination path for display*/
289 outFileName
.truncate(outBasenameStart
);
/* with a directory prefix only the basename is appended, else the whole arg */
290 if (outBasenameStart
!= 0)
292 /* find the last file separator */
293 const char *basename
= findBasename(arg
);
294 outFileName
.append(basename
, localError
);
298 outFileName
.append(arg
, localError
);
300 if (U_FAILURE(localError
)) {
304 /*removes the extension if any is found*/
/* a dot inside the directory prefix is not treated as an extension */
305 int32_t lastDotIndex
= outFileName
.lastIndexOf('.');
306 if (lastDotIndex
>= outBasenameStart
) {
307 outFileName
.truncate(lastDotIndex
);
310 /* the basename without extension is the converter name */
/* the length check guards the uprv_strcpy() into cnvName below */
311 if ((outFileName
.length() - outBasenameStart
) >= UPRV_LENGTHOF(cnvName
)) {
312 fprintf(stderr
, "converter name %s too long\n", outFileName
.data() + outBasenameStart
);
313 return U_BUFFER_OVERFLOW_ERROR
;
315 uprv_strcpy(cnvName
, outFileName
.data() + outBasenameStart
);
317 /*Adds the target extension*/
318 outFileName
.append(CONVERTER_FILE_EXTENSION
, localError
);
319 if (U_FAILURE(localError
)) {
324 printf("makeconv: processing %s ...\n", arg
);
328 createConverter(&data
, arg
, &localError
);
330 if (U_FAILURE(localError
))
332 /* if an error is found, print out an error msg and keep going */
333 fprintf(stderr
, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
334 outFileName
.data(), arg
, u_errorName(localError
));
341 /* Ensure the static data name matches the file name */
342 /* Changed to ignore directory and only compare base name
345 p
= strrchr(cnvName
, U_FILE_SEP_CHAR
); /* Find last file separator */
347 if(p
== NULL
) /* OK, try alternate */
349 p
= strrchr(cnvName
, U_FILE_ALT_SEP_CHAR
);
352 p
=cnvName
; /* If no separators, no problem */
357 p
++; /* If found separator, don't include it in compare */
/* the comparison is case-insensitive; the warning is suppressed by QUIET */
359 if(uprv_stricmp(p
,data
.staticData
.name
) && !QUIET
)
361 fprintf(stderr
, "Warning: %s%s claims to be '%s'\n",
362 cnvName
, CONVERTER_FILE_EXTENSION
,
363 data
.staticData
.name
);
/*
 * The name from the file is replaced by the file-derived converter name.
 * NOTE(review): the capacity of staticData.name is not visible here;
 * confirm it can hold any cnvName accepted by the length check above.
 */
366 uprv_strcpy((char*)data
.staticData
.name
, cnvName
);
368 if(!uprv_isInvariantString((char*)data
.staticData
.name
, -1)) {
370 "Error: A converter name must contain only invariant characters.\n"
371 "%s is not a valid converter name.\n",
372 data
.staticData
.name
);
/* NOTE(review): this assigns the outer `err`, not the per-file localError */
374 err
= U_INVALID_TABLE_FORMAT
;
378 localError
= U_ZERO_ERROR
;
379 writeConverterData(&data
, cnvName
, destdir
, &localError
);
381 if(U_FAILURE(localError
))
383 /* if an error is found, print out an error msg and keep going*/
384 fprintf(stderr
, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName
.data(), arg
,
385 u_errorName(localError
));
390 else if (printFilename
)
392 puts(outFileName
.data() + outBasenameStart
);
398 cleanupConvData(&data
);
/*
 * Derive platform and CCSID from a converter name.
 *
 * name      - converter name; its first three characters are compared with
 *             "ibm" without regard to case
 * pPlatform - output; the visible assignment stores UCNV_UNKNOWN
 * pCCSID    - output; the visible assignment stores a decimal number parsed
 *             with uprv_strtoul()
 *
 * NOTE(review): the lines that decide which assignment runs, and any
 * adjustment of `name` before parsing, are not visible in this excerpt.
 */
405 getPlatformAndCCSIDFromName(const char *name
, int8_t *pPlatform
, int32_t *pCCSID
) {
406 if( (name
[0]=='i' || name
[0]=='I') &&
407 (name
[1]=='b' || name
[1]=='B') &&
408 (name
[2]=='m' || name
[2]=='M')
415 *pCCSID
=(int32_t)uprv_strtoul(name
, NULL
, 10);
417 *pPlatform
=UCNV_UNKNOWN
;
/*
 * Read the header section of a .ucm file into data->staticData.
 *
 * data       - converter state; data->ucm receives the parsed state
 *              information and data->staticData receives the header values
 * convFile   - open input stream, read line by line
 * pErrorCode - in/out error code; set to U_INVALID_TABLE_FORMAT for an
 *              illegal <subchar>/<subchar1>, a missing conversion type, or a
 *              <subchar1> on an unsuitable converter type
 *
 * After the header lines, values still unset are filled from the prototype
 * for the conversion type (base tables only).
 */
423 readHeader(ConvData
*data
,
424 FileStream
* convFile
,
425 UErrorCode
*pErrorCode
) {
427 char *s
, *key
, *value
;
428 const UConverterStaticData
*prototype
;
429 UConverterStaticData
*staticData
;
431 if(U_FAILURE(*pErrorCode
)) {
/* defaults that header lines may override */
435 staticData
=&data
->staticData
;
436 staticData
->platform
=UCNV_IBM
;
437 staticData
->subCharLen
=0;
439 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
440 /* basic parsing and handling of state-related items */
441 if(ucm_parseHeaderLine(data
->ucm
, line
, &key
, &value
)) {
445 /* stop at the beginning of the mapping section */
446 if(uprv_strcmp(line
, "CHARMAP")==0) {
450 /* collect the information from the header field, ignore unknown keys */
451 if(uprv_strcmp(key
, "code_set_name")==0) {
/*
 * NOTE(review): no length check on `value` is visible before this copy into
 * staticData->name; confirm the destination cannot be overrun.
 */
453 uprv_strcpy((char *)staticData
->name
, value
);
454 getPlatformAndCCSIDFromName(value
, &staticData
->platform
, &staticData
->codepage
);
456 } else if(uprv_strcmp(key
, "subchar")==0) {
457 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
/* NOTE(review): the initialization of `s` is not visible in this excerpt */
461 length
=ucm_parseBytes(bytes
, line
, (const char **)&s
);
/* accepted only when 1..4 bytes were parsed and nothing follows them */
462 if(1<=length
&& length
<=4 && *s
==0) {
463 staticData
->subCharLen
=length
;
464 uprv_memcpy(staticData
->subChar
, bytes
, length
);
466 fprintf(stderr
, "error: illegal <subchar> %s\n", value
);
467 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
470 } else if(uprv_strcmp(key
, "subchar1")==0) {
471 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
/* subchar1 must be exactly one byte with nothing following it */
474 if(1==ucm_parseBytes(bytes
, line
, (const char **)&s
) && *s
==0) {
475 staticData
->subChar1
=bytes
[0];
477 fprintf(stderr
, "error: illegal <subchar1> %s\n", value
);
478 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
484 /* copy values from the UCMFile to the static data */
485 staticData
->maxBytesPerChar
=(int8_t)data
->ucm
->states
.maxCharLength
;
486 staticData
->minBytesPerChar
=(int8_t)data
->ucm
->states
.minCharLength
;
487 staticData
->conversionType
=data
->ucm
->states
.conversionType
;
489 if(staticData
->conversionType
==UCNV_UNSUPPORTED_CONVERTER
) {
490 fprintf(stderr
, "ucm error: missing conversion type (<uconv_class>)\n");
491 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
496 * Now that we know the type, copy any 'default' values from the table.
497 * We need not check the type any further because the parser only
498 * recognizes what we have prototypes for.
500 * For delta (extension-only) tables, copy values from the base file
501 * instead, see createConverter().
/* an empty baseName marks a base table; only then are prototypes consulted */
503 if(data
->ucm
->baseName
[0]==0) {
504 prototype
=ucnv_converterStaticData
[staticData
->conversionType
];
505 if(prototype
!=NULL
) {
/* each field is taken from the prototype only while it is still empty/zero */
506 if(staticData
->name
[0]==0) {
507 uprv_strcpy((char *)staticData
->name
, prototype
->name
);
510 if(staticData
->codepage
==0) {
511 staticData
->codepage
=prototype
->codepage
;
514 if(staticData
->platform
==0) {
515 staticData
->platform
=prototype
->platform
;
518 if(staticData
->minBytesPerChar
==0) {
519 staticData
->minBytesPerChar
=prototype
->minBytesPerChar
;
522 if(staticData
->maxBytesPerChar
==0) {
523 staticData
->maxBytesPerChar
=prototype
->maxBytesPerChar
;
526 if(staticData
->subCharLen
==0) {
527 staticData
->subCharLen
=prototype
->subCharLen
;
528 if(prototype
->subCharLen
>0) {
529 uprv_memcpy(staticData
->subChar
, prototype
->subChar
, prototype
->subCharLen
);
/* a negative outputType is replaced by maxCharLength-1 */
535 if(data
->ucm
->states
.outputType
<0) {
536 data
->ucm
->states
.outputType
=(int8_t)data
->ucm
->states
.maxCharLength
-1;
/*
 * subChar1 is rejected when minBytesPerChar>1 or when the type is neither
 * UCNV_MBCS nor UCNV_EBCDIC_STATEFUL.
 */
539 if( staticData
->subChar1
!=0 &&
540 (staticData
->minBytesPerChar
>1 ||
541 (staticData
->conversionType
!=UCNV_MBCS
&&
542 staticData
->conversionType
!=UCNV_EBCDIC_STATEFUL
))
544 fprintf(stderr
, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
545 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
549 /* return TRUE if a base table was read, FALSE for an extension table */
/*
 * Open and parse one .ucm file into data->ucm.
 *
 * data          - converter state; data->ucm is created here with ucm_open()
 * converterName - path passed to T_FileStream_open() in "r" mode
 * pErrorCode    - in/out error code; U_FILE_ACCESS_ERROR and
 *                 U_INVALID_TABLE_FORMAT are assigned in this function
 *
 * NOTE(review): no T_FileStream_close() is visible on the early error exits
 * after the file has been opened; confirm the stream is not leaked there.
 */
551 readFile(ConvData
*data
, const char* converterName
,
552 UErrorCode
*pErrorCode
) {
555 FileStream
*convFile
;
557 UCMStates
*baseStates
;
560 if(U_FAILURE(*pErrorCode
)) {
564 data
->ucm
=ucm_open();
566 convFile
=T_FileStream_open(converterName
, "r");
568 *pErrorCode
=U_FILE_ACCESS_ERROR
;
572 readHeader(data
, convFile
, pErrorCode
);
573 if(U_FAILURE(*pErrorCode
)) {
/* an empty baseName means this file carries its own state table */
577 if(data
->ucm
->baseName
[0]==0) {
579 baseStates
=&data
->ucm
->states
;
580 ucm_processStates(baseStates
, IGNORE_SISO_CHECK
);
586 /* read the base table */
587 ucm_readTable(data
->ucm
, convFile
, dataIsBase
, baseStates
, pErrorCode
);
588 if(U_FAILURE(*pErrorCode
)) {
592 /* read an extension table if there is one */
593 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
/* `end` starts at the terminating NUL; trailing CR/LF/space/tab are tested */
594 end
=uprv_strchr(line
, 0);
596 (*(end
-1)=='\n' || *(end
-1)=='\r' || *(end
-1)==' ' || *(end
-1)=='\t')) {
601 if(line
[0]=='#' || u_skipWhitespace(line
)==end
) {
602 continue; /* ignore empty and comment lines */
605 if(0==uprv_strcmp(line
, "CHARMAP")) {
606 /* read the extension table */
607 ucm_readTable(data
->ucm
, convFile
, FALSE
, baseStates
, pErrorCode
);
609 fprintf(stderr
, "unexpected text after the base mapping table\n");
614 T_FileStream_close(convFile
);
/* precision flags must be used consistently within each table */
616 if(data
->ucm
->base
->flagsType
==UCM_FLAGS_MIXED
|| data
->ucm
->ext
->flagsType
==UCM_FLAGS_MIXED
) {
617 fprintf(stderr
, "error: some entries have the mapping precision (with '|'), some do not\n");
618 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * Read a .ucm file and build the in-memory tables for it.
 *
 * data          - converter state to fill (cnvData and/or extData are opened)
 * converterName - path of the .ucm file
 * pErrorCode    - in/out error code; U_MEMORY_ALLOCATION_ERROR and
 *                 U_INVALID_TABLE_FORMAT are assigned in this function
 *
 * Two paths are visible: building a normal file with a base table and an
 * optional extension table, and building an extension-only file whose base
 * table is read from a second .ucm file named by data->ucm->baseName.
 */
625 createConverter(ConvData
*data
, const char *converterName
, UErrorCode
*pErrorCode
) {
629 UConverterStaticData
*staticData
;
630 UCMStates
*states
, *baseStates
;
632 if(U_FAILURE(*pErrorCode
)) {
638 dataIsBase
=readFile(data
, converterName
, pErrorCode
);
639 if(U_FAILURE(*pErrorCode
)) {
643 staticData
=&data
->staticData
;
644 states
=&data
->ucm
->states
;
648 * Build a normal .cnv file with a base table
649 * and an optional extension table.
651 data
->cnvData
=MBCSOpen(data
->ucm
);
652 if(data
->cnvData
==NULL
) {
653 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
/* both substitution sequences are validated through the builder's isValid() */
655 } else if(!data
->cnvData
->isValid(data
->cnvData
,
656 staticData
->subChar
, staticData
->subCharLen
)
658 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
659 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
661 } else if(staticData
->subChar1
!=0 &&
662 !data
->cnvData
->isValid(data
->cnvData
, &staticData
->subChar1
, 1)
664 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
665 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* the base/extension consistency check runs only if extension mappings exist */
668 data
->ucm
->ext
->mappingsLength
>0 &&
669 !ucm_checkBaseExt(states
, data
->ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
671 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
672 } else if(data
->ucm
->base
->flagsType
&UCM_FLAGS_EXPLICIT
) {
673 /* sort the table so that it can be turned into UTF-8-friendly data */
674 ucm_sortTable(data
->ucm
->base
);
677 if(U_SUCCESS(*pErrorCode
)) {
679 /* add the base table after ucm_checkBaseExt()! */
680 !data
->cnvData
->addTable(data
->cnvData
, data
->ucm
->base
, &data
->staticData
)
682 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
685 * addTable() may have requested moving more mappings to the extension table
686 * if they fit into the base toUnicode table but not into the
687 * base fromUnicode table.
688 * (Especially for UTF-8-friendly fromUnicode tables.)
689 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
690 * to be excluded from the extension toUnicode data.
691 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
692 * the base fromUnicode table.
694 ucm_moveMappings(data
->ucm
->base
, data
->ucm
->ext
);
695 ucm_sortTable(data
->ucm
->ext
);
696 if(data
->ucm
->ext
->mappingsLength
>0) {
697 /* prepare the extension table, if there is one */
698 data
->extData
=CnvExtOpen(data
->ucm
);
699 if(data
->extData
==NULL
) {
700 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
702 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
704 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
710 /* Build an extension-only .cnv file. */
711 char baseFilename
[500];
714 initConvData(&baseData
);
716 /* assemble a path/filename for data->ucm->baseName */
/*
 * NOTE(review): converterName, baseName and ".ucm" are copied into the
 * 500-byte baseFilename with no visible length check; confirm the combined
 * path cannot exceed the buffer.
 */
717 uprv_strcpy(baseFilename
, converterName
);
718 basename
=(char *)findBasename(baseFilename
);
719 uprv_strcpy(basename
, data
->ucm
->baseName
);
720 uprv_strcat(basename
, ".ucm");
722 /* read the base table */
723 dataIsBase
=readFile(&baseData
, baseFilename
, pErrorCode
);
724 if(U_FAILURE(*pErrorCode
)) {
726 } else if(!dataIsBase
) {
727 fprintf(stderr
, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename
);
728 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
730 /* prepare the extension table */
731 data
->extData
=CnvExtOpen(data
->ucm
);
732 if(data
->extData
==NULL
) {
733 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
735 /* fill in gaps in extension file header fields */
736 UCMapping
*m
, *mLimit
;
737 uint8_t fallbackFlags
;
739 baseStates
=&baseData
.ucm
->states
;
/* DBCS forces a minimum of 2; otherwise an unset minimum comes from the base */
740 if(states
->conversionType
==UCNV_DBCS
) {
741 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=2);
742 } else if(states
->minCharLength
==0) {
743 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=baseStates
->minCharLength
);
745 if(states
->maxCharLength
<states
->minCharLength
) {
746 staticData
->maxBytesPerChar
=(int8_t)(states
->maxCharLength
=baseStates
->maxCharLength
);
/* an unset subchar is inherited from the base file */
749 if(staticData
->subCharLen
==0) {
750 uprv_memcpy(staticData
->subChar
, baseData
.staticData
.subChar
, 4);
751 staticData
->subCharLen
=baseData
.staticData
.subCharLen
;
754 * do not copy subChar1 -
755 * only use what is explicitly specified
756 * because it cannot be unset in the extension file header
759 /* get the fallback flags */
/*
 * The scan over the base mappings stops early once fallbackFlags==3.
 * NOTE(review): the initialization and update of fallbackFlags are not
 * visible in this excerpt.
 */
761 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
762 m
<mLimit
&& fallbackFlags
!=3;
/* bit 1 marks fromUnicode fallbacks, bit 2 marks toUnicode fallbacks */
772 if(fallbackFlags
&1) {
773 staticData
->hasFromUnicodeFallback
=TRUE
;
775 if(fallbackFlags
&2) {
776 staticData
->hasToUnicodeFallback
=TRUE
;
/* each substitution sequence must count as exactly one character in the base states */
779 if(1!=ucm_countChars(baseStates
, staticData
->subChar
, staticData
->subCharLen
)) {
780 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
781 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
783 } else if(staticData
->subChar1
!=0 && 1!=ucm_countChars(baseStates
, &staticData
->subChar1
, 1)) {
784 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
785 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
788 !ucm_checkValidity(data
->ucm
->ext
, baseStates
) ||
789 !ucm_checkBaseExt(baseStates
, baseData
.ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
)
791 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
793 if(states
->maxCharLength
>1) {
795 * When building a normal .cnv file with a base table
796 * for an MBCS (not SBCS) table with explicit precision flags,
797 * the MBCSAddTable() function marks some mappings for moving
798 * to the extension table.
799 * They fit into the base toUnicode table but not into the
800 * base fromUnicode table.
801 * (Note: We do have explicit precision flags because they are
802 * required for extension table generation, and
803 * ucm_checkBaseExt() verified it.)
805 * We do not call MBCSAddTable() here (we probably could)
806 * so we need to do the analysis before building the extension table.
807 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
808 * Redundant mappings in the extension table are ok except they cost some size.
810 * Do this after ucm_checkBaseExt().
812 const MBCSData
*mbcsData
=MBCSGetDummy();
814 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
/* mappings that fail the test are flagged and marked for moving to the extension table */
818 if(!MBCSOkForBaseFromUnicode(mbcsData
, m
->b
.bytes
, m
->bLen
, m
->u
, m
->f
)) {
819 m
->f
|=MBCS_FROM_U_EXT_FLAG
;
820 m
->moveFlag
=UCM_MOVE_TO_EXT
;
826 ucm_moveMappings(baseData
.ucm
->base
, data
->ucm
->ext
);
827 ucm_sortTable(data
->ucm
->ext
);
830 if(!data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)) {
831 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* the temporary base-file state is released once the extension is built */
837 cleanupConvData(&baseData
);
842 * Hey, Emacs, please set the following:
845 * indent-tabs-mode: nil