2 ********************************************************************************
4 * Copyright (C) 1998-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
19 #include "unicode/putil.h"
21 #include "unicode/ucnv_err.h"
31 #include "unicode/udata.h"
/*
 * ConvData: working state for one converter being built. The members visible
 * here are two NewConverter pointers (cnvData for the base table, extData for
 * the extension table, judging by how TABLE_BASE/TABLE_EXT are set from them
 * later in this file) plus the shared and static converter data structs.
 * NOTE(review): this listing carries embedded original line numbers and omits
 * lines (41 and 45 are absent). Other code in this file dereferences
 * data->ucm, so a further member is presumably declared on a line not shown
 * here - confirm against the real file.
 */
40 typedef struct ConvData
{
42 NewConverter
*cnvData
, *extData
;
43 UConverterSharedData sharedData
;
44 UConverterStaticData staticData
;
/*
 * initConvData: put a ConvData into its initial state.
 *   data - the structure to initialise; it is overwritten completely.
 * The whole struct is zero-filled, each embedded struct records its own size
 * in its structSize field, and sharedData.staticData is pointed at the
 * embedded staticData member.
 * NOTE(review): the return type and the closing brace fall on original lines
 * that this listing does not show.
 */
48 initConvData(ConvData
*data
) {
49 uprv_memset(data
, 0, sizeof(ConvData
));
50 data
->sharedData
.structSize
=sizeof(UConverterSharedData
);
51 data
->staticData
.structSize
=sizeof(UConverterStaticData
);
/* link the shared data to the static data embedded in the same ConvData */
52 data
->sharedData
.staticData
=&data
->staticData
;
/*
 * cleanupConvData: release the converter objects held by a ConvData.
 *   data - the structure whose cnvData/extData are to be closed.
 * For each of cnvData and extData that is non-NULL, its own close() callback
 * is invoked with the object itself as argument.
 * NOTE(review): original lines 57, 60-61 and 64 onward are absent from this
 * listing, so any additional cleanup (resetting the pointers, releasing other
 * members) and the closing braces cannot be seen here.
 */
56 cleanupConvData(ConvData
*data
) {
/* base-table converter */
58 if(data
->cnvData
!=NULL
) {
59 data
->cnvData
->close(data
->cnvData
);
/* extension-table converter */
62 if(data
->extData
!=NULL
) {
63 data
->extData
->close(data
->extData
);
72 * from ucnvstat.c - static prototypes of data-based converters
/*
 * Table of pointers to prototype static data, one slot per supported
 * converter type; declared extern here and indexed by conversionType in
 * readHeader().
 */
74 extern const UConverterStaticData
* ucnv_converterStaticData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
];
/* Verbose-output flag; main() sets it from options[5] (UOPTION_VERBOSE). */
79 UBool VERBOSE
= FALSE
;
/* Touch-file flag; main() sets it from options[7] ("touchfile", -t). */
80 UBool TOUCHFILE
= FALSE
;
/*
 * Forward declaration of createConverter(): builds the converter data for the
 * file named by converterName into *data and reports failure through
 * *pErrorCode. The definition appears later in this file.
 * NOTE(review): the return type is on a line this listing does not show.
 */
83 createConverter(ConvData
*data
, const char* converterName
, UErrorCode
*pErrorCode
);
86 * Set up the UNewData and write the converter.
/*
 * Forward declaration of writeConverterData(): writes the data held in *data
 * to a "cnv" udata item named cnvName in directory cnvDir, reporting failure
 * through *status. The definition follows below.
 * NOTE(review): the return type is on a line this listing does not show.
 */
89 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
);
/*
 * Whether a copyright string is passed to udata_create(); main() overwrites
 * this default with options[2].doesOccur (UOPTION_COPYRIGHT).
 */
91 UBool haveCopyright
=TRUE
;
/*
 * Header information handed to udata_create() for every output file.
 * dataVersion is zero here and is filled in by main() with the ICU version.
 * NOTE(review): original lines 94-101 (the leading initialiser fields) and the
 * closing brace are absent from this listing.
 */
93 static UDataInfo dataInfo
={
102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
103 {6, 2, 0, 0}, /* formatVersion */
104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
/*
 * writeConverterData: write one converter to a udata item.
 *   data    - converter state; staticData is written first, then the base
 *             and/or extension table through their write() callbacks
 *   cnvName - item name passed to udata_create() (type is always "cnv")
 *   cnvDir  - destination directory passed to udata_create()
 *   status  - in/out error code; set to U_INTERNAL_PROGRAM_ERROR in the
 *             visible size-mismatch report
 * The function keeps its own running byte count (size) and compares it with
 * the byte count returned by udata_finish() (sz2).
 * NOTE(review): this listing omits many original lines (declarations of
 * tableType, size and sz2, early returns, braces, the conditions guarding the
 * diagnostic fprintf calls), so the exact control flow cannot be confirmed.
 */
108 writeConverterData(ConvData
*data
, const char *cnvName
, const char *cnvDir
, UErrorCode
*status
)
110 UNewDataMemory
*mem
= NULL
;
115 if(U_FAILURE(*status
))
/* work out which tables are present: base, extension, or both */
120 tableType
=TABLE_NONE
;
121 if(data
->cnvData
!=NULL
) {
122 tableType
|=TABLE_BASE
;
124 if(data
->extData
!=NULL
) {
125 tableType
|=TABLE_EXT
;
/* open the output item; the copyright string is included only on request */
128 mem
= udata_create(cnvDir
, "cnv", cnvName
, &dataInfo
, haveCopyright
? U_COPYRIGHT_STRING
: NULL
, status
);
130 if(U_FAILURE(*status
))
132 fprintf(stderr
, "Couldn't create the udata %s.%s: %s\n",
135 u_errorName(*status
));
141 fprintf(stderr
, "- Opened udata %s.%s\n", cnvName
, "cnv");
145 /* all read only, clean, platform independent data. Mmmm. :) */
146 udata_writeBlock(mem
, &data
->staticData
, sizeof(UConverterStaticData
));
147 size
+= sizeof(UConverterStaticData
); /* Is 4-aligned - by size */
148 /* Now, write the table */
/* each write() callback returns a byte count that is added to the running total */
149 if(tableType
&TABLE_BASE
) {
150 size
+= data
->cnvData
->write(data
->cnvData
, &data
->staticData
, mem
, tableType
);
152 if(tableType
&TABLE_EXT
) {
153 size
+= data
->extData
->write(data
->extData
, &data
->staticData
, mem
, tableType
);
/* close the item; sz2 is the byte count reported by udata_finish() */
156 sz2
= udata_finish(mem
, status
);
/*
 * Consistency report between sz2 and the locally counted size.
 * NOTE(review): the format uses %u while the arguments are cast to (int);
 * %d, or a cast to unsigned, would match - confirm and align.
 */
159 fprintf(stderr
, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2
, (int)size
);
160 *status
=U_INTERNAL_PROGRAM_ERROR
;
/* NOTE(review): same %u / (int) mismatch as above. */
164 fprintf(stderr
, "- Wrote %u bytes to the udata.\n", (int)sz2
);
/*
 * Command-line option table passed to u_parseArgs() in main(). main() refers
 * to the entries by numeric index, so the order here (0..7, as annotated on
 * each line) must stay in step with those indices.
 * NOTE(review): the closing brace of the initialiser is on a line this
 * listing does not show.
 */
168 static UOption options
[]={
169 UOPTION_HELP_H
, /* 0 Numbers for those who*/
170 UOPTION_HELP_QUESTION_MARK
, /* 1 can't count. */
171 UOPTION_COPYRIGHT
, /* 2 */
172 UOPTION_VERSION
, /* 3 */
173 UOPTION_DESTDIR
, /* 4 */
174 UOPTION_VERBOSE
, /* 5 */
175 UOPTION_PACKAGE_NAME
, /* 6 */
176 UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG
) /* 7 */
/*
 * main: command-line driver. Parses the options, then for every remaining
 * argument (a .ucm file) derives the output and converter names, builds the
 * converter with createConverter(), writes it with writeConverterData(),
 * optionally writes a touch file, and finally calls cleanupConvData().
 * Per-file failures are recorded in localError and reported on stderr; the
 * comments in the loop say processing keeps going after an error.
 * NOTE(review): this listing omits many original lines (several local
 * declarations such as data, i, destdirlen, printFilename, msg and q; braces;
 * else-branches; the final return), so branch structure is only partly
 * visible.
 */
179 int main(int argc
, char* argv
[])
182 UErrorCode err
= U_ZERO_ERROR
, localError
;
183 char outFileName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
184 char touchFileName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
185 const char* destdir
, *arg
;
186 const char *pkgName
= NULL
;
188 char* dot
= NULL
, *outBasename
;
189 char cnvName
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
190 char cnvNameWithPkg
[UCNV_MAX_FULL_FILE_NAME_LENGTH
];
191 UVersionInfo icuVersion
;
196 U_MAIN_INIT_ARGS(argc
, argv
);
198 /* Set up the ICU version number */
199 u_getVersion(icuVersion
);
200 uprv_memcpy(&dataInfo
.dataVersion
, &icuVersion
, sizeof(UVersionInfo
));
202 /* preset then read command line options */
/* option 4 is the destination directory; it defaults to the ICU data directory */
203 options
[4].value
=u_getDataDirectory();
204 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
206 /* error handling, printing usage message */
209 "error in command line argument \"%s\"\n",
/* usage is shown on a parse error (argc<0) or when either help option occurs */
214 if(argc
<0 || options
[0].doesOccur
|| options
[1].doesOccur
) {
216 "usage: %s [-options] files...\n"
217 "\tread .ucm codepage mapping files and write .cnv files\n"
219 "\t-h or -? or --help this usage text\n"
220 "\t-V or --version show a version message\n"
221 "\t-c or --copyright include a copyright notice\n"
222 "\t-d or --destdir destination directory, followed by the path\n"
223 "\t-v or --verbose Turn on verbose output\n",
226 "\t-p or --pkgname sets the 'package' name for output files.\n"
227 "\t If name is ICUDATA, then the default icu package\n"
228 "\t name will be used.\n"
229 "\t-t or --touchfile Generate additional small file without packagename, for nmake\n");
/* a parse error yields an error exit code; an explicit help request yields success */
230 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
/*
 * Version message (option 3).
 * NOTE(review): %hu is used for the formatVersion elements; their declared
 * type is not visible here - confirm the specifier matches after promotion.
 * NOTE(review): the message says "1998-2000" while the file header says
 * 1998-2004 - confirm which is intended.
 */
233 if(options
[3].doesOccur
) {
234 fprintf(stderr
,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
235 dataInfo
.formatVersion
[0], dataInfo
.formatVersion
[1]);
236 fprintf(stderr
, "Copyright (C) 1998-2000, International Business Machines\n");
237 fprintf(stderr
,"Corporation and others. All Rights Reserved.\n");
241 TOUCHFILE
= options
[7].doesOccur
;
/* package name (option 6); the literal name "ICUDATA" selects U_ICUDATA_NAME */
243 if(!options
[6].doesOccur
)
249 pkgName
=options
[6].value
;
250 if(!strcmp(pkgName
, "ICUDATA"))
252 pkgName
= U_ICUDATA_NAME
;
260 fprintf(stderr
, "%s: Don't use touchfile option with an empty packagename.\n",
267 /* get the options values */
268 haveCopyright
= options
[2].doesOccur
;
269 destdir
= options
[4].value
;
270 VERBOSE
= options
[5].doesOccur
;
/*
 * Seed outFileName with the destination directory and make outBasename point
 * just past it, appending a file separator when the directory lacks one.
 * NOTE(review): the copy into outFileName is not length-checked in the
 * visible lines; a long --destdir could overrun the fixed-size buffer.
 */
272 if (destdir
!= NULL
&& *destdir
!= 0) {
273 uprv_strcpy(outFileName
, destdir
);
274 destdirlen
= uprv_strlen(destdir
);
275 outBasename
= outFileName
+ destdirlen
;
276 if (*(outBasename
- 1) != U_FILE_SEP_CHAR
) {
277 *outBasename
++ = U_FILE_SEP_CHAR
;
/* no destination directory: the basename starts at the beginning of the buffer */
282 outBasename
= outFileName
;
/* list the input files; NOTE(review): the guard around this output is not visible here */
288 printf("makeconv: processing %d files...\n", argc
- 1);
289 for(i
=1; i
<argc
; ++i
) {
290 printf("%s ", argv
[i
]);
/* print per-file progress when there is more than one input file or in verbose mode */
298 printFilename
= (UBool
) (argc
> 2 || VERBOSE
);
/* walk the remaining arguments; argc is counted down and argv advanced in step */
299 for (++argv
; --argc
; ++argv
)
301 arg
= getLongPathname(*argv
);
303 /*produces the right destination path for display*/
306 const char *basename
;
308 /* find the last file separator */
309 basename
= findBasename(arg
);
310 uprv_strcpy(outBasename
, basename
);
314 uprv_strcpy(outFileName
, arg
);
317 /*removes the extension if any is found*/
318 dot
= uprv_strrchr(outBasename
, '.');
324 /* the basename without extension is the converter name */
325 uprv_strcpy(cnvName
, outBasename
);
/* touch file name: the unpackaged basename followed by ".cnv" */
329 uprv_strcpy(touchFileName
, outBasename
);
330 uprv_strcat(touchFileName
, ".cnv");
335 /* changes both basename and filename */
/* rewrite the basename as <pkgName>_<cnvName> */
336 uprv_strcpy(outBasename
, pkgName
);
337 uprv_strcat(outBasename
, "_");
338 uprv_strcat(outBasename
, cnvName
);
342 /*Adds the target extension*/
343 uprv_strcat(outBasename
, CONVERTER_FILE_EXTENSION
);
346 printf("makeconv: processing %s ...\n", arg
);
/* each file starts with a clean per-file error code */
349 localError
= U_ZERO_ERROR
;
351 createConverter(&data
, arg
, &localError
);
353 if (U_FAILURE(localError
))
355 /* if an error is found, print out an error msg and keep going */
356 fprintf(stderr
, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
357 u_errorName(localError
));
364 /* Make the static data name equal to the file name */
/* warn when the name stored by the .ucm header differs (case-insensitively) from the file name */
365 if( /*VERBOSE && */ uprv_stricmp(cnvName
,data
.staticData
.name
))
367 fprintf(stderr
, "Warning: %s%s claims to be '%s'\n",
369 CONVERTER_FILE_EXTENSION
,
370 data
.staticData
.name
);
373 uprv_strcpy((char*)data
.staticData
.name
, cnvName
);
/* converter names are restricted to invariant characters */
375 if(!uprv_isInvariantString((char*)data
.staticData
.name
, -1)) {
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data
.staticData
.name
);
381 err
= U_INVALID_TABLE_FORMAT
;
/* name handed to writeConverterData(): plain cnvName, or <pkgName>_<cnvName> */
387 uprv_strcpy(cnvNameWithPkg
, cnvName
);
391 uprv_strcpy(cnvNameWithPkg
, pkgName
);
392 uprv_strcat(cnvNameWithPkg
, "_");
393 uprv_strcat(cnvNameWithPkg
, cnvName
);
396 localError
= U_ZERO_ERROR
;
397 writeConverterData(&data
, cnvNameWithPkg
, destdir
, &localError
);
/* touch-file handling: compose a short message and write it to touchFileName */
403 sprintf(msg
, "This empty file tells nmake that %s in package %s has been updated.\n",
406 q
= T_FileStream_open(touchFileName
, "w");
409 fprintf(stderr
, "Error writing touchfile \"%s\"\n", touchFileName
);
410 localError
= U_FILE_ACCESS_ERROR
;
415 T_FileStream_write(q
, msg
, (int32_t)uprv_strlen(msg
));
416 T_FileStream_close(q
);
420 if(U_FAILURE(localError
))
422 /* if an error is found, print out an error msg and keep going*/
423 fprintf(stderr
, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName
, arg
,
424 u_errorName(localError
));
429 else if (printFilename
)
/* release the converter objects built for this file */
437 cleanupConvData(&data
);
/*
 * getPlatformAndCCSIDFromName: derive a platform code and CCSID from a
 * converter name.
 *   name      - converter name to inspect
 *   pPlatform - receives the platform code
 *   pCCSID    - receives the CCSID
 * The visible condition matches an "ibm" prefix in any letter case; a CCSID is
 * parsed from name as a decimal number with uprv_strtoul, and UCNV_UNKNOWN is
 * stored as the platform on another path.
 * NOTE(review): original lines 448-453, 455 and 457 onward are absent, so how
 * name is advanced before the strtoul call and which branch each visible
 * assignment belongs to cannot be confirmed from this listing.
 */
444 getPlatformAndCCSIDFromName(const char *name
, int8_t *pPlatform
, int32_t *pCCSID
) {
/* case-insensitive test for the three-letter prefix "ibm" */
445 if( (name
[0]=='i' || name
[0]=='I') &&
446 (name
[1]=='b' || name
[1]=='B') &&
447 (name
[2]=='m' || name
[2]=='M')
454 *pCCSID
=(int32_t)uprv_strtoul(name
, NULL
, 10);
456 *pPlatform
=UCNV_UNKNOWN
;
/*
 * readHeader: parse the header section of an open .ucm stream.
 *   data          - receives the results in data->staticData and data->ucm
 *   convFile      - stream positioned at the start of the file header
 *   converterName - name of the file being read (not used in visible lines)
 *   pErrorCode    - in/out error code; the visible failure paths set
 *                   U_INVALID_TABLE_FORMAT
 * Lines are read until "CHARMAP". The keys handled in the visible code are
 * code_set_name, subchar and subchar1; per the existing comment, unknown keys
 * are ignored. Afterwards the byte-length limits and conversion type are
 * copied from data->ucm->states, and for files without a base name any unset
 * fields are filled from the prototype table for that conversion type.
 * NOTE(review): this listing omits many original lines (the declarations of
 * line and length, the assignments to s before parsing, returns, braces), so
 * the control flow is only partly visible.
 */
462 readHeader(ConvData
*data
,
463 FileStream
* convFile
,
464 const char* converterName
,
465 UErrorCode
*pErrorCode
) {
467 char *s
, *key
, *value
;
468 const UConverterStaticData
*prototype
;
469 UConverterStaticData
*staticData
;
471 if(U_FAILURE(*pErrorCode
)) {
/* defaults applied before any header line is read */
475 staticData
=&data
->staticData
;
476 staticData
->platform
=UCNV_IBM
;
477 staticData
->subCharLen
=0;
479 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
480 /* basic parsing and handling of state-related items */
481 if(ucm_parseHeaderLine(data
->ucm
, line
, &key
, &value
)) {
485 /* stop at the beginning of the mapping section */
486 if(uprv_strcmp(line
, "CHARMAP")==0) {
490 /* collect the information from the header field, ignore unknown keys */
491 if(uprv_strcmp(key
, "code_set_name")==0) {
/* the code set name becomes the converter name and also yields platform and CCSID */
493 uprv_strcpy((char *)staticData
->name
, value
);
494 getPlatformAndCCSIDFromName(value
, &staticData
->platform
, &staticData
->codepage
);
496 } else if(uprv_strcmp(key
, "subchar")==0) {
497 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
/* accept 1..4 bytes with nothing left after them (s must end at the terminator) */
501 length
=ucm_parseBytes(bytes
, line
, (const char **)&s
);
502 if(1<=length
&& length
<=4 && *s
==0) {
503 staticData
->subCharLen
=length
;
504 uprv_memcpy(staticData
->subChar
, bytes
, length
);
506 fprintf(stderr
, "error: illegal <subchar> %s\n", value
);
507 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
510 } else if(uprv_strcmp(key
, "subchar1")==0) {
511 uint8_t bytes
[UCNV_EXT_MAX_BYTES
];
/* subchar1 must be exactly one byte with nothing left after it */
514 if(1==ucm_parseBytes(bytes
, line
, (const char **)&s
) && *s
==0) {
515 staticData
->subChar1
=bytes
[0];
517 fprintf(stderr
, "error: illegal <subchar1> %s\n", value
);
518 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
524 /* copy values from the UCMFile to the static data */
525 staticData
->maxBytesPerChar
=(int8_t)data
->ucm
->states
.maxCharLength
;
526 staticData
->minBytesPerChar
=(int8_t)data
->ucm
->states
.minCharLength
;
527 staticData
->conversionType
=data
->ucm
->states
.conversionType
;
529 if(staticData
->conversionType
==UCNV_UNSUPPORTED_CONVERTER
) {
530 fprintf(stderr
, "ucm error: missing conversion type (<uconv_class>)\n");
531 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
536 * Now that we know the type, copy any 'default' values from the table.
537 * We need not check the type any further because the parser only
538 * recognizes what we have prototypes for.
540 * For delta (extension-only) tables, copy values from the base file
541 * instead, see createConverter().
/* an empty baseName means this file is not an extension-only (delta) table */
543 if(data
->ucm
->baseName
[0]==0) {
544 prototype
=ucnv_converterStaticData
[staticData
->conversionType
];
545 if(prototype
!=NULL
) {
/* each field is taken from the prototype only when the header left it empty or zero */
546 if(staticData
->name
[0]==0) {
547 uprv_strcpy((char *)staticData
->name
, prototype
->name
);
550 if(staticData
->codepage
==0) {
551 staticData
->codepage
=prototype
->codepage
;
554 if(staticData
->platform
==0) {
555 staticData
->platform
=prototype
->platform
;
558 if(staticData
->minBytesPerChar
==0) {
559 staticData
->minBytesPerChar
=prototype
->minBytesPerChar
;
562 if(staticData
->maxBytesPerChar
==0) {
563 staticData
->maxBytesPerChar
=prototype
->maxBytesPerChar
;
566 if(staticData
->subCharLen
==0) {
567 staticData
->subCharLen
=prototype
->subCharLen
;
568 if(prototype
->subCharLen
>0) {
569 uprv_memcpy(staticData
->subChar
, prototype
->subChar
, prototype
->subCharLen
);
/* a negative outputType is replaced by maxCharLength-1 */
575 if(data
->ucm
->states
.outputType
<0) {
576 data
->ucm
->states
.outputType
=(int8_t)data
->ucm
->states
.maxCharLength
-1;
/* subchar1 is rejected when minBytesPerChar>1 or the type is neither MBCS nor EBCDIC_STATEFUL */
579 if( staticData
->subChar1
!=0 &&
580 (staticData
->minBytesPerChar
>1 ||
581 (staticData
->conversionType
!=UCNV_MBCS
&&
582 staticData
->conversionType
!=UCNV_EBCDIC_STATEFUL
))
584 fprintf(stderr
, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
585 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
589 /* return TRUE if a base table was read, FALSE for an extension table */
/*
 * readFile: open a .ucm file and load its header and mapping tables.
 *   data          - receives a freshly opened UCMFile in data->ucm plus the
 *                   header values stored by readHeader()
 *   converterName - path of the file, opened in text read mode
 *   pErrorCode    - in/out error code; the visible lines set
 *                   U_FILE_ACCESS_ERROR and U_INVALID_TABLE_FORMAT
 * After the header, a first table is read with ucm_readTable(); any later
 * "CHARMAP" section is read as an extension table (isBase argument FALSE).
 * Mixed use of the '|' precision flag in either table is reported as an
 * error.
 * NOTE(review): this listing omits many original lines (declarations of line,
 * end and dataIsBase, the open-failure test, the else-branch that handles a
 * non-empty baseName, returns, braces), so the control flow is only partly
 * visible.
 */
591 readFile(ConvData
*data
, const char* converterName
,
592 UErrorCode
*pErrorCode
) {
595 FileStream
*convFile
;
597 UCMStates
*baseStates
;
600 if(U_FAILURE(*pErrorCode
)) {
604 data
->ucm
=ucm_open();
606 convFile
=T_FileStream_open(converterName
, "r");
608 *pErrorCode
=U_FILE_ACCESS_ERROR
;
612 readHeader(data
, convFile
, converterName
, pErrorCode
);
613 if(U_FAILURE(*pErrorCode
)) {
/* no base name in the header: the file's own states serve as the base states */
617 if(data
->ucm
->baseName
[0]==0) {
619 baseStates
=&data
->ucm
->states
;
620 ucm_processStates(baseStates
);
626 /* read the base table */
627 ucm_readTable(data
->ucm
, convFile
, dataIsBase
, baseStates
, pErrorCode
);
628 if(U_FAILURE(*pErrorCode
)) {
632 /* read an extension table if there is one */
633 while(T_FileStream_readLine(convFile
, line
, sizeof(line
))) {
/* end points at the string terminator; trailing line breaks and blanks are tested from there */
634 end
=uprv_strchr(line
, 0);
636 (*(end
-1)=='\n' || *(end
-1)=='\r' || *(end
-1)==' ' || *(end
-1)=='\t')) {
641 if(line
[0]=='#' || u_skipWhitespace(line
)==end
) {
642 continue; /* ignore empty and comment lines */
645 if(0==uprv_strcmp(line
, "CHARMAP")) {
646 /* read the extension table */
647 ucm_readTable(data
->ucm
, convFile
, FALSE
, baseStates
, pErrorCode
);
649 fprintf(stderr
, "unexpected text after the base mapping table\n");
654 T_FileStream_close(convFile
);
/* a table that mixes entries with and without the '|' precision flag is invalid */
656 if(data
->ucm
->base
->flagsType
==UCM_FLAGS_MIXED
|| data
->ucm
->ext
->flagsType
==UCM_FLAGS_MIXED
) {
657 fprintf(stderr
, "error: some entries have the mapping precision (with '|'), some do not\n");
658 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * createConverter: build the in-memory converter data for one .ucm file.
 *   data          - receives the result: cnvData for a base table and/or
 *                   extData for an extension table
 *   converterName - path of the .ucm file to read
 *   pErrorCode    - in/out error code; the visible failure paths set
 *                   U_MEMORY_ALLOCATION_ERROR or U_INVALID_TABLE_FORMAT
 * Two paths are visible:
 *  - base table: open an MBCS converter, validate subchar and subchar1
 *    against it, optionally prepare an extension table, then add the base
 *    table;
 *  - extension-only table: load the file named by data->ucm->baseName (with
 *    ".ucm" appended) into a temporary baseData, fill gaps in this file's
 *    header from it, validate, add the extension table, and clean up
 *    baseData.
 * NOTE(review): this listing omits many original lines (the test on
 * dataIsBase that selects the path, the declarations of dataIsBase, baseData
 * and basename, loop bodies, returns, braces), so the control flow is only
 * partly visible.
 */
665 createConverter(ConvData
*data
, const char *converterName
, UErrorCode
*pErrorCode
) {
669 UConverterStaticData
*staticData
;
670 UCMStates
*states
, *baseStates
;
672 if(U_FAILURE(*pErrorCode
)) {
678 dataIsBase
=readFile(data
, converterName
, pErrorCode
);
679 if(U_FAILURE(*pErrorCode
)) {
683 staticData
=&data
->staticData
;
684 states
=&data
->ucm
->states
;
/* base-table path: create the MBCS converter object */
687 data
->cnvData
=MBCSOpen(data
->ucm
);
688 if(data
->cnvData
==NULL
) {
689 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
/* the substitution character must be a valid byte sequence for this converter */
691 } else if(!data
->cnvData
->isValid(data
->cnvData
,
692 staticData
->subChar
, staticData
->subCharLen
)
694 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
695 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* subchar1 is validated only when it has been set (non-zero) */
697 } else if(staticData
->subChar1
!=0 &&
698 !data
->cnvData
->isValid(data
->cnvData
, &staticData
->subChar1
, 1)
700 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
701 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
703 } else if(data
->ucm
->ext
->mappingsLength
>0) {
704 /* prepare the extension table, if there is one */
705 data
->extData
=CnvExtOpen(data
->ucm
);
706 if(data
->extData
==NULL
) {
707 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
/* either a failed base/extension consistency check or a failed addTable() marks the table invalid */
710 !ucm_checkBaseExt(states
, data
->ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
) ||
711 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
713 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
717 /* add the base table after ucm_checkBaseExt()! */
718 if( U_SUCCESS(*pErrorCode
) &&
719 !data
->cnvData
->addTable(data
->cnvData
, data
->ucm
->base
, &data
->staticData
)
721 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * Extension-only path: buffer for the path of the base .ucm file.
 * NOTE(review): converterName is copied into this 500-byte buffer without a
 * length check in the visible lines, and baseName plus ".ucm" are then
 * written at the basename position - confirm the inputs are bounded.
 */
724 char baseFilename
[500];
727 initConvData(&baseData
);
729 /* assemble a path/filename for data->ucm->baseName */
730 uprv_strcpy(baseFilename
, converterName
);
731 basename
=(char *)findBasename(baseFilename
);
732 uprv_strcpy(basename
, data
->ucm
->baseName
);
733 uprv_strcat(basename
, ".ucm");
735 /* read the base table */
736 dataIsBase
=readFile(&baseData
, baseFilename
, pErrorCode
);
737 if(U_FAILURE(*pErrorCode
)) {
739 } else if(!dataIsBase
) {
740 fprintf(stderr
, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename
);
741 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
743 /* prepare the extension table */
744 data
->extData
=CnvExtOpen(data
->ucm
);
745 if(data
->extData
==NULL
) {
746 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
749 /* fill in gaps in extension file header fields */
750 UCMapping
*m
, *mLimit
;
751 uint8_t fallbackFlags
;
753 baseStates
=&baseData
.ucm
->states
;
/* DBCS forces a minimum length of 2; otherwise an unset minimum is taken from the base file */
754 if(states
->conversionType
==UCNV_DBCS
) {
755 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=2);
756 } else if(states
->minCharLength
==0) {
757 staticData
->minBytesPerChar
=(int8_t)(states
->minCharLength
=baseStates
->minCharLength
);
/* a maximum below the minimum is replaced by the base file's maximum */
759 if(states
->maxCharLength
<states
->minCharLength
) {
760 staticData
->maxBytesPerChar
=(int8_t)(states
->maxCharLength
=baseStates
->maxCharLength
);
/* no subchar in the extension header: take the one from the base file (4 bytes copied) */
763 if(staticData
->subCharLen
==0) {
764 uprv_memcpy(staticData
->subChar
, baseData
.staticData
.subChar
, 4);
765 staticData
->subCharLen
=baseData
.staticData
.subCharLen
;
768 * do not copy subChar1 -
769 * only use what is explicitly specified
770 * because it cannot be unset in the extension file header
773 /* get the fallback flags */
/*
 * Two scans, first over the base file's base mappings, then over this file's
 * base mappings; each stops early once fallbackFlags reaches 3 (both bits).
 * NOTE(review): the initialisation of fallbackFlags and both loop bodies are
 * on lines this listing omits - confirm it is initialised before first read.
 */
775 for(m
=baseData
.ucm
->base
->mappings
, mLimit
=m
+baseData
.ucm
->base
->mappingsLength
;
776 m
<mLimit
&& fallbackFlags
!=3;
785 for(m
=data
->ucm
->base
->mappings
, mLimit
=m
+data
->ucm
->base
->mappingsLength
;
786 m
<mLimit
&& fallbackFlags
!=3;
/* bit 0 -> from-Unicode fallbacks present, bit 1 -> to-Unicode fallbacks present */
796 if(fallbackFlags
&1) {
797 staticData
->hasFromUnicodeFallback
=TRUE
;
799 if(fallbackFlags
&2) {
800 staticData
->hasToUnicodeFallback
=TRUE
;
/* the substitution sequence must count as exactly one character in the base states */
803 if(1!=ucm_countChars(baseStates
, staticData
->subChar
, staticData
->subCharLen
)) {
804 fprintf(stderr
, " the substitution character byte sequence is illegal in this codepage structure!\n");
805 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/*
 * NOTE(review): unlike the base-table path (original line 697), this check is
 * not guarded by subChar1!=0, so an unset subChar1 (0) is still run through
 * ucm_countChars and could be rejected where byte 0x00 is not a complete
 * character - confirm whether the guard should be added here too.
 */
807 } else if(1!=ucm_countChars(baseStates
, &staticData
->subChar1
, 1)) {
808 fprintf(stderr
, " the subchar1 byte is illegal in this codepage structure!\n");
809 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* validate the extension mappings against the base states and base table, then add them */
812 !ucm_checkValidity(data
->ucm
->ext
, baseStates
) ||
813 !ucm_checkBaseExt(baseStates
, baseData
.ucm
->base
, data
->ucm
->ext
, data
->ucm
->ext
, FALSE
) ||
814 !data
->extData
->addTable(data
->extData
, data
->ucm
->ext
, &data
->staticData
)
816 *pErrorCode
=U_INVALID_TABLE_FORMAT
;
/* the temporary base-file data is no longer needed */
821 cleanupConvData(&baseData
);
826 * Hey, Emacs, please set the following:
829 * indent-tabs-mode: nil