X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..08b89b0a244153b9f5bbb2f49c55ab0f7298122e:/icuSources/tools/makeconv/makeconv.c?ds=sidebyside diff --git a/icuSources/tools/makeconv/makeconv.c b/icuSources/tools/makeconv/makeconv.c index f3b64430..424c9622 100644 --- a/icuSources/tools/makeconv/makeconv.c +++ b/icuSources/tools/makeconv/makeconv.c @@ -1,7 +1,7 @@ /* ******************************************************************************** * - * Copyright (C) 1998-2004, International Business Machines + * Copyright (C) 1998-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************** @@ -17,7 +17,6 @@ #include #include "unicode/putil.h" -#include "ucnv_io.h" #include "unicode/ucnv_err.h" #include "ucnv_bld.h" #include "ucnv_imp.h" @@ -35,6 +34,8 @@ #include "makeconv.h" #include "genmbcs.h" +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + #define DEBUG 0 typedef struct ConvData { @@ -77,7 +78,8 @@ extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP * Global - verbosity */ UBool VERBOSE = FALSE; -UBool TOUCHFILE = FALSE; +UBool SMALL = FALSE; +UBool IGNORE_SISO_CHECK = FALSE; static void createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); @@ -138,7 +140,7 @@ writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr if(VERBOSE) { - fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv"); + printf("- Opened udata %s.%s\n", cnvName, "cnv"); } @@ -161,19 +163,31 @@ writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErr } if(VERBOSE) { - fprintf(stderr, "- Wrote %u bytes to the udata.\n", (int)sz2); + printf("- Wrote %u bytes to the udata.\n", (int)sz2); } } +enum { + OPT_HELP_H, + OPT_HELP_QUESTION_MARK, + OPT_COPYRIGHT, + OPT_VERSION, + OPT_DESTDIR, + OPT_VERBOSE, + OPT_SMALL, + OPT_IGNORE_SISO_CHECK, + OPT_COUNT +}; + static UOption options[]={ - UOPTION_HELP_H, /* 0 Numbers for those who*/ - UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */ - UOPTION_COPYRIGHT, /* 2 */ - UOPTION_VERSION, /* 3 */ - UOPTION_DESTDIR, /* 4 */ - UOPTION_VERBOSE, /* 5 */ - UOPTION_PACKAGE_NAME, /* 6 */ - UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, + UOPTION_COPYRIGHT, + UOPTION_VERSION, + UOPTION_DESTDIR, + UOPTION_VERBOSE, + { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, + { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } }; int main(int argc, char* argv[]) @@ -181,9 +195,7 @@ int main(int argc, char* argv[]) ConvData data; UErrorCode err = U_ZERO_ERROR, localError; char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; - char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; const char* destdir, *arg; - const char *pkgName = NULL; size_t destdirlen; char* dot = NULL, *outBasename; char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; @@ -200,8 +212,8 @@ int main(int argc, char* argv[]) uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); /* preset then read command line options */ - options[4].value=u_getDataDirectory(); - argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); + options[OPT_DESTDIR].value=u_getDataDirectory(); + argc=u_parseArgs(argc, argv, LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { @@ -211,8 +223,9 @@ int main(int argc, char* argv[]) } else if(argc<2) { argc=-1; } - if(argc<0 || options[0].doesOccur || options[1].doesOccur) { - fprintf(stderr, + if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { + FILE *stdfile=argc<0 ? stderr : stdout; + fprintf(stdfile, "usage: %s [-options] files...\n" "\tread .ucm codepage mapping files and write .cnv files\n" "options:\n" @@ -222,53 +235,32 @@ int main(int argc, char* argv[]) "\t-d or --destdir destination directory, followed by the path\n" "\t-v or --verbose Turn on verbose output\n", argv[0]); - fprintf(stderr, - "\t-p or --pkgname sets the 'package' name for output files.\n" - "\t If name is ICUDATA, then the default icu package\n" - "\t name will be used.\n" - "\t-t or --touchfile Generate additional small file without packagename, for nmake\n"); + fprintf(stdfile, + "\t --small Generate smaller .cnv files. They will be\n" + "\t significantly smaller but may not be compatible with\n" + "\t older versions of ICU and will require heap memory\n" + "\t allocation when loaded.\n" + "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } - if(options[3].doesOccur) { - fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", - dataInfo.formatVersion[0], dataInfo.formatVersion[1]); - fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n"); - fprintf(stderr,"Corporation and others. All Rights Reserved.\n"); + if(options[OPT_VERSION].doesOccur) { + printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", + dataInfo.formatVersion[0], dataInfo.formatVersion[1]); + printf("%s\n", U_COPYRIGHT_STRING); exit(0); } - TOUCHFILE = options[7].doesOccur; - - if(!options[6].doesOccur) - { - pkgName=NULL; - } - else - { - pkgName =options[6].value; - if(!strcmp(pkgName, "ICUDATA")) - { - pkgName = U_ICUDATA_NAME; - } - if(pkgName[0] == 0) - { - pkgName = NULL; + /* get the options values */ + haveCopyright = options[OPT_COPYRIGHT].doesOccur; + destdir = options[OPT_DESTDIR].value; + VERBOSE = options[OPT_VERBOSE].doesOccur; + SMALL = options[OPT_SMALL].doesOccur; - if(TOUCHFILE) - { - fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n", - argv[0]); - exit(1); - } - } + if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { + IGNORE_SISO_CHECK = TRUE; } - /* get the options values */ - haveCopyright = options[2].doesOccur; - destdir = options[4].value; - VERBOSE = options[5].doesOccur; - if (destdir != NULL && *destdir != 0) { uprv_strcpy(outFileName, destdir); destdirlen = uprv_strlen(destdir); @@ -300,6 +292,13 @@ int main(int argc, char* argv[]) { arg = getLongPathname(*argv); + /* Check for potential buffer overflow */ + if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) + { + fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); + return U_BUFFER_OVERFLOW_ERROR; + } + /*produces the right destination path for display*/ if (destdirlen != 0) { @@ -324,21 +323,6 @@ int main(int argc, char* argv[]) /* the basename without extension is the converter name */ uprv_strcpy(cnvName, outBasename); - if(TOUCHFILE) - { - uprv_strcpy(touchFileName, outBasename); - uprv_strcat(touchFileName, ".cnv"); - } - - if(pkgName != NULL) - { - /* changes both basename and filename */ - uprv_strcpy(outBasename, pkgName); - uprv_strcat(outBasename, "_"); - uprv_strcat(outBasename, cnvName); - } - - /*Adds the target extension*/ uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); @@ -361,12 +345,28 @@ int main(int argc, char* argv[]) } else { - /* Make the static data name equal to the file name */ - if( /*VERBOSE && */ uprv_stricmp(cnvName,data.staticData.name)) + /* Insure the static data name matches the file name */ + /* Changed to ignore directory and only compare base name + LDH 1/2/08*/ + char *p; + p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ + + if(p == NULL) /* OK, try alternate */ + { + p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); + if(p == NULL) + { + p=cnvName; /* If no separators, no problem */ + } + } + else + { + p++; /* If found separtor, don't include it in compare */ + } + if(uprv_stricmp(p,data.staticData.name)) { fprintf(stderr, "Warning: %s%s claims to be '%s'\n", - cnvName, - CONVERTER_FILE_EXTENSION, + cnvName, CONVERTER_FILE_EXTENSION, data.staticData.name); } @@ -382,40 +382,10 @@ int main(int argc, char* argv[]) } } - if(pkgName == NULL) - { - uprv_strcpy(cnvNameWithPkg, cnvName); - } - else - { - uprv_strcpy(cnvNameWithPkg, pkgName); - uprv_strcat(cnvNameWithPkg, "_"); - uprv_strcat(cnvNameWithPkg, cnvName); - } + uprv_strcpy(cnvNameWithPkg, cnvName); localError = U_ZERO_ERROR; writeConverterData(&data, cnvNameWithPkg, destdir, &localError); - if(TOUCHFILE) - { - FileStream *q; - char msg[1024]; - - sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n", - cnvName, pkgName); - - q = T_FileStream_open(touchFileName, "w"); - if(q == NULL) - { - fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName); - localError = U_FILE_ACCESS_ERROR; - } - - else - { - T_FileStream_write(q, msg, (int32_t)uprv_strlen(msg)); - T_FileStream_close(q); - } - } if(U_FAILURE(localError)) { @@ -428,7 +398,7 @@ int main(int argc, char* argv[]) } else if (printFilename) { - puts(outFileName); + puts(outBasename); } } fflush(stdout); @@ -463,7 +433,7 @@ readHeader(ConvData *data, FileStream* convFile, const char* converterName, UErrorCode *pErrorCode) { - char line[200]; + char line[1024]; char *s, *key, *value; const UConverterStaticData *prototype; UConverterStaticData *staticData; @@ -590,7 +560,7 @@ readHeader(ConvData *data, static UBool readFile(ConvData *data, const char* converterName, UErrorCode *pErrorCode) { - char line[200]; + char line[1024]; char *end; FileStream *convFile; @@ -617,7 +587,7 @@ readFile(ConvData *data, const char* converterName, if(data->ucm->baseName[0]==0) { dataIsBase=TRUE; baseStates=&data->ucm->states; - ucm_processStates(baseStates); + ucm_processStates(baseStates, IGNORE_SISO_CHECK); } else { dataIsBase=FALSE; baseStates=NULL; @@ -684,6 +654,10 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod states=&data->ucm->states; if(dataIsBase) { + /* + * Build a normal .cnv file with a base table + * and an optional extension table. + */ data->cnvData=MBCSOpen(data->ucm); if(data->cnvData==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; @@ -700,27 +674,50 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; - } else if(data->ucm->ext->mappingsLength>0) { - /* prepare the extension table, if there is one */ - data->extData=CnvExtOpen(data->ucm); - if(data->extData==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } else if( + data->ucm->ext->mappingsLength>0 && + !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) + ) { + *pErrorCode=U_INVALID_TABLE_FORMAT; + } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { + /* sort the table so that it can be turned into UTF-8-friendly data */ + ucm_sortTable(data->ucm->base); + } - } else if( - !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) || - !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) + if(U_SUCCESS(*pErrorCode)) { + if( + /* add the base table after ucm_checkBaseExt()! */ + !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; + } else { + /* + * addTable() may have requested moving more mappings to the extension table + * if they fit into the base toUnicode table but not into the + * base fromUnicode table. + * (Especially for UTF-8-friendly fromUnicode tables.) + * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them + * to be excluded from the extension toUnicode data. + * See MBCSOkForBaseFromUnicode() for which mappings do not fit into + * the base fromUnicode table. + */ + ucm_moveMappings(data->ucm->base, data->ucm->ext); + ucm_sortTable(data->ucm->ext); + if(data->ucm->ext->mappingsLength>0) { + /* prepare the extension table, if there is one */ + data->extData=CnvExtOpen(data->ucm); + if(data->extData==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } else if( + !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) + ) { + *pErrorCode=U_INVALID_TABLE_FORMAT; + } + } } } - - /* add the base table after ucm_checkBaseExt()! */ - if( U_SUCCESS(*pErrorCode) && - !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) - ) { - *pErrorCode=U_INVALID_TABLE_FORMAT; - } } else { + /* Build an extension-only .cnv file. */ char baseFilename[500]; char *basename; @@ -744,7 +741,6 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod data->extData=CnvExtOpen(data->ucm); if(data->extData==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } else { /* fill in gaps in extension file header fields */ UCMapping *m, *mLimit; @@ -782,16 +778,6 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod fallbackFlags|=2; } } - for(m=data->ucm->base->mappings, mLimit=m+data->ucm->base->mappingsLength; - mf==1) { - fallbackFlags|=1; - } else if(m->f==3) { - fallbackFlags|=2; - } - } if(fallbackFlags&1) { staticData->hasFromUnicodeFallback=TRUE; @@ -804,16 +790,56 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; - } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { + } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); *pErrorCode=U_INVALID_TABLE_FORMAT; } else if( !ucm_checkValidity(data->ucm->ext, baseStates) || - !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) || - !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) + !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) ) { *pErrorCode=U_INVALID_TABLE_FORMAT; + } else { + if(states->maxCharLength>1) { + /* + * When building a normal .cnv file with a base table + * for an MBCS (not SBCS) table with explicit precision flags, + * the MBCSAddTable() function marks some mappings for moving + * to the extension table. + * They fit into the base toUnicode table but not into the + * base fromUnicode table. + * (Note: We do have explicit precision flags because they are + * required for extension table generation, and + * ucm_checkBaseExt() verified it.) + * + * We do not call MBCSAddTable() here (we probably could) + * so we need to do the analysis before building the extension table. + * We assume that MBCSAddTable() will build a UTF-8-friendly table. + * Redundant mappings in the extension table are ok except they cost some size. + * + * Do this after ucm_checkBaseExt(). + */ + const MBCSData *mbcsData=MBCSGetDummy(); + int32_t needsMove=0; + for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; + mb.bytes, m->bLen, m->u, m->f)) { + m->f|=MBCS_FROM_U_EXT_FLAG; + m->moveFlag=UCM_MOVE_TO_EXT; + ++needsMove; + } + } + + if(needsMove!=0) { + ucm_moveMappings(baseData.ucm->base, data->ucm->ext); + ucm_sortTable(data->ucm->ext); + } + } + if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { + *pErrorCode=U_INVALID_TABLE_FORMAT; + } } } }