/*
********************************************************************************
*
- * Copyright (C) 1998-2003, International Business Machines
+ * Copyright (C) 1998-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************************
#include <stdio.h>
#include "unicode/putil.h"
-#include "ucnv_io.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
#include "ucnv_imp.h"
#include "ucnv_cnv.h"
#include "cstring.h"
#include "cmemory.h"
+#include "uinvchar.h"
#include "filestrm.h"
#include "toolutil.h"
#include "uoptions.h"
#include "unicode/udata.h"
#include "unewdata.h"
-#include "ucmpwrit.h"
+#include "uparse.h"
+#include "ucm.h"
#include "makeconv.h"
#include "genmbcs.h"
-#define DEBUG 0
-
-/*
- * from ucnvstat.c - static prototypes of data-based converters
- */
-extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
-
-/*
- * Global - verbosity
- */
-UBool VERBOSE = FALSE;
-UBool TOUCHFILE = FALSE;
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-/*Reads the header of the table file and fills in basic knowledge about the converter
- *in "converter"
- */
-static void readHeaderFromFile(UConverterSharedData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err);
-
-/*Reads the rest of the file, and fills up the shared objects if necessary
-Returns the UConverterTable. */
-static void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err);
-
-/* creates a UConverterSharedData from a mapping file.
- * Fills in: *staticData, *table. Converter is NOT otherwise useful.
- */
-static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err);
-
-/*
- * Set up the UNewData and write the converter..
- */
-void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status);
+#define DEBUG 0
-static const char NLTC_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , 'U', '\0' };
-static const char FALLBACK_SEPARATOR = '|';
-static const char CODEPOINT_SEPARATORS[8] = { '\r', '>', '\\', 'x', '\n', ' ', '\t', '\0' };
-static const char UNICODE_CODEPOINT_SEPARATORS[6] = { '<', '>', 'U', ' ', '\t', '\0' };
+/*
+ * Working state for building one converter: the parsed .ucm file, the table
+ * builders, and the converter data structures that get filled in.
+ */
+typedef struct ConvData {
+ UCMFile *ucm; /* released with ucm_close() in cleanupConvData() */
+ NewConverter *cnvData; /* base table builder (TABLE_BASE when non-NULL) */
+ NewConverter *extData; /* extension table builder (TABLE_EXT when non-NULL) */
+ UConverterSharedData sharedData; /* its staticData pointer is set to &staticData */
+ UConverterStaticData staticData; /* written as the first block of the .cnv data */
+} ConvData;
-static const char *
-skipWhitespace(const char *s) {
- while(*s==' ' || *s=='\t') {
- ++s;
- }
- return s;
+/*
+ * Zero the whole ConvData, record the sizes of the two embedded converter
+ * structs, and point the shared data at the embedded static data.
+ */
+static void
+initConvData(ConvData *data) {
+ UConverterSharedData *shared;
+ UConverterStaticData *fixed;
+
+ uprv_memset(data, 0, sizeof(*data));
+
+ shared=&data->sharedData;
+ fixed=&data->staticData;
+
+ fixed->structSize=sizeof(*fixed);
+ shared->structSize=sizeof(*shared);
+ shared->staticData=fixed;
}
-static int32_t
-parseCodepageBytes(const char *s, uint32_t *pBytes, const char **pEnd) {
- char *end;
- int32_t length=0;
- uint32_t bytes=0, value;
-
- while(s[0]=='\\' && s[1]=='x') {
- if(length==4) {
- return -1;
+static void
+cleanupConvData(ConvData *data) { /* closes the table builders and the UCMFile held in *data; a NULL data is ignored */
+ if(data!=NULL) {
+ if(data->cnvData!=NULL) {
+ data->cnvData->close(data->cnvData); /* each NewConverter carries its own close function */
+ data->cnvData=NULL; /* cleared so the pointer is not used again after closing */
}
- value=uprv_strtoul(s+2, &end, 16);
- s+=4;
- if(end!=s) {
- return -1;
+ if(data->extData!=NULL) {
+ data->extData->close(data->extData);
+ data->extData=NULL;
}
- bytes=(bytes<<8)|value;
- ++length;
- }
- if(length==0) {
- return -1;
+ ucm_close(data->ucm); /* called unconditionally; NOTE(review): assumes ucm_close() accepts NULL - confirm */
+ data->ucm=NULL;
}
- if(pEnd!=NULL) {
- *pEnd=s;
- }
- *pBytes=bytes;
- return length;
-}
-
-/* Remove all characters followed by '#'. There is an exception if there
- * is a fallback sign '|' after the comment and the comment does not
- * start in column 0. In this case, we just blank from '#' to just
- * before the '|' in order to support the fact that IBM official .ucm
- * files have the fallback information in comments!
- */
-static char *
- removeComments (char *line)
-{
- char *pound;
-
- line = (char*)skipWhitespace(line);
- pound = uprv_strchr (line, '#');
- if (pound != NULL)
- {
- char *fallback = pound == line ? 0 : uprv_strchr(pound + 1, '|');
- if (fallback != NULL)
- {
- uprv_memset(pound, ' ', fallback-pound);
- }
- else
- {
- *pound = '\0';
- }
- }
- return line;
}
-/* Returns true in c is a in set 'setOfChars', false otherwise
+/*
+ * from ucnvstat.c - static prototypes of data-based converters
*/
-static UBool
- isInSet (char c, const char *setOfChars)
-{
- uint8_t i = 0;
-
- while (setOfChars[i] != '\0')
- {
- if (c == setOfChars[i++])
- return TRUE;
- }
-
- return FALSE;
-}
+extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
-/* Returns pointer to the next non-whitespace (or non-separator)
+/*
+ * Global - verbosity
*/
-static int32_t
- nextTokenOffset (const char *line, const char *separators)
-{
- int32_t i = 0;
-
- while (line[i] && isInSet(line[i], separators))
- i++;
+UBool VERBOSE = FALSE;
+UBool SMALL = FALSE;
+UBool IGNORE_SISO_CHECK = FALSE;
- return i;
-}
+static void
+createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
-/* Returns pointer to the next token based on the set of separators
+/*
+ * Set up the UNewData and write the converter data.
*/
-static char *
- getToken (char *token, char *line, const char *separators)
-{
- int32_t i = nextTokenOffset (line, separators);
- int8_t j = 0;
-
- while (line[i] && (!isInSet(line[i], separators)))
- token[j++] = line[i++];
- token[j] = '\0';
-
- return line + i;
-}
+static void
+writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
UBool haveCopyright=TRUE;
{0, 0, 0, 0} /* dataVersion (calculated at runtime) */
};
-void writeConverterData(UConverterSharedData *mySharedData,
- const char *cnvName,
- const char *cnvDir,
- UErrorCode *status)
+static void
+writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
{
UNewDataMemory *mem = NULL;
uint32_t sz2;
uint32_t size = 0;
+ int32_t tableType;
if(U_FAILURE(*status))
{
return;
}
+ tableType=TABLE_NONE;
+ if(data->cnvData!=NULL) {
+ tableType|=TABLE_BASE;
+ }
+ if(data->extData!=NULL) {
+ tableType|=TABLE_EXT;
+ }
+
mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
if(U_FAILURE(*status))
if(VERBOSE)
{
- fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv");
+ printf("- Opened udata %s.%s\n", cnvName, "cnv");
}
+
/* all read only, clean, platform independent data. Mmmm. :) */
- udata_writeBlock(mem, mySharedData->staticData, sizeof(UConverterStaticData));
+ udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
/* Now, write the table */
- size += ((NewConverter *)mySharedData->table)->write((NewConverter *)mySharedData->table, mySharedData->staticData, mem);
+ if(tableType&TABLE_BASE) {
+ size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
+ }
+ if(tableType&TABLE_EXT) {
+ size += data->extData->write(data->extData, &data->staticData, mem, tableType);
+ }
sz2 = udata_finish(mem, status);
if(size != sz2)
{
- fprintf(stderr, "error: wrote %d bytes to the .cnv file but counted %d bytes\n", sz2, size);
+ /*
+ * The byte count returned by udata_finish() must equal the count summed
+ * up while writing. Both are uint32_t, so cast them to the unsigned type
+ * that the %u conversion expects.
+ */
+ fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (unsigned int)sz2, (unsigned int)size);
*status=U_INTERNAL_PROGRAM_ERROR;
}
if(VERBOSE)
{
- fprintf(stderr, "- Wrote %d bytes to the udata.\n", sz2);
+ printf("- Wrote %u bytes to the udata.\n", (unsigned int)sz2);
}
}
+enum { /* indexes into options[] below; the two lists must stay in the same order */
+ OPT_HELP_H,
+ OPT_HELP_QUESTION_MARK,
+ OPT_COPYRIGHT,
+ OPT_VERSION,
+ OPT_DESTDIR,
+ OPT_VERBOSE,
+ OPT_SMALL,
+ OPT_IGNORE_SISO_CHECK,
+ OPT_COUNT /* number of options, not an index */
+};
+
static UOption options[]={
- UOPTION_HELP_H, /* 0 Numbers for those who*/
- UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
- UOPTION_COPYRIGHT, /* 2 */
- UOPTION_VERSION, /* 3 */
- UOPTION_DESTDIR, /* 4 */
- UOPTION_VERBOSE, /* 5 */
- UOPTION_PACKAGE_NAME, /* 6 */
- UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */
+ UOPTION_HELP_H,
+ UOPTION_HELP_QUESTION_MARK,
+ UOPTION_COPYRIGHT,
+ UOPTION_VERSION,
+ UOPTION_DESTDIR, /* value is preset to u_getDataDirectory() in main() */
+ UOPTION_VERBOSE,
+ { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, /* --small; the usage text lists no short form */
+ { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } /* --ignore-siso-check; long form only in the usage text */
};
int main(int argc, char* argv[])
{
- UConverterSharedData* mySharedData = NULL;
- UErrorCode err = U_ZERO_ERROR;
+ ConvData data;
+ UErrorCode err = U_ZERO_ERROR, localError;
char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
- char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
const char* destdir, *arg;
- const char *pkgName = NULL;
size_t destdirlen;
char* dot = NULL, *outBasename;
char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
UVersionInfo icuVersion;
+ UBool printFilename;
+
+ err = U_ZERO_ERROR;
U_MAIN_INIT_ARGS(argc, argv);
uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
/* preset then read command line options */
- options[4].value=u_getDataDirectory();
- argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+ options[OPT_DESTDIR].value=u_getDataDirectory();
+ argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
/* error handling, printing usage message */
if(argc<0) {
} else if(argc<2) {
argc=-1;
}
- if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
- fprintf(stderr,
+ if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
+ FILE *stdfile=argc<0 ? stderr : stdout;
+ fprintf(stdfile,
"usage: %s [-options] files...\n"
"\tread .ucm codepage mapping files and write .cnv files\n"
"options:\n"
"\t-d or --destdir destination directory, followed by the path\n"
"\t-v or --verbose Turn on verbose output\n",
argv[0]);
- fprintf(stderr,
- "\t-p or --pkgname sets the 'package' name for output files.\n"
- "\t If name is ICUDATA, then the default icu package\n"
- "\t name will be used.\n"
- "\t-t or --touchfile Generate additional small file without packagename, for nmake\n");
+ fprintf(stdfile,
+ "\t --small Generate smaller .cnv files. They will be\n"
+ "\t significantly smaller but may not be compatible with\n"
+ "\t older versions of ICU and will require heap memory\n"
+ "\t allocation when loaded.\n"
+ "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
- if(options[3].doesOccur) {
- fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
- dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
- fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n");
- fprintf(stderr,"Corporation and others. All Rights Reserved.\n");
+ if(options[OPT_VERSION].doesOccur) {
+ printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
+ dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
+ printf("%s\n", U_COPYRIGHT_STRING);
exit(0);
}
- TOUCHFILE = options[7].doesOccur;
+ /* get the options values */
+ haveCopyright = options[OPT_COPYRIGHT].doesOccur;
+ destdir = options[OPT_DESTDIR].value;
+ VERBOSE = options[OPT_VERBOSE].doesOccur;
+ SMALL = options[OPT_SMALL].doesOccur;
- if(!options[6].doesOccur)
- {
- fprintf(stderr, "%s : option -p (package name) is required.\n",
- argv[0]);
- exit(1);
+ if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
+ IGNORE_SISO_CHECK = TRUE;
}
- else
- {
- pkgName =options[6].value;
- if(!strcmp(pkgName, "ICUDATA"))
- {
- pkgName = U_ICUDATA_NAME;
- }
- if(pkgName[0] == 0)
- {
- pkgName = NULL;
-
- if(TOUCHFILE)
- {
- fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n",
- argv[0]);
- exit(1);
- }
- }
- }
-
- /* get the options values */
- haveCopyright = options[2].doesOccur;
- destdir = options[4].value;
- VERBOSE = options[5].doesOccur;
if (destdir != NULL && *destdir != 0) {
uprv_strcpy(outFileName, destdir);
}
#endif
- for (++argv; --argc; ++argv)
+ err = U_ZERO_ERROR;
+ printFilename = (UBool) (argc > 2 || VERBOSE);
+ for (++argv; --argc; ++argv)
{
- err = U_ZERO_ERROR;
- arg = getLongPathname(*argv);
+ arg = getLongPathname(*argv);
- /*produces the right destination path for display*/
- if (destdirlen != 0)
+ /*
+ * Check for potential buffer overflow. The name is copied into
+ * fixed-size buffers of UCNV_MAX_FULL_FILE_NAME_LENGTH bytes and the
+ * converter file extension is appended below, so the path, the
+ * extension and the terminating NUL must all fit. The test is
+ * conservative: it does not credit an input extension that is removed
+ * before the new one is appended.
+ * NOTE(review): with a destination directory the output name is
+ * destdir + basename; the destdir prefix is set up outside this
+ * check - confirm that path is bounded as well.
+ */
+ if(strlen(arg) + strlen(CONVERTER_FILE_EXTENSION) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
{
- const char *basename;
+ fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
+ return U_BUFFER_OVERFLOW_ERROR;
+ }
- /* find the last file sepator */
- basename = uprv_strrchr(arg, U_FILE_SEP_CHAR);
- if (basename == NULL) {
- basename = arg;
- } else {
- ++basename;
- }
+ /*produces the right destination path for display*/
+ if (destdirlen != 0)
+ {
+ const char *basename;
- uprv_strcpy(outBasename, basename);
+ /* find the last file separator */
+ basename = findBasename(arg);
+ uprv_strcpy(outBasename, basename);
}
- else
+ else
{
- uprv_strcpy(outFileName, arg);
+ uprv_strcpy(outFileName, arg);
}
- /*removes the extension if any is found*/
- dot = uprv_strrchr(outBasename, '.');
- if (dot)
+ /*removes the extension if any is found*/
+ dot = uprv_strrchr(outBasename, '.');
+ if (dot)
{
- *dot = '\0';
+ *dot = '\0';
}
- /* the basename without extension is the converter name */
- uprv_strcpy(cnvName, outBasename);
-
- if(TOUCHFILE)
- {
- uprv_strcpy(touchFileName, outBasename);
- uprv_strcat(touchFileName, ".cnv");
- }
-
- if(pkgName != NULL)
- {
- /* changes both baename and filename */
- uprv_strcpy(outBasename, pkgName);
- uprv_strcat(outBasename, "_");
- uprv_strcat(outBasename, cnvName);
- }
-
+ /* the basename without extension is the converter name */
+ uprv_strcpy(cnvName, outBasename);
- /*Adds the target extension*/
- uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
+ /*Adds the target extension*/
+ uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
#if DEBUG
printf("makeconv: processing %s ...\n", arg);
fflush(stdout);
#endif
- mySharedData = createConverterFromTableFile(arg, &err);
+ localError = U_ZERO_ERROR;
+ initConvData(&data);
+ createConverter(&data, arg, &localError);
- if (U_FAILURE(err) || (mySharedData == NULL))
+ if (U_FAILURE(localError))
{
- /* if an error is found, print out an error msg and keep going */
- fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err,
- u_errorName(err));
- err = U_ZERO_ERROR;
+ /* if an error is found, print out an error msg and keep going */
+ fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
+ u_errorName(localError));
+ if(U_SUCCESS(err)) {
+ err = localError;
+ }
}
- else
+ else
{
- /* Make the static data name equal to the file name */
- if( /*VERBOSE && */ uprv_stricmp(cnvName,mySharedData->staticData->name))
- {
- fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
- cnvName,
- CONVERTER_FILE_EXTENSION,
- mySharedData->staticData->name);
- }
-
- uprv_strcpy((char*)mySharedData->staticData->name, cnvName);
-
- if(pkgName == NULL)
- {
- uprv_strcpy(cnvNameWithPkg, cnvName);
- }
- else
- {
- uprv_strcpy(cnvNameWithPkg, pkgName);
- uprv_strcat(cnvNameWithPkg, "_");
- uprv_strcat(cnvNameWithPkg, cnvName);
- }
-
- writeConverterData(mySharedData, cnvNameWithPkg, destdir, &err);
- ((NewConverter *)mySharedData->table)->close((NewConverter *)mySharedData->table);
- if(TOUCHFILE)
- {
- FileStream *q;
- char msg[1024];
-
- sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n",
- cnvName, pkgName);
-
- q = T_FileStream_open(touchFileName, "w");
- if(q == NULL)
- {
- fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName);
- err = U_FILE_ACCESS_ERROR;
- }
-
- else
- {
- T_FileStream_write(q, msg, uprv_strlen(msg));
- T_FileStream_close(q);
- }
- }
-
- /* write the information data */
- uprv_free((UConverterStaticData *)mySharedData->staticData);
- uprv_free(mySharedData);
-
- if(U_FAILURE(err))
- {
- /* if an error is found, print out an error msg and keep going*/
- fprintf(stderr, "Error writing \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err,
- u_errorName(err));
- }
- else
- {
- puts(outFileName);
- }
+ /* Ensure the static data name matches the file name */
+ /* Changed to ignore directory and only compare base name
+ LDH 1/2/08*/
+ char *p, *altSep;
+
+ /*
+ * Find the last file separator of either kind. Whichever one occurs
+ * later in the string ends the directory part.
+ */
+ p = strrchr(cnvName, U_FILE_SEP_CHAR);
+ altSep = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
+ if(altSep != NULL && (p == NULL || altSep > p))
+ {
+ p = altSep;
+ }
+
+ if(p == NULL)
+ {
+ p = cnvName; /* no separators: the whole string is the base name */
+ }
+ else
+ {
+ p++; /* skip the separator itself so it is not part of the compare */
+ }
+ if(uprv_stricmp(p,data.staticData.name))
+ {
+ fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
+ cnvName, CONVERTER_FILE_EXTENSION,
+ data.staticData.name);
+ }
+
+ uprv_strcpy((char*)data.staticData.name, cnvName);
+
+ if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
+ fprintf(stderr,
+ "Error: A converter name must contain only invariant characters.\n"
+ "%s is not a valid converter name.\n",
+ data.staticData.name);
+ if(U_SUCCESS(err)) {
+ err = U_INVALID_TABLE_FORMAT;
+ }
+ }
+
+ uprv_strcpy(cnvNameWithPkg, cnvName);
+
+ localError = U_ZERO_ERROR;
+ writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
+
+ if(U_FAILURE(localError))
+ {
+ /* if an error is found, print out an error msg and keep going*/
+ fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
+ u_errorName(localError));
+ if(U_SUCCESS(err)) {
+ err = localError;
+ }
+ }
+ else if (printFilename)
+ {
+ puts(outBasename);
+ }
}
- fflush(stdout);
- fflush(stderr);
+ fflush(stdout);
+ fflush(stderr);
+
+ cleanupConvData(&data);
}
- return err;
+ return err;
}
static void
}
}
-/*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/
-void readHeaderFromFile(UConverterSharedData* mySharedData,
- FileStream* convFile,
- const char* converterName,
- UErrorCode *pErrorCode)
-{
- char line[200];
- char *s, *end, *key, *value;
+static void
+readHeader(ConvData *data,
+ FileStream* convFile,
+ const char* converterName,
+ UErrorCode *pErrorCode) {
+ char line[1024];
+ char *s, *key, *value;
+ const UConverterStaticData *prototype;
UConverterStaticData *staticData;
- char c;
if(U_FAILURE(*pErrorCode)) {
return;
}
- staticData=(UConverterStaticData *)mySharedData->staticData;
- staticData->conversionType=UCNV_UNSUPPORTED_CONVERTER;
+ staticData=&data->staticData;
staticData->platform=UCNV_IBM;
staticData->subCharLen=0;
while(T_FileStream_readLine(convFile, line, sizeof(line))) {
- /* remove comments and trailing CR and LF and remove whitespace from the end */
- for(end=line; (c=*end)!=0; ++end) {
- if(c=='#' || c=='\r' || c=='\n') {
- break;
- }
- }
- while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) {
- --end;
- }
- *end=0;
-
- /* skip leading white space and ignore empty lines */
- s=(char *)skipWhitespace(line);
- if(*s==0) {
+ /* basic parsing and handling of state-related items */
+ if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
continue;
}
/* stop at the beginning of the mapping section */
- if(uprv_memcmp(s, "CHARMAP", 7)==0) {
+ if(uprv_strcmp(line, "CHARMAP")==0) {
break;
}
- /* get the key name, bracketed in <> */
- if(*s!='<') {
- fprintf(stderr, "error: no header field <key> in line \"%s\"\n", line);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
- key=++s;
- while(*s!='>') {
- if(*s==0) {
- fprintf(stderr, "error: incomplete header field <key> in line \"%s\"\n", line);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
- ++s;
- }
- *s=0;
-
- /* get the value string, possibly quoted */
- s=(char *)skipWhitespace(s+1);
- if(*s!='"') {
- value=s;
- } else {
- /* remove the quotes */
- value=s+1;
- if(end>value && *(end-1)=='"') {
- *--end=0;
- }
- }
-
/* collect the information from the header field, ignore unknown keys */
if(uprv_strcmp(key, "code_set_name")==0) {
if(*value!=0) {
- uprv_strcpy((char*)staticData->name, value);
+ /*
+ * The value comes straight from the .ucm file and a header line can be
+ * far longer than the fixed-size name field, so reject names that do
+ * not fit (including the terminating NUL) instead of overrunning it.
+ */
+ if(uprv_strlen(value)>=sizeof(staticData->name)) {
+ fprintf(stderr, "error: <code_set_name> %s is too long\n", value);
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+ uprv_strcpy((char *)staticData->name, value);
getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
}
- } else if(uprv_strcmp(key, "uconv_class")==0) {
- const UConverterStaticData *prototype;
-
- if(uprv_strcmp(value, "DBCS")==0) {
- staticData->conversionType=UCNV_DBCS;
- } else if(uprv_strcmp(value, "SBCS")==0) {
- staticData->conversionType = UCNV_SBCS;
- } else if(uprv_strcmp(value, "MBCS")==0) {
- staticData->conversionType = UCNV_MBCS;
- } else if(uprv_strcmp(value, "EBCDIC_STATEFUL")==0) {
- staticData->conversionType = UCNV_EBCDIC_STATEFUL;
- } else {
- fprintf(stderr, "error: unknown <uconv_class> %s\n", value);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
-
- /* Now that we know the type, copy any 'default' values from the table. */
- prototype=ucnv_converterStaticData[staticData->conversionType];
- if(prototype!=NULL) {
- if(staticData->name[0]==0) {
- uprv_strcpy((char*)staticData->name, prototype->name);
- }
-
- if(staticData->codepage==0) {
- staticData->codepage = prototype->codepage;
- }
-
- if(staticData->platform==0) {
- staticData->platform = prototype->platform;
- }
-
- if(staticData->minBytesPerChar==0) {
- staticData->minBytesPerChar = prototype->minBytesPerChar;
- }
-
- if(staticData->maxBytesPerChar==0) {
- staticData->maxBytesPerChar = prototype->maxBytesPerChar;
- }
-
- if(staticData->subCharLen==0) {
- staticData->subCharLen=prototype->subCharLen;
- if(prototype->subCharLen>0) {
- uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
- }
- }
- }
- } else if(uprv_strcmp(key, "mb_cur_max")==0) {
- if('1'<=*value && *value<='4' && value[1]==0) {
- staticData->maxBytesPerChar=(int8_t)(*value-'0');
- } else {
- fprintf(stderr, "error: illegal <mb_cur_max> %s\n", value);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
- } else if(uprv_strcmp(key, "mb_cur_min")==0) {
- if('1'<=*value && *value<='4' && value[1]==0) {
- staticData->minBytesPerChar=(int8_t)(*value-'0');
- } else {
- fprintf(stderr, "error: illegal <mb_cur_min> %s\n", value);
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
} else if(uprv_strcmp(key, "subchar")==0) {
- uint32_t bytes;
- int32_t length;
-
- length=parseCodepageBytes(value, &bytes, (const char **)&end);
- if(length>0 && *end==0) {
- staticData->subCharLen=(int8_t)length;
- do {
- staticData->subChar[--length]=(uint8_t)bytes;
- bytes>>=8;
- } while(length>0);
+ uint8_t bytes[UCNV_EXT_MAX_BYTES];
+ int8_t length;
+
+ s=value;
+ length=ucm_parseBytes(bytes, line, (const char **)&s);
+ if(1<=length && length<=4 && *s==0) {
+ staticData->subCharLen=length;
+ uprv_memcpy(staticData->subChar, bytes, length);
} else {
fprintf(stderr, "error: illegal <subchar> %s\n", value);
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
} else if(uprv_strcmp(key, "subchar1")==0) {
- uint32_t bytes;
+ uint8_t bytes[UCNV_EXT_MAX_BYTES];
- if(1==parseCodepageBytes(value, &bytes, (const char **)&end) && *end==0) {
- staticData->subChar1=(uint8_t)bytes;
+ s=value;
+ if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
+ staticData->subChar1=bytes[0];
} else {
fprintf(stderr, "error: illegal <subchar1> %s\n", value);
*pErrorCode=U_INVALID_TABLE_FORMAT;
return;
}
- } else if(uprv_strcmp(key, "icu:state")==0) {
- /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */
- switch(staticData->conversionType) {
- case UCNV_SBCS:
- case UCNV_DBCS:
- case UCNV_EBCDIC_STATEFUL:
- staticData->conversionType = UCNV_MBCS;
- break;
- case UCNV_MBCS:
- break;
- default:
- fprintf(stderr, "error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n");
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
+ }
+ }
+
+ /* copy values from the UCMFile to the static data */
+ staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
+ staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
+ staticData->conversionType=data->ucm->states.conversionType;
+
+ if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
+ fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ return;
+ }
+
+ /*
+ * Now that we know the type, copy any 'default' values from the table.
+ * We need not check the type any further because the parser only
+ * recognizes what we have prototypes for.
+ *
+ * For delta (extension-only) tables, copy values from the base file
+ * instead, see createConverter().
+ */
+ if(data->ucm->baseName[0]==0) {
+ prototype=ucnv_converterStaticData[staticData->conversionType];
+ if(prototype!=NULL) {
+ if(staticData->name[0]==0) {
+ uprv_strcpy((char *)staticData->name, prototype->name);
+ }
+
+ if(staticData->codepage==0) {
+ staticData->codepage=prototype->codepage;
+ }
+
+ if(staticData->platform==0) {
+ staticData->platform=prototype->platform;
+ }
+
+ if(staticData->minBytesPerChar==0) {
+ staticData->minBytesPerChar=prototype->minBytesPerChar;
}
if(staticData->maxBytesPerChar==0) {
- fprintf(stderr, "error: <icu:state> before the <mb_cur_max> line\n");
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
+ staticData->maxBytesPerChar=prototype->maxBytesPerChar;
}
- if(mySharedData->table==NULL) {
- mySharedData->table=(UConverterTable *)MBCSOpen(staticData->maxBytesPerChar);
- if(mySharedData->table==NULL) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
+
+ if(staticData->subCharLen==0) {
+ staticData->subCharLen=prototype->subCharLen;
+ if(prototype->subCharLen>0) {
+ uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
}
}
- if(!MBCSAddState((NewConverter *)mySharedData->table, value)) {
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- return;
- }
}
}
- if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- } else if(staticData->conversionType==UCNV_MBCS && mySharedData->table==NULL) {
- fprintf(stderr, "error: missing state table information (<icu:state>) for MBCS\n");
- *pErrorCode=U_INVALID_TABLE_FORMAT;
- } else if(staticData->subChar1!=0 &&
- !staticData->conversionType==UCNV_MBCS &&
- !staticData->conversionType==UCNV_EBCDIC_STATEFUL
+ if(data->ucm->states.outputType<0) {
+ data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
+ }
+
+ if( staticData->subChar1!=0 &&
+ (staticData->minBytesPerChar>1 ||
+ (staticData->conversionType!=UCNV_MBCS &&
+ staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
) {
fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
-void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err)
-{
- char storageLine[200];
- char* line = NULL;
- UConverterStaticData *staticData=(UConverterStaticData *)sharedData->staticData;
- NewConverter *cnvData = (NewConverter *)sharedData->table;
- UChar32 unicodeValue, codepageValue;
- uint8_t mbcsBytes[8];
- int32_t mbcsLength;
- char codepointBytes[20];
- UBool isOK = TRUE;
- uint8_t precisionMask = 0, unicodeMask = 0;
- char endOfLine;
-
- if(cnvData->startMappings!=NULL)
- {
- if(!cnvData->startMappings(cnvData)) {
- *err = U_INVALID_TABLE_FORMAT;
- return;
- }
- }
-
- if(cnvData->isValid!=NULL)
- {
- const uint8_t *p = staticData->subChar;
- codepageValue = 0;
- switch(staticData->subCharLen) {
- case 4: codepageValue = (codepageValue << 8) | *p++;
- case 3: codepageValue = (codepageValue << 8) | *p++;
- case 2: codepageValue = (codepageValue << 8) | *p++;
- case 1: codepageValue = (codepageValue << 8) | *p;
- default: break; /* must never occur */
- }
- if(!cnvData->isValid(cnvData, staticData->subChar, staticData->subCharLen, codepageValue)) {
- fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
- *err = U_INVALID_TABLE_FORMAT;
- isOK = FALSE;
- }
- }
+/* return TRUE if a base table was read, FALSE for an extension table */
+static UBool
+readFile(ConvData *data, const char* converterName,
+ UErrorCode *pErrorCode) {
+ char line[1024];
+ char *end;
+ FileStream *convFile;
- staticData->hasFromUnicodeFallback = staticData->hasToUnicodeFallback = FALSE;
+ UCMStates *baseStates;
+ UBool dataIsBase;
- while (T_FileStream_readLine(convFile, storageLine, sizeof(storageLine)))
- {
- removeComments(storageLine);
- line = storageLine;
- if (line[nextTokenOffset(line, NLTC_SEPARATORS)] != '\0')
- {
- /* get the Unicode code point */
- line = getToken(codepointBytes, line, UNICODE_CODEPOINT_SEPARATORS);
- if (uprv_strcmp(codepointBytes, "END") == 0)
- {
- break;
- }
- unicodeValue = (UChar32)T_CString_stringToInteger(codepointBytes, 16);
+ if(U_FAILURE(*pErrorCode)) {
+ return FALSE;
+ }
- /* get the codepage bytes */
- codepageValue = 0;
- mbcsLength = 0;
- do
- {
- line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS);
- mbcsBytes[mbcsLength] = (uint8_t)T_CString_stringToInteger(codepointBytes, 16);
- codepageValue = codepageValue << 8 | mbcsBytes[mbcsLength++];
-
- /* End of line could be \0 or | (if fallback) */
- endOfLine= line[nextTokenOffset(line, CODEPOINT_SEPARATORS)];
- } while((endOfLine != '\0') && (endOfLine != FALLBACK_SEPARATOR));
-
- if(unicodeValue>=0x10000) {
- unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */
- } else if(UTF_IS_SURROGATE(unicodeValue)) {
- unicodeMask|=UCNV_HAS_SURROGATES; /* there are single surrogates */
- }
+ data->ucm=ucm_open();
- if((uint32_t)unicodeValue > 0x10ffff)
- {
- fprintf(stderr, "error: Unicode code point > U+10ffff in '%s'\n", storageLine);
- isOK = FALSE;
- }
- else if(endOfLine == FALLBACK_SEPARATOR)
- {
- /* we know that there is a fallback separator */
- precisionMask |= 1;
- line = uprv_strchr(line, FALLBACK_SEPARATOR) + 1;
- switch(*line)
- {
- case '0':
- /* set roundtrip mappings */
- isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0) &&
- cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0);
- break;
- case '1':
- /* set only a fallback mapping from Unicode to codepage */
- staticData->hasFromUnicodeFallback = TRUE;
- isOK &= cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
- break;
- case '2':
- /* skip subchar mappings */
- break;
- case '3':
- /* set only a fallback mapping from codepage to Unicode */
- staticData->hasToUnicodeFallback = TRUE;
- isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
- break;
- default:
- fprintf(stderr, "error: illegal fallback indicator '%s' in '%s'\n", line - 1, storageLine);
- *err = U_INVALID_TABLE_FORMAT;
- break;
- }
- }
- else
- {
- precisionMask |= 2;
- /* set the mappings */
- isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1) &&
- cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1);
- }
- }
+ convFile=T_FileStream_open(converterName, "r");
+ if(convFile==NULL) {
+ *pErrorCode=U_FILE_ACCESS_ERROR;
+ return FALSE;
}
- if(unicodeMask == 3)
- {
- fprintf(stderr, "warning: contains mappings to both supplementary code points and single surrogates\n");
+ /* parse the header; on failure release the file before bailing out */
+ readHeader(data, convFile, converterName, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ T_FileStream_close(convFile);
+ return FALSE;
}
- staticData->unicodeMask = unicodeMask;
- if(cnvData->finishMappings!=NULL)
- {
- cnvData->finishMappings(cnvData, staticData);
+ /* an empty base name means this file carries its own base table */
+ if(data->ucm->baseName[0]==0) {
+ dataIsBase=TRUE;
+ baseStates=&data->ucm->states;
+ ucm_processStates(baseStates, IGNORE_SISO_CHECK);
+ } else {
+ dataIsBase=FALSE;
+ baseStates=NULL;
}
- if(!isOK)
- {
- *err = U_INVALID_TABLE_FORMAT;
- }
- else if(precisionMask == 3)
- {
- fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
- *err = U_INVALID_TABLE_FORMAT;
+ /* read the base table */
+ ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ /* the stream is otherwise only closed on the normal path further down */
+ T_FileStream_close(convFile);
+ return FALSE;
}
-}
-/*creates a UConverterStaticData, fills in necessary links to it the appropriate function pointers*/
-UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err)
-{
- FileStream* convFile = NULL;
- UConverterSharedData* mySharedData = NULL;
- UConverterStaticData* myStaticData = NULL;
+ /* read an extension table if there is one */
+ while(T_FileStream_readLine(convFile, line, sizeof(line))) {
+ end=uprv_strchr(line, 0);
+ while(line<end &&
+ (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
+ --end;
+ }
+ *end=0;
- if (U_FAILURE(*err)) return NULL;
+ if(line[0]=='#' || u_skipWhitespace(line)==end) {
+ continue; /* ignore empty and comment lines */
+ }
- convFile = T_FileStream_open(converterName, "r");
- if (convFile == NULL)
- {
- *err = U_FILE_ACCESS_ERROR;
- return NULL;
+ if(0==uprv_strcmp(line, "CHARMAP")) {
+ /* read the extension table */
+ ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
+ } else {
+ fprintf(stderr, "unexpected text after the base mapping table\n");
+ }
+ break;
}
+ T_FileStream_close(convFile);
- mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
- if (mySharedData == NULL)
- {
- *err = U_MEMORY_ALLOCATION_ERROR;
- T_FileStream_close(convFile);
- return NULL;
+ if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
+ fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
}
- uprv_memset(mySharedData, 0, sizeof(UConverterSharedData));
+ return dataIsBase;
+}
+/*
+ * Fills in *data from the mapping file converterName.
+ *
+ * After initConvData(data) and readFile(), one of two things is built:
+ * - readFile() returned TRUE: MBCS data in data->cnvData from the file's
+ * base table, plus data->extData if extension mappings remain;
+ * - readFile() returned FALSE: extension-only data in data->extData,
+ * checked against the file named by data->ucm->baseName plus ".ucm".
+ *
+ * Returns immediately if *pErrorCode is already a failure. Failures detected
+ * here are reported as U_MEMORY_ALLOCATION_ERROR or U_INVALID_TABLE_FORMAT;
+ * error codes set by readFile() are passed through.
+ * NOTE(review): data itself is not cleaned up on failure in this function -
+ * presumably the caller owns that; confirm.
+ */
+static void
+createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
+ ConvData baseData; /* only used in the extension-only branch, for the <icu:base> file */
+ UBool dataIsBase;
- mySharedData->structSize = sizeof(UConverterSharedData);
+ UConverterStaticData *staticData;
+ UCMStates *states, *baseStates;
- myStaticData = (UConverterStaticData*) uprv_malloc(sizeof(UConverterStaticData));
- if (myStaticData == NULL)
- {
- *err = U_MEMORY_ALLOCATION_ERROR;
- T_FileStream_close(convFile);
- return NULL;
+ if(U_FAILURE(*pErrorCode)) {
+ return;
}
- uprv_memset(myStaticData, 0, sizeof(UConverterStaticData));
- mySharedData->staticData = myStaticData;
- myStaticData->structSize = sizeof(UConverterStaticData);
- /* mySharedData->staticDataOwned = FALSE; */ /* not owned if in udata */
- mySharedData->sharedDataCached = FALSE;
- mySharedData->dataMemory = NULL; /* for init */
+ initConvData(data);
- readHeaderFromFile(mySharedData, convFile, converterName, err);
+ dataIsBase=readFile(data, converterName, pErrorCode); /* TRUE when the file names no base (baseName is empty) */
+ if(U_FAILURE(*pErrorCode)) {
+ return;
+ }
+
+ staticData=&data->staticData;
+ states=&data->ucm->states;
+
+ if(dataIsBase) {
+ /*
+ * Build a normal .cnv file with a base table
+ * and an optional extension table.
+ */
+ data->cnvData=MBCSOpen(data->ucm);
+ if(data->cnvData==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+
+ } else if(!data->cnvData->isValid(data->cnvData,
+ staticData->subChar, staticData->subCharLen)
+ ) {
+ fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
- if (U_FAILURE(*err)) return NULL;
+ } else if(staticData->subChar1!=0 &&
+ !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
+ ) {
+ fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
- switch (myStaticData->conversionType)
- {
- case UCNV_SBCS:
- {
- /* SBCS: use MBCS data structure with a default state table */
- if(mySharedData->staticData->maxBytesPerChar!=1) {
- fprintf(stderr, "error: SBCS codepage with max bytes/char!=1\n");
- *err = U_INVALID_TABLE_FORMAT;
- break;
+ } else if( /* base/extension consistency is only checked when extension mappings exist */
+ data->ucm->ext->mappingsLength>0 &&
+ !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
+ ) {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
+ /* sort the table so that it can be turned into UTF-8-friendly data */
+ ucm_sortTable(data->ucm->base);
}
- myStaticData->conversionType = UCNV_MBCS;
- if(mySharedData->table == NULL) {
- NewConverter *sharedDataTable = MBCSOpen(1);
- if(sharedDataTable != NULL) {
- if(!MBCSAddState(sharedDataTable, "0-ff")) {
- *err = U_INVALID_TABLE_FORMAT;
- sharedDataTable->close(sharedDataTable);
- } else {
- mySharedData->table = (UConverterTable *)sharedDataTable;
- }
+
+ if(U_SUCCESS(*pErrorCode)) {
+ if(
+ /* add the base table after ucm_checkBaseExt()! */
+ !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
+ ) {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
} else {
- *err = U_MEMORY_ALLOCATION_ERROR;
+ /*
+ * addTable() may have requested moving more mappings to the extension table
+ * if they fit into the base toUnicode table but not into the
+ * base fromUnicode table.
+ * (Especially for UTF-8-friendly fromUnicode tables.)
+ * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
+ * to be excluded from the extension toUnicode data.
+ * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
+ * the base fromUnicode table.
+ */
+ ucm_moveMappings(data->ucm->base, data->ucm->ext);
+ ucm_sortTable(data->ucm->ext);
+ if(data->ucm->ext->mappingsLength>0) {
+ /* prepare the extension table, if there is one */
+ data->extData=CnvExtOpen(data->ucm);
+ if(data->extData==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ } else if(
+ !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
+ ) {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ }
+ }
}
}
- break;
- }
- case UCNV_MBCS:
- {
- /* MBCSOpen() was called by readHeaderFromFile() */
- break;
- }
- case UCNV_EBCDIC_STATEFUL:
- {
- /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */
- if(mySharedData->staticData->maxBytesPerChar!=2) {
- fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
- *err = U_INVALID_TABLE_FORMAT;
- break;
- }
- myStaticData->conversionType = UCNV_MBCS;
- if(mySharedData->table == NULL) {
- NewConverter *sharedDataTable = MBCSOpen(2);
- if(sharedDataTable != NULL) {
- if( !MBCSAddState(sharedDataTable, "0-ff, e:1.s, f:0.s") ||
- !MBCSAddState(sharedDataTable, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4") ||
- !MBCSAddState(sharedDataTable, "0-40:1.i, 41-fe:1., ff:1.i") ||
- !MBCSAddState(sharedDataTable, "0-ff:1.i, 40:1.") ||
- !MBCSAddState(sharedDataTable, "0-ff:1.i")
+ } else {
+ /* Build an extension-only .cnv file. */
+ char baseFilename[500]; /* NOTE(review): filled below with unchecked uprv_strcpy()/uprv_strcat(); a long converterName or baseName could overflow it */
+ char *basename;
+
+ initConvData(&baseData);
+
+ /* assemble a path/filename for data->ucm->baseName */
+ uprv_strcpy(baseFilename, converterName);
+ basename=(char *)findBasename(baseFilename); /* presumably points just past the directory part - confirm against findBasename() */
+ uprv_strcpy(basename, data->ucm->baseName);
+ uprv_strcat(basename, ".ucm");
+
+ /* read the base table */
+ dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
+ if(U_FAILURE(*pErrorCode)) {
+ return; /* NOTE(review): leaves without cleanupConvData(&baseData), unlike every other path through this branch */
+ } else if(!dataIsBase) {
+ fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ } else {
+ /* prepare the extension table */
+ data->extData=CnvExtOpen(data->ucm);
+ if(data->extData==NULL) {
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ /* fill in gaps in extension file header fields */
+ UCMapping *m, *mLimit;
+ uint8_t fallbackFlags;
+
+ baseStates=&baseData.ucm->states;
+ if(states->conversionType==UCNV_DBCS) {
+ staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
+ } else if(states->minCharLength==0) {
+ staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
+ }
+ if(states->maxCharLength<states->minCharLength) { /* max length below min length: take the base file's max length */
+ staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
+ }
+
+ if(staticData->subCharLen==0) { /* no subchar in the extension file: use the base file's */
+ uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
+ staticData->subCharLen=baseData.staticData.subCharLen;
+ }
+ /*
+ * do not copy subChar1 -
+ * only use what is explicitly specified
+ * because it cannot be unset in the extension file header
+ */
+
+ /* get the fallback flags */
+ fallbackFlags=0; /* bit 0: base has a mapping with f==1; bit 1: one with f==3; the scan stops once both are set */
+ for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
+ m<mLimit && fallbackFlags!=3;
+ ++m
) {
- *err = U_INVALID_TABLE_FORMAT;
- sharedDataTable->close(sharedDataTable);
- } else {
- mySharedData->table = (UConverterTable *)sharedDataTable;
+ if(m->f==1) {
+ fallbackFlags|=1;
+ } else if(m->f==3) {
+ fallbackFlags|=2;
+ }
}
- } else {
- *err = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- break;
- }
- case UCNV_DBCS:
- {
- /* DBCS: use MBCS data structure with a default state table */
- if(mySharedData->staticData->maxBytesPerChar!=2) {
- fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
- *err = U_INVALID_TABLE_FORMAT;
- break;
- }
- myStaticData->conversionType = UCNV_MBCS;
- if(mySharedData->table == NULL) {
- NewConverter *sharedDataTable = MBCSOpen(2);
- if(sharedDataTable != NULL) {
- if( !MBCSAddState(sharedDataTable, "0-3f:3, 40:2, 41-fe:1, ff:3") ||
- !MBCSAddState(sharedDataTable, "41-fe") ||
- !MBCSAddState(sharedDataTable, "40") ||
- !MBCSAddState(sharedDataTable, "")
+
+ if(fallbackFlags&1) {
+ staticData->hasFromUnicodeFallback=TRUE;
+ }
+ if(fallbackFlags&2) {
+ staticData->hasToUnicodeFallback=TRUE;
+ }
+
+ if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
+ fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+
+ } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
+ fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+
+ } else if(
+ !ucm_checkValidity(data->ucm->ext, baseStates) ||
+ !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
) {
- *err = U_INVALID_TABLE_FORMAT;
- sharedDataTable->close(sharedDataTable);
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
} else {
- mySharedData->table = (UConverterTable *)sharedDataTable;
+ if(states->maxCharLength>1) {
+ /*
+ * When building a normal .cnv file with a base table
+ * for an MBCS (not SBCS) table with explicit precision flags,
+ * the MBCSAddTable() function marks some mappings for moving
+ * to the extension table.
+ * They fit into the base toUnicode table but not into the
+ * base fromUnicode table.
+ * (Note: We do have explicit precision flags because they are
+ * required for extension table generation, and
+ * ucm_checkBaseExt() verified it.)
+ *
+ * We do not call MBCSAddTable() here (we probably could)
+ * so we need to do the analysis before building the extension table.
+ * We assume that MBCSAddTable() will build a UTF-8-friendly table.
+ * Redundant mappings in the extension table are ok except they cost some size.
+ *
+ * Do this after ucm_checkBaseExt().
+ */
+ const MBCSData *mbcsData=MBCSGetDummy();
+ int32_t needsMove=0;
+ for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
+ m<mLimit;
+ ++m
+ ) {
+ if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
+ m->f|=MBCS_FROM_U_EXT_FLAG;
+ m->moveFlag=UCM_MOVE_TO_EXT;
+ ++needsMove;
+ }
+ }
+
+ if(needsMove!=0) { /* move the flagged mappings from the base file's table into this file's extension table, then re-sort it */
+ ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
+ ucm_sortTable(data->ucm->ext);
+ }
+ }
+ if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
+ *pErrorCode=U_INVALID_TABLE_FORMAT;
+ }
}
- } else {
- *err = U_MEMORY_ALLOCATION_ERROR;
}
}
- break;
- }
-
- default :
- fprintf(stderr, "error: <uconv_class> omitted\n");
- *err = U_INVALID_TABLE_FORMAT;
- mySharedData->table = NULL;
- break;
- };
- if(U_SUCCESS(*err) && mySharedData->table != NULL)
- {
- loadTableFromFile(convFile, mySharedData, err);
+ cleanupConvData(&baseData);
}
-
- T_FileStream_close(convFile);
-
- return mySharedData;
}
/*