X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/f3c0d7a59d99c2a94c6b8822291f0e42be3773c9..HEAD:/icuSources/tools/gennorm2/gennorm2.cpp diff --git a/icuSources/tools/gennorm2/gennorm2.cpp b/icuSources/tools/gennorm2/gennorm2.cpp index 477bfd62..bce5336b 100644 --- a/icuSources/tools/gennorm2/gennorm2.cpp +++ b/icuSources/tools/gennorm2/gennorm2.cpp @@ -22,8 +22,10 @@ #include "unicode/utypes.h" #include "n2builder.h" +#include #include #include +#include #include #include "unicode/errorcode.h" #include "unicode/localpointer.h" @@ -44,10 +46,8 @@ U_NAMESPACE_BEGIN UBool beVerbose=FALSE, haveCopyright=TRUE; -U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); - #if !UCONFIG_NO_NORMALIZATION -void parseFile(FILE *f, Normalizer2DataBuilder &builder); +void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder); #endif /* -------------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ enum { OUTPUT_FILENAME, UNICODE_VERSION, WRITE_C_SOURCE, + WRITE_COMBINED_DATA, OPT_FAST }; @@ -73,6 +74,7 @@ static UOption options[]={ UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG), UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), UOPTION_DEF("csource", '\1', UOPT_NO_ARG), + UOPTION_DEF("combined", '\1', UOPT_NO_ARG), UOPTION_DEF("fast", '\1', UOPT_NO_ARG) }; @@ -96,17 +98,22 @@ main(int argc, char* argv[]) { if( argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur ) { - /* - * Broken into chunks because the C89 standard says the minimum - * required supported string length is 509 bytes. - */ fprintf(stderr, "Usage: %s [-options] infiles+ -o outputfilename\n" "\n" "Reads the infiles with normalization data and\n" - "creates a binary or C source file (outputfilename) with the data.\n" + "creates a binary file, or a C source file (--csource), with the data,\n" + "or writes a data file with the combined data (--combined).\n" + "See http://userguide.icu-project.org/transforms/normalization#TOC-Data-File-Syntax\n" + "\n" + "Alternate usage: %s [-options] a.txt b.txt minus p.txt q.txt -o outputfilename\n" + "\n" + "Computes the difference of (a, b) minus (p, q) and writes the diff data\n" + "in input-file syntax to the outputfilename.\n" + "It is then possible to build (p, q, diff) to get the same data as (a, b).\n" + "(Useful for computing minimal incremental mapping data files.)\n" "\n", - argv[0]); + argv[0], argv[0]); fprintf(stderr, "Options:\n" "\t-h or -? or --help this usage text\n" @@ -116,7 +123,9 @@ main(int argc, char* argv[]) { fprintf(stderr, "\t-s or --sourcedir source directory, followed by the path\n" "\t-o or --output output filename\n" - "\t --csource writes a C source file with initializers\n"); + "\t --csource writes a C source file with initializers\n" + "\t --combined writes a .txt file (input-file syntax) with the\n" + "\t combined data from all of the input files\n"); fprintf(stderr, "\t --fast optimize the data for fast normalization,\n" "\t which might increase its size (Writes fully decomposed\n" @@ -144,7 +153,10 @@ main(int argc, char* argv[]) { #else - LocalPointer builder(new Normalizer2DataBuilder(errorCode), errorCode); + LocalPointer b1(new Normalizer2DataBuilder(errorCode), errorCode); + LocalPointer b2; + LocalPointer diff; + Normalizer2DataBuilder *builder = b1.getAlias(); errorCode.assertSuccess(); if(options[UNICODE_VERSION].doesOccur) { @@ -166,20 +178,46 @@ main(int argc, char* argv[]) { pathLength=filename.length(); } + bool doMinus = false; for(int i=1; isetUnicodeVersion(options[UNICODE_VERSION].value); + } + if(options[OPT_FAST].doesOccur) { + builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST); + } + doMinus = true; + continue; + } filename.append(argv[i], errorCode); - LocalStdioFilePointer f(fopen(filename.data(), "r")); - if(f==NULL) { + std::ifstream f(filename.data()); + if(f.fail()) { fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data()); exit(U_FILE_ACCESS_ERROR); } builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS); - parseFile(f.getAlias(), *builder); + parseFile(f, *builder); filename.truncate(pathLength); } - if(options[WRITE_C_SOURCE].doesOccur) { + if(doMinus) { + Normalizer2DataBuilder::computeDiff(*b1, *b2, *diff); + diff->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ true); + } else if(options[WRITE_COMBINED_DATA].doesOccur) { + builder->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ false); + } else if(options[WRITE_C_SOURCE].doesOccur) { builder->writeCSourceFile(options[OUTPUT_FILENAME].value); } else { builder->writeBinaryFile(options[OUTPUT_FILENAME].value); @@ -192,11 +230,19 @@ main(int argc, char* argv[]) { #if !UCONFIG_NO_NORMALIZATION -void parseFile(FILE *f, Normalizer2DataBuilder &builder) { +void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder) { IcuToolErrorCode errorCode("gennorm2/parseFile()"); - char line[300]; + std::string lineString; uint32_t startCP, endCP; - while(NULL!=fgets(line, (int)sizeof(line), f)) { + while(std::getline(f, lineString)) { + if (lineString.empty()) { + continue; // skip empty lines. + } +#if (U_CPLUSPLUS_VERSION >= 11) + char *line = &lineString.front(); +#else + char *line = &lineString.at(0); +#endif char *comment=(char *)strchr(line, '#'); if(comment!=NULL) { *comment=0; @@ -220,6 +266,11 @@ void parseFile(FILE *f, Normalizer2DataBuilder &builder) { fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line); exit(errorCode.reset()); } + if (endCP >= 0xd800 && startCP <= 0xdfff) { + fprintf(stderr, "gennorm2 error: value or mapping for surrogate code points: %s\n", + line); + exit(U_ILLEGAL_ARGUMENT_ERROR); + } delimiter=u_skipWhitespace(delimiter); if(*delimiter==':') { const char *s=u_skipWhitespace(delimiter+1);