X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..HEAD:/icuSources/tools/gennorm2/gennorm2.cpp diff --git a/icuSources/tools/gennorm2/gennorm2.cpp b/icuSources/tools/gennorm2/gennorm2.cpp index 597300da..bce5336b 100644 --- a/icuSources/tools/gennorm2/gennorm2.cpp +++ b/icuSources/tools/gennorm2/gennorm2.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2009-2012, International Business Machines +* Copyright (C) 2009-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: gennorm2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -20,8 +22,10 @@ #include "unicode/utypes.h" #include "n2builder.h" +#include #include #include +#include #include #include "unicode/errorcode.h" #include "unicode/localpointer.h" @@ -38,16 +42,12 @@ #include "unewdata.h" #endif -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - U_NAMESPACE_BEGIN UBool beVerbose=FALSE, haveCopyright=TRUE; -U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); - #if !UCONFIG_NO_NORMALIZATION -void parseFile(FILE *f, Normalizer2DataBuilder &builder); +void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder); #endif /* -------------------------------------------------------------------------- */ @@ -60,6 +60,8 @@ enum { SOURCEDIR, OUTPUT_FILENAME, UNICODE_VERSION, + WRITE_C_SOURCE, + WRITE_COMBINED_DATA, OPT_FAST }; @@ -71,6 +73,8 @@ static UOption options[]={ UOPTION_SOURCEDIR, UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG), UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), + UOPTION_DEF("csource", '\1', UOPT_NO_ARG), + UOPTION_DEF("combined", '\1', UOPT_NO_ARG), UOPTION_DEF("fast", '\1', UOPT_NO_ARG) }; @@ -94,17 +98,22 @@ main(int argc, char* argv[]) { if( argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur ) { - /* - * Broken into chunks because the C89 standard says the minimum - * required supported string length is 509 bytes. - */ fprintf(stderr, "Usage: %s [-options] infiles+ -o outputfilename\n" "\n" "Reads the infiles with normalization data and\n" - "creates a binary file (outputfilename) with the data.\n" + "creates a binary file, or a C source file (--csource), with the data,\n" + "or writes a data file with the combined data (--combined).\n" + "See http://userguide.icu-project.org/transforms/normalization#TOC-Data-File-Syntax\n" + "\n" + "Alternate usage: %s [-options] a.txt b.txt minus p.txt q.txt -o outputfilename\n" + "\n" + "Computes the difference of (a, b) minus (p, q) and writes the diff data\n" + "in input-file syntax to the outputfilename.\n" + "It is then possible to build (p, q, diff) to get the same data as (a, b).\n" + "(Useful for computing minimal incremental mapping data files.)\n" "\n", - argv[0]); + argv[0], argv[0]); fprintf(stderr, "Options:\n" "\t-h or -? or --help this usage text\n" @@ -113,9 +122,12 @@ main(int argc, char* argv[]) { "\t-u or --unicode Unicode version, followed by the version like 5.2.0\n"); fprintf(stderr, "\t-s or --sourcedir source directory, followed by the path\n" - "\t-o or --output output filename\n"); + "\t-o or --output output filename\n" + "\t --csource writes a C source file with initializers\n" + "\t --combined writes a .txt file (input-file syntax) with the\n" + "\t combined data from all of the input files\n"); fprintf(stderr, - "\t --fast optimize the .nrm file for fast normalization,\n" + "\t --fast optimize the data for fast normalization,\n" "\t which might increase its size (Writes fully decomposed\n" "\t regular mappings instead of delta mappings.\n" "\t You should measure the runtime speed to make sure that\n" @@ -141,7 +153,10 @@ main(int argc, char* argv[]) { #else - LocalPointer builder(new Normalizer2DataBuilder(errorCode)); + LocalPointer b1(new Normalizer2DataBuilder(errorCode), errorCode); + LocalPointer b2; + LocalPointer diff; + Normalizer2DataBuilder *builder = b1.getAlias(); errorCode.assertSuccess(); if(options[UNICODE_VERSION].doesOccur) { @@ -163,20 +178,50 @@ main(int argc, char* argv[]) { pathLength=filename.length(); } + bool doMinus = false; for(int i=1; isetUnicodeVersion(options[UNICODE_VERSION].value); + } + if(options[OPT_FAST].doesOccur) { + builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST); + } + doMinus = true; + continue; + } filename.append(argv[i], errorCode); - LocalStdioFilePointer f(fopen(filename.data(), "r")); - if(f==NULL) { + std::ifstream f(filename.data()); + if(f.fail()) { fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data()); exit(U_FILE_ACCESS_ERROR); } builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS); - parseFile(f.getAlias(), *builder); + parseFile(f, *builder); filename.truncate(pathLength); } - builder->writeBinaryFile(options[OUTPUT_FILENAME].value); + if(doMinus) { + Normalizer2DataBuilder::computeDiff(*b1, *b2, *diff); + diff->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ true); + } else if(options[WRITE_COMBINED_DATA].doesOccur) { + builder->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ false); + } else if(options[WRITE_C_SOURCE].doesOccur) { + builder->writeCSourceFile(options[OUTPUT_FILENAME].value); + } else { + builder->writeBinaryFile(options[OUTPUT_FILENAME].value); + } return errorCode.get(); @@ -185,11 +230,19 @@ main(int argc, char* argv[]) { #if !UCONFIG_NO_NORMALIZATION -void parseFile(FILE *f, Normalizer2DataBuilder &builder) { +void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder) { IcuToolErrorCode errorCode("gennorm2/parseFile()"); - char line[300]; + std::string lineString; uint32_t startCP, endCP; - while(NULL!=fgets(line, (int)sizeof(line), f)) { + while(std::getline(f, lineString)) { + if (lineString.empty()) { + continue; // skip empty lines. + } +#if (U_CPLUSPLUS_VERSION >= 11) + char *line = &lineString.front(); +#else + char *line = &lineString.at(0); +#endif char *comment=(char *)strchr(line, '#'); if(comment!=NULL) { *comment=0; @@ -213,6 +266,11 @@ void parseFile(FILE *f, Normalizer2DataBuilder &builder) { fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line); exit(errorCode.reset()); } + if (endCP >= 0xd800 && startCP <= 0xdfff) { + fprintf(stderr, "gennorm2 error: value or mapping for surrogate code points: %s\n", + line); + exit(U_ILLEGAL_ARGUMENT_ERROR); + } delimiter=u_skipWhitespace(delimiter); if(*delimiter==':') { const char *s=u_skipWhitespace(delimiter+1); @@ -239,7 +297,7 @@ void parseFile(FILE *f, Normalizer2DataBuilder &builder) { } if(*delimiter=='=' || *delimiter=='>') { UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK]; - int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode); + int32_t length=u_parseString(delimiter+1, uchars, UPRV_LENGTHOF(uchars), NULL, errorCode); if(errorCode.isFailure()) { fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line); exit(errorCode.reset());