[apple/icu.git] / icuSources / tools / gennorm2 / gennorm2.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  gennorm2.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov25
*   created by: Markus W. Scherer
*
*   This program reads text files that define Unicode normalization,
*   parses them, and builds a binary data file.
*/

#include "unicode/utypes.h"
#include "n2builder.h"

#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include "unicode/errorcode.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/unistr.h"
#include "charstr.h"
#include "normalizer2impl.h"
#include "toolutil.h"
#include "uoptions.h"
#include "uparse.h"

#if UCONFIG_NO_NORMALIZATION
#include "unewdata.h"
#endif

U_NAMESPACE_BEGIN

// Tool-wide flags with external linkage.
// Both are overwritten in main() from the command line:
// beVerbose from the VERBOSE option, haveCopyright from the COPYRIGHT option.
// NOTE(review): presumably read by other translation units of the tool
// (nothing in this file reads them) -- confirm before changing linkage.
UBool beVerbose=FALSE, haveCopyright=TRUE;

#if !UCONFIG_NO_NORMALIZATION
// Forward declaration; the definition follows main().
// Parses one already-opened input file and passes its data to the builder.
// Declared only when normalization support is compiled in.
void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder);
#endif

/* -------------------------------------------------------------------------- */

// Indexes into the options[] array that follows.
// The enumerator order must stay in sync with the initializer order of
// options[]: main() looks up each option as options[ENUMERATOR].
enum {
    HELP_H,                 // -h / --help
    HELP_QUESTION_MARK,     // -?
    VERBOSE,                // -v / --verbose
    COPYRIGHT,              // -c / --copyright
    SOURCEDIR,              // -s / --sourcedir, prefix for input file names
    OUTPUT_FILENAME,        // -o / --output, required by main()
    UNICODE_VERSION,        // -u / --unicode
    WRITE_C_SOURCE,         // --csource
    WRITE_COMBINED_DATA,    // --combined
    OPT_FAST                // --fast
};

// Command line option table passed to u_parseArgs() in main().
// Entries are positional: entry N is addressed through the N-th enumerator
// of the anonymous enum above, so both lists must be edited together.
// The last three entries use '\1' as the short name; the usage text lists
// them with long names only.
// NOTE(review): '\1' is presumably the "no short option" placeholder --
// confirm in uoptions.h.
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_VERBOSE,
    UOPTION_COPYRIGHT,
    UOPTION_SOURCEDIR,
    UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
    UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
    UOPTION_DEF("csource", '\1', UOPT_NO_ARG),
    UOPTION_DEF("combined", '\1', UOPT_NO_ARG),
    UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
};

extern "C" int
main(int argc, char* argv[]) {
    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[SOURCEDIR].value="";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(!options[OUTPUT_FILENAME].doesOccur) {
        argc=-1;
    }
    if( argc<2 ||
        options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
    ) {
        fprintf(stderr,
            "Usage: %s [-options] infiles+ -o outputfilename\n"
            "\n"
            "Reads the infiles with normalization data and\n"
            "creates a binary file, or a C source file (--csource), with the data,\n"
            "or writes a data file with the combined data (--combined).\n"
            "See http://userguide.icu-project.org/transforms/normalization#TOC-Data-File-Syntax\n"
            "\n"
            "Alternate usage: %s [-options] a.txt b.txt minus p.txt q.txt -o outputfilename\n"
            "\n"
            "Computes the difference of (a, b) minus (p, q) and writes the diff data\n"
            "in input-file syntax to the outputfilename.\n"
            "It is then possible to build (p, q, diff) to get the same data as (a, b).\n"
            "(Useful for computing minimal incremental mapping data files.)\n"
            "\n",
            argv[0], argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-u or --unicode     Unicode version, followed by the version like 5.2.0\n");
        fprintf(stderr,
            "\t-s or --sourcedir   source directory, followed by the path\n"
            "\t-o or --output      output filename\n"
            "\t      --csource     writes a C source file with initializers\n"
            "\t      --combined    writes a .txt file (input-file syntax) with the\n"
            "\t                    combined data from all of the input files\n");
        fprintf(stderr,
            "\t      --fast        optimize the data for fast normalization,\n"
            "\t                    which might increase its size  (Writes fully decomposed\n"
            "\t                    regular mappings instead of delta mappings.\n"
            "\t                    You should measure the runtime speed to make sure that\n"
            "\t                    this is a good trade-off.)\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;

    IcuToolErrorCode errorCode("gennorm2/main()");

#if UCONFIG_NO_NORMALIZATION

    fprintf(stderr,
        "gennorm2 writes a dummy binary data file "
        "because UCONFIG_NO_NORMALIZATION is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
    // Should not return an error since this is the expected behaviour if UCONFIG_NO_NORMALIZATION is on.
    // return U_UNSUPPORTED_ERROR;
    return 0;

#else

    LocalPointer<Normalizer2DataBuilder> b1(new Normalizer2DataBuilder(errorCode), errorCode);
    LocalPointer<Normalizer2DataBuilder> b2;
    LocalPointer<Normalizer2DataBuilder> diff;
    Normalizer2DataBuilder *builder = b1.getAlias();
    errorCode.assertSuccess();

    if(options[UNICODE_VERSION].doesOccur) {
        builder->setUnicodeVersion(options[UNICODE_VERSION].value);
    }

    if(options[OPT_FAST].doesOccur) {
        builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
    }

    // prepare the filename beginning with the source dir
    CharString filename(options[SOURCEDIR].value, errorCode);
    int32_t pathLength=filename.length();
    if( pathLength>0 &&
        filename[pathLength-1]!=U_FILE_SEP_CHAR &&
        filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
    ) {
        filename.append(U_FILE_SEP_CHAR, errorCode);
        pathLength=filename.length();
    }

    bool doMinus = false;
    for(int i=1; i<argc; ++i) {
        printf("gennorm2: processing %s\n", argv[i]);
        if(strcmp(argv[i], "minus") == 0) {
            if(doMinus) {
                fprintf(stderr, "gennorm2 error: only one 'minus' can be specified\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            // Data from previous input files has been collected in b1.
            // Collect data from further input files in b2.
            b2.adoptInsteadAndCheckErrorCode(new Normalizer2DataBuilder(errorCode), errorCode);
            diff.adoptInsteadAndCheckErrorCode(new Normalizer2DataBuilder(errorCode), errorCode);
            errorCode.assertSuccess();
            builder = b2.getAlias();
            if(options[UNICODE_VERSION].doesOccur) {
                builder->setUnicodeVersion(options[UNICODE_VERSION].value);
            }
            if(options[OPT_FAST].doesOccur) {
                builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
            }
            doMinus = true;
            continue;
        }
        filename.append(argv[i], errorCode);
        std::ifstream f(filename.data());
        if(f.fail()) {
            fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data());
            exit(U_FILE_ACCESS_ERROR);
        }
        builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
        parseFile(f, *builder);
        filename.truncate(pathLength);
    }

    if(doMinus) {
        Normalizer2DataBuilder::computeDiff(*b1, *b2, *diff);
        diff->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ true);
    } else if(options[WRITE_COMBINED_DATA].doesOccur) {
        builder->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ false);
    } else if(options[WRITE_C_SOURCE].doesOccur) {
        builder->writeCSourceFile(options[OUTPUT_FILENAME].value);
    } else {
        builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
    }

    return errorCode.get();

#endif
}

#if !UCONFIG_NO_NORMALIZATION

void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder) {
    IcuToolErrorCode errorCode("gennorm2/parseFile()");
    std::string lineString;
    uint32_t startCP, endCP;
    while(std::getline(f, lineString)) {
        if (lineString.empty()) {
            continue;  // skip empty lines.
        }
#if (U_CPLUSPLUS_VERSION >= 11)
        char *line = &lineString.front();
#else
        char *line = &lineString.at(0);
#endif
        char *comment=(char *)strchr(line, '#');
        if(comment!=NULL) {
            *comment=0;
        }
        u_rtrim(line);
        if(line[0]==0) {
            continue;  // skip empty and comment-only lines
        }
        if(line[0]=='*') {
            const char *s=u_skipWhitespace(line+1);
            if(0==strncmp(s, "Unicode", 7)) {
                s=u_skipWhitespace(s+7);
                builder.setUnicodeVersion(s);
            }
            continue;  // reserved syntax
        }
        const char *delimiter;
        int32_t rangeLength=
            u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);
        if(errorCode.isFailure()) {
            fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
            exit(errorCode.reset());
        }
        if (endCP >= 0xd800 && startCP <= 0xdfff) {
                fprintf(stderr, "gennorm2 error: value or mapping for surrogate code points: %s\n",
                        line);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
        delimiter=u_skipWhitespace(delimiter);
        if(*delimiter==':') {
            const char *s=u_skipWhitespace(delimiter+1);
            char *end;
            unsigned long value=strtoul(s, &end, 10);
            if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {
                fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
                exit(U_PARSE_ERROR);
            }
            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                builder.setCC(c, (uint8_t)value);
            }
            continue;
        }
        if(*delimiter=='-') {
            if(*u_skipWhitespace(delimiter+1)!=0) {
                fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
                exit(U_PARSE_ERROR);
            }
            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                builder.removeMapping(c);
            }
            continue;
        }
        if(*delimiter=='=' || *delimiter=='>') {
            UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
            int32_t length=u_parseString(delimiter+1, uchars, UPRV_LENGTHOF(uchars), NULL, errorCode);
            if(errorCode.isFailure()) {
                fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
                exit(errorCode.reset());
            }
            UnicodeString mapping(FALSE, uchars, length);
            if(*delimiter=='=') {
                if(rangeLength!=1) {
                    fprintf(stderr,
                            "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
                            line);
                    exit(U_PARSE_ERROR);
                }
                builder.setRoundTripMapping((UChar32)startCP, mapping);
            } else {
                for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                    builder.setOneWayMapping(c, mapping);
                }
            }
            continue;
        }
        fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
        exit(U_PARSE_ERROR);
    }
}

#endif // !UCONFIG_NO_NORMALIZATION

U_NAMESPACE_END

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9 A	3	/*
	4	*******************************************************************************
	5	*
b331163b	6	* Copyright (C) 2009-2014, International Business Machines
729e4ab9 A	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
	10	* file name: gennorm2.cpp
f3c0d7a5	11	* encoding: UTF-8
729e4ab9 A	12	* tab size: 8 (not used)
	13	* indentation:4
	14	*
	15	* created on: 2009nov25
	16	* created by: Markus W. Scherer
	17	*
	18	* This program reads text files that define Unicode normalization,
	19	* parses them, and builds a binary data file.
	20	*/
	21
	22	#include "unicode/utypes.h"
	23	#include "n2builder.h"
	24
0f5d89e8	25	#include <fstream>
729e4ab9 A	26	#include <stdio.h>
729e4ab9 A	27	#include <stdlib.h>
0f5d89e8	28	#include <string>
729e4ab9 A	29	#include <string.h>
	30	#include "unicode/errorcode.h"
	31	#include "unicode/localpointer.h"
	32	#include "unicode/putil.h"
	33	#include "unicode/uchar.h"
	34	#include "unicode/unistr.h"
	35	#include "charstr.h"
	36	#include "normalizer2impl.h"
	37	#include "toolutil.h"
	38	#include "uoptions.h"
	39	#include "uparse.h"
	40
	41	#if UCONFIG_NO_NORMALIZATION
	42	#include "unewdata.h"
	43	#endif
	44
729e4ab9 A	45	U_NAMESPACE_BEGIN
	46
	47	UBool beVerbose=FALSE, haveCopyright=TRUE;
	48
729e4ab9	49	#if !UCONFIG_NO_NORMALIZATION
0f5d89e8	50	void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder);
729e4ab9 A	51	#endif
	52
	53	/* -------------------------------------------------------------------------- */
	54
	55	enum {
	56	HELP_H,
	57	HELP_QUESTION_MARK,
	58	VERBOSE,
	59	COPYRIGHT,
	60	SOURCEDIR,
	61	OUTPUT_FILENAME,
	62	UNICODE_VERSION,
b331163b	63	WRITE_C_SOURCE,
0f5d89e8	64	WRITE_COMBINED_DATA,
729e4ab9 A	65	OPT_FAST
	66	};
	67
	68	static UOption options[]={
	69	UOPTION_HELP_H,
	70	UOPTION_HELP_QUESTION_MARK,
	71	UOPTION_VERBOSE,
	72	UOPTION_COPYRIGHT,
	73	UOPTION_SOURCEDIR,
	74	UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
	75	UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
b331163b	76	UOPTION_DEF("csource", '\1', UOPT_NO_ARG),
0f5d89e8	77	UOPTION_DEF("combined", '\1', UOPT_NO_ARG),
729e4ab9 A	78	UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
	79	};
	80
	81	extern "C" int
	82	main(int argc, char* argv[]) {
	83	U_MAIN_INIT_ARGS(argc, argv);
	84
	85	/* preset then read command line options */
	86	options[SOURCEDIR].value="";
729e4ab9 A	87	argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);
	88
	89	/* error handling, printing usage message */
	90	if(argc<0) {
	91	fprintf(stderr,
	92	"error in command line argument \"%s\"\n",
	93	argv[-argc]);
	94	}
	95	if(!options[OUTPUT_FILENAME].doesOccur) {
	96	argc=-1;
	97	}
	98	if( argc<2 \|\|
	99	options[HELP_H].doesOccur \|\| options[HELP_QUESTION_MARK].doesOccur
	100	) {
729e4ab9 A	101	fprintf(stderr,
	102	"Usage: %s [-options] infiles+ -o outputfilename\n"
	103	"\n"
	104	"Reads the infiles with normalization data and\n"
0f5d89e8 A	105	"creates a binary file, or a C source file (--csource), with the data,\n"
	106	"or writes a data file with the combined data (--combined).\n"
	107	"See http://userguide.icu-project.org/transforms/normalization#TOC-Data-File-Syntax\n"
	108	"\n"
	109	"Alternate usage: %s [-options] a.txt b.txt minus p.txt q.txt -o outputfilename\n"
	110	"\n"
	111	"Computes the difference of (a, b) minus (p, q) and writes the diff data\n"
	112	"in input-file syntax to the outputfilename.\n"
	113	"It is then possible to build (p, q, diff) to get the same data as (a, b).\n"
	114	"(Useful for computing minimal incremental mapping data files.)\n"
729e4ab9	115	"\n",
0f5d89e8	116	argv[0], argv[0]);
729e4ab9 A	117	fprintf(stderr,
	118	"Options:\n"
	119	"\t-h or -? or --help this usage text\n"
	120	"\t-v or --verbose verbose output\n"
	121	"\t-c or --copyright include a copyright notice\n"
	122	"\t-u or --unicode Unicode version, followed by the version like 5.2.0\n");
	123	fprintf(stderr,
	124	"\t-s or --sourcedir source directory, followed by the path\n"
b331163b	125	"\t-o or --output output filename\n"
0f5d89e8 A	126	"\t --csource writes a C source file with initializers\n"
	127	"\t --combined writes a .txt file (input-file syntax) with the\n"
	128	"\t combined data from all of the input files\n");
729e4ab9	129	fprintf(stderr,
b331163b	130	"\t --fast optimize the data for fast normalization,\n"
729e4ab9 A	131	"\t which might increase its size (Writes fully decomposed\n"
	132	"\t regular mappings instead of delta mappings.\n"
	133	"\t You should measure the runtime speed to make sure that\n"
	134	"\t this is a good trade-off.)\n");
	135	return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
	136	}
	137
	138	beVerbose=options[VERBOSE].doesOccur;
	139	haveCopyright=options[COPYRIGHT].doesOccur;
	140
	141	IcuToolErrorCode errorCode("gennorm2/main()");
	142
	143	#if UCONFIG_NO_NORMALIZATION
	144
	145	fprintf(stderr,
	146	"gennorm2 writes a dummy binary data file "
	147	"because UCONFIG_NO_NORMALIZATION is set, \n"
	148	"see icu/source/common/unicode/uconfig.h\n");
	149	udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
	150	// Should not return an error since this is the expected behaviour if UCONFIG_NO_NORMALIZATION is on.
	151	// return U_UNSUPPORTED_ERROR;
	152	return 0;
	153
	154	#else
	155
0f5d89e8 A	156	LocalPointer<Normalizer2DataBuilder> b1(new Normalizer2DataBuilder(errorCode), errorCode);
	157	LocalPointer<Normalizer2DataBuilder> b2;
	158	LocalPointer<Normalizer2DataBuilder> diff;
	159	Normalizer2DataBuilder *builder = b1.getAlias();
729e4ab9 A	160	errorCode.assertSuccess();
729e4ab9 A	161
4388f060 A	162	if(options[UNICODE_VERSION].doesOccur) {
	163	builder->setUnicodeVersion(options[UNICODE_VERSION].value);
	164	}
729e4ab9 A	165
	166	if(options[OPT_FAST].doesOccur) {
	167	builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
	168	}
	169
	170	// prepare the filename beginning with the source dir
	171	CharString filename(options[SOURCEDIR].value, errorCode);
	172	int32_t pathLength=filename.length();
	173	if( pathLength>0 &&
	174	filename[pathLength-1]!=U_FILE_SEP_CHAR &&
	175	filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
	176	) {
	177	filename.append(U_FILE_SEP_CHAR, errorCode);
	178	pathLength=filename.length();
	179	}
	180
0f5d89e8	181	bool doMinus = false;
729e4ab9 A	182	for(int i=1; i<argc; ++i) {
729e4ab9 A	183	printf("gennorm2: processing %s\n", argv[i]);
0f5d89e8 A	184	if(strcmp(argv[i], "minus") == 0) {
	185	if(doMinus) {
	186	fprintf(stderr, "gennorm2 error: only one 'minus' can be specified\n");
	187	exit(U_ILLEGAL_ARGUMENT_ERROR);
	188	}
	189	// Data from previous input files has been collected in b1.
	190	// Collect data from further input files in b2.
	191	b2.adoptInsteadAndCheckErrorCode(new Normalizer2DataBuilder(errorCode), errorCode);
	192	diff.adoptInsteadAndCheckErrorCode(new Normalizer2DataBuilder(errorCode), errorCode);
	193	errorCode.assertSuccess();
	194	builder = b2.getAlias();
	195	if(options[UNICODE_VERSION].doesOccur) {
	196	builder->setUnicodeVersion(options[UNICODE_VERSION].value);
	197	}
	198	if(options[OPT_FAST].doesOccur) {
	199	builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
	200	}
	201	doMinus = true;
	202	continue;
	203	}
729e4ab9	204	filename.append(argv[i], errorCode);
0f5d89e8 A	205	std::ifstream f(filename.data());
0f5d89e8 A	206	if(f.fail()) {
729e4ab9 A	207	fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data());
	208	exit(U_FILE_ACCESS_ERROR);
	209	}
	210	builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
0f5d89e8	211	parseFile(f, *builder);
729e4ab9 A	212	filename.truncate(pathLength);
	213	}
	214
0f5d89e8 A	215	if(doMinus) {
	216	Normalizer2DataBuilder::computeDiff(b1, b2, *diff);
	217	diff->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ true);
	218	} else if(options[WRITE_COMBINED_DATA].doesOccur) {
	219	builder->writeDataFile(options[OUTPUT_FILENAME].value, /* writeRemoved= */ false);
	220	} else if(options[WRITE_C_SOURCE].doesOccur) {
b331163b A	221	builder->writeCSourceFile(options[OUTPUT_FILENAME].value);
	222	} else {
	223	builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
	224	}
729e4ab9 A	225
	226	return errorCode.get();
	227
	228	#endif
	229	}
	230
	231	#if !UCONFIG_NO_NORMALIZATION
	232
0f5d89e8	233	void parseFile(std::ifstream &f, Normalizer2DataBuilder &builder) {
729e4ab9	234	IcuToolErrorCode errorCode("gennorm2/parseFile()");
0f5d89e8	235	std::string lineString;
729e4ab9	236	uint32_t startCP, endCP;
0f5d89e8 A	237	while(std::getline(f, lineString)) {
	238	if (lineString.empty()) {
	239	continue; // skip empty lines.
	240	}
	241	#if (U_CPLUSPLUS_VERSION >= 11)
	242	char *line = &lineString.front();
	243	#else
	244	char *line = &lineString.at(0);
	245	#endif
729e4ab9 A	246	char comment=(char )strchr(line, '#');
	247	if(comment!=NULL) {
	248	*comment=0;
	249	}
	250	u_rtrim(line);
	251	if(line[0]==0) {
	252	continue; // skip empty and comment-only lines
	253	}
	254	if(line[0]=='*') {
4388f060 A	255	const char *s=u_skipWhitespace(line+1);
	256	if(0==strncmp(s, "Unicode", 7)) {
	257	s=u_skipWhitespace(s+7);
	258	builder.setUnicodeVersion(s);
	259	}
729e4ab9 A	260	continue; // reserved syntax
	261	}
	262	const char *delimiter;
	263	int32_t rangeLength=
	264	u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);
	265	if(errorCode.isFailure()) {
	266	fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
	267	exit(errorCode.reset());
	268	}
3d1f044b A	269	if (endCP >= 0xd800 && startCP <= 0xdfff) {
	270	fprintf(stderr, "gennorm2 error: value or mapping for surrogate code points: %s\n",
	271	line);
	272	exit(U_ILLEGAL_ARGUMENT_ERROR);
	273	}
729e4ab9 A	274	delimiter=u_skipWhitespace(delimiter);
	275	if(*delimiter==':') {
	276	const char *s=u_skipWhitespace(delimiter+1);
	277	char *end;
	278	unsigned long value=strtoul(s, &end, 10);
	279	if(end<=s \|\| *u_skipWhitespace(end)!=0 \|\| value>=0xff) {
	280	fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
	281	exit(U_PARSE_ERROR);
	282	}
	283	for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
	284	builder.setCC(c, (uint8_t)value);
	285	}
	286	continue;
	287	}
	288	if(*delimiter=='-') {
	289	if(*u_skipWhitespace(delimiter+1)!=0) {
	290	fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
	291	exit(U_PARSE_ERROR);
	292	}
	293	for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
	294	builder.removeMapping(c);
	295	}
	296	continue;
	297	}
	298	if(delimiter=='=' \|\| delimiter=='>') {
	299	UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
b331163b	300	int32_t length=u_parseString(delimiter+1, uchars, UPRV_LENGTHOF(uchars), NULL, errorCode);
729e4ab9 A	301	if(errorCode.isFailure()) {
	302	fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
	303	exit(errorCode.reset());
	304	}
	305	UnicodeString mapping(FALSE, uchars, length);
	306	if(*delimiter=='=') {
	307	if(rangeLength!=1) {
	308	fprintf(stderr,
	309	"gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
	310	line);
	311	exit(U_PARSE_ERROR);
	312	}
	313	builder.setRoundTripMapping((UChar32)startCP, mapping);
	314	} else {
	315	for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
	316	builder.setOneWayMapping(c, mapping);
	317	}
	318	}
	319	continue;
	320	}
	321	fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
	322	exit(U_PARSE_ERROR);
	323	}
	324	}
	325
	326	#endif // !UCONFIG_NO_NORMALIZATION
	327
	328	U_NAMESPACE_END
	329
	330	/*
	331	* Hey, Emacs, please set the following:
	332	*
	333	* Local Variables:
	334	* indent-tabs-mode: nil
	335	* End:
	336	*
	337	*/