git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/perf/ubrkperf/ubrkperfold.cpp

... / ...

Commit	Line	Data
	1	/********************************************************************
	2	* COPYRIGHT:
	3	* Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.
	4	*
	5	********************************************************************/
	6	/********************************************************************************
	7	*
	8	* File ubrkperf.cpp
	9	*
	10	* Modification History:
	11	* Name Description
	12	* Vladimir Weinstein First Version, based on collperf
	13	*
	14	*********************************************************************************
	15	*/
	16
	17	//
	18	// This program tests break iterator performance
	19	// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
	20	// (if any)
	21	// A text file is required as input. It must be in utf-8 or utf-16 format,
	22	// and include a byte order mark. Either LE or BE format is OK.
	23	//
	24
	// Help text printed by main() when -help/-? is given, when no -file is
	// supplied, or when option parsing fails.
	// Every option listed here must have a matching entry in the opts[] table;
	// "-iloop" was dropped because the table never accepted it, and "-passes"
	// was added because the table accepts it (default 1, see opt_passesCount).
	const char gUsageString[] =
	    "usage: ubrkperf options...\n"
	    "-help Display this message.\n"
	    "-file file_name utf-16/utf-8 format file.\n"
	    "-locale name ICU locale to use. Default is en_US\n"
	    "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
	    " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
	    "-win Run test using Windows native services. (currently not working) (ICU is default)\n"
	    "-unix Run test using Unix word breaking services. (currently not working) \n"
	    "-mac Run test using MacOSX word breaking services.\n"
	    "-uselen Use API with string lengths. Default is null-terminated strings\n"
	    "-char Use character break iterator\n"
	    "-word Use word break iterator\n"
	    "-line Use line break iterator\n"
	    "-sentence Use sentence break iterator\n"
	    "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
	    "-passes n Number of times to repeat the whole test. Default = 1.\n"
	    "-terse Terse numbers-only output. Intended for use by scripts.\n"
	    "-dump Display stuff.\n"
	    "-capi Use C APIs instead of C++ APIs (currently not working)\n"
	    "-next Do the next test\n"
	    "-isBound Do the isBound test\n"
	    ;
	49
	50
	51	#include <stdio.h>
	52	#include <string.h>
	53	#include <stdlib.h>
	54	#include <math.h>
	55	#include <locale.h>
	56	#include <errno.h>
	57	#include <sys/stat.h>
	58
	59	#include <unicode/utypes.h>
	60	#include <unicode/ucol.h>
	61	#include <unicode/ucoleitr.h>
	62	#include <unicode/uloc.h>
	63	#include <unicode/ustring.h>
	64	#include <unicode/ures.h>
	65	#include <unicode/uchar.h>
	66	#include <unicode/ucnv.h>
	67	#include <unicode/utf8.h>
	68
	69	#include <unicode/brkiter.h>
	70
	71
	72	#if U_PLATFORM_HAS_WIN32_API
	73	#include <windows.h>
	74	#else
	75	//
	76	// Stubs for Windows API functions when building on UNIXes.
	77	//
	78	#include <sys/time.h>
	// Stand-in for the Win32 timeGetTime() on non-Windows builds.
	// Returns the current wall-clock time in milliseconds; only differences
	// between two calls are meaningful, and the value may wrap around.
	unsigned long timeGetTime() {
	    struct timeval now;
	    gettimeofday(&now, NULL);
	    // Convert to unsigned before scaling so that wrap-around is well defined
	    // (multiplying a signed time_t could overflow, which is undefined).
	    unsigned long millis = (unsigned long)now.tv_sec * 1000UL;
	    millis += (unsigned long)(now.tv_usec / 1000);
	    return millis;
	}
	86	#define MAKELCID(a,b) 0
	87	#endif
	88
	89
	90	//
	91	// Command line option variables
	92	// These global variables are set according to the options specified
	93	// on the command line by the user.
	94	char * opt_fName = 0;
	95	char * opt_locale = "en_US";
	96	int opt_langid = 0; // Defaults to value corresponding to opt_locale.
	97	char * opt_rules = 0;
	98	UBool opt_help = FALSE;
	99	int opt_time = 0;
	100	int opt_loopCount = 0;
	101	int opt_passesCount= 1;
	102	UBool opt_terse = FALSE;
	103	UBool opt_icu = TRUE;
	104	UBool opt_win = FALSE; // Run with Windows native functions.
	105	UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.
	106	UBool opt_mac = FALSE; // Run with MacOSX word break services.
	107	UBool opt_uselen = FALSE;
	108	UBool opt_dump = FALSE;
	109	UBool opt_char = FALSE;
	110	UBool opt_word = FALSE;
	111	UBool opt_line = FALSE;
	112	UBool opt_sentence = FALSE;
	113	UBool opt_capi = FALSE;
	114
	115	UBool opt_next = FALSE;
	116	UBool opt_isBound = FALSE;
	117
	118
	119
	120	//
	121	// Definitions for the command line options
	122	//
	// One row of the command line option table.
	//   name - option text exactly as typed on the command line (e.g. "-file").
	//   type - FLAG takes no value; NUM and STRING consume the following argument.
	//   pVar - address of the global variable that ProcessOptions() stores into.
	//          It is written as a UBool for FLAG, an int for NUM and a
	//          character pointer for STRING, so it must point at a variable of
	//          the matching type.
	123	struct OptSpec {
	124	const char *name;
	125	enum {FLAG, NUM, STRING} type;
	126	void *pVar;
	127	};
	128
	// Table of every accepted option. ProcessOptions() scans it linearly and
	// stops at the entry whose name is 0, which must therefore stay last.
	129	OptSpec opts[] = {
	130	{"-file", OptSpec::STRING, &opt_fName},
	131	{"-locale", OptSpec::STRING, &opt_locale},
	132	{"-langid", OptSpec::NUM, &opt_langid},
	133	{"-win", OptSpec::FLAG, &opt_win},
	134	{"-unix", OptSpec::FLAG, &opt_unix},
	135	{"-mac", OptSpec::FLAG, &opt_mac},
	136	{"-uselen", OptSpec::FLAG, &opt_uselen},
	137	{"-loop", OptSpec::NUM, &opt_loopCount},
	138	{"-time", OptSpec::NUM, &opt_time},
	139	{"-passes", OptSpec::NUM, &opt_passesCount},
	140	{"-char", OptSpec::FLAG, &opt_char},
	141	{"-word", OptSpec::FLAG, &opt_word},
	142	{"-line", OptSpec::FLAG, &opt_line},
	143	{"-sentence", OptSpec::FLAG, &opt_sentence},
	144	{"-terse", OptSpec::FLAG, &opt_terse},
	145	{"-dump", OptSpec::FLAG, &opt_dump},
	146	{"-capi", OptSpec::FLAG, &opt_capi},
	147	{"-next", OptSpec::FLAG, &opt_next},
	148	{"-isBound", OptSpec::FLAG, &opt_isBound},
	149	{"-help", OptSpec::FLAG, &opt_help},
	150	{"-?", OptSpec::FLAG, &opt_help},
	// Terminator entry: name == 0 ends the scan.
	151	{0, OptSpec::FLAG, 0}
	152	};
	153
	154
	155	//---------------------------------------------------------------------------
	156	//
	157	// Global variables pointing to and describing the test file
	158	//
	159	//---------------------------------------------------------------------------
	160
	161	//DWORD gWinLCID;
	// ICU break iterator under test; created by createICUBrkIt() and deleted at the end of main().
	162	BreakIterator *brkit = NULL;
	// Contents of the input file as UTF-16 code units; allocated and freed in main().
	163	UChar *text = NULL;
	// Number of UTF-16 code units stored in text (set by main() after the file is read).
	164	int32_t textSize = 0;
	165
	166
	167
	168	#if U_PLATFORM_IS_DARWIN_BASED
	169	#include <ApplicationServices/ApplicationServices.h>
	170	enum{
	171	kUCTextBreakAllMask = (kUCTextBreakClusterMask \| kUCTextBreakWordMask \| kUCTextBreakLineMask)
	172	};
	173	UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
	174	TextBreakLocatorRef breakRef;
	175	UCTextBreakType macBreakType;
	176
	177	void createMACBrkIt() {
	178	OSStatus status = noErr;
	179	LocaleRef lref;
	180	status = LocaleRefFromLocaleString(opt_locale, &lref);
	181	status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
	182	if(opt_char == TRUE) {
	183	macBreakType = kUCTextBreakClusterMask;
	184	} else if(opt_word == TRUE) {
	185	macBreakType = kUCTextBreakWordMask;
	186	} else if(opt_line == TRUE) {
	187	macBreakType = kUCTextBreakLineMask;
	188	} else if(opt_sentence == TRUE) {
	189	// error
	190	// brkit = BreakIterator::createSentenceInstance(opt_locale, status);
	191	} else {
	192	// default is character iterator
	193	macBreakType = kUCTextBreakClusterMask;
	194	}
	195	}
	196	#endif
	197
	198	void createICUBrkIt() {
	199	//
	200	// Set up an ICU break iterator
	201	//
	202	UErrorCode status = U_ZERO_ERROR;
	203	if(opt_char == TRUE) {
	204	brkit = BreakIterator::createCharacterInstance(opt_locale, status);
	205	} else if(opt_word == TRUE) {
	206	brkit = BreakIterator::createWordInstance(opt_locale, status);
	207	} else if(opt_line == TRUE) {
	208	brkit = BreakIterator::createLineInstance(opt_locale, status);
	209	} else if(opt_sentence == TRUE) {
	210	brkit = BreakIterator::createSentenceInstance(opt_locale, status);
	211	} else {
	212	// default is character iterator
	213	brkit = BreakIterator::createCharacterInstance(opt_locale, status);
	214	}
	215	if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
	216	fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
	217	}
	218	if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
	219	fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
	220	}
	221
	222	}
	223
	224	//---------------------------------------------------------------------------
	225	//
	226	// ProcessOptions() Function to read the command line options.
	227	//
	228	//---------------------------------------------------------------------------
	229	UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
	230	{
	231	int i;
	232	int argNum;
	233	const char *pArgName;
	234	OptSpec *pOpt;
	235
	236	for (argNum=1; argNum<argc; argNum++) {
	237	pArgName = argv[argNum];
	238	for (pOpt = opts; pOpt->name != 0; pOpt++) {
	239	if (strcmp(pOpt->name, pArgName) == 0) {
	240	switch (pOpt->type) {
	241	case OptSpec::FLAG:
	242	(UBool )(pOpt->pVar) = TRUE;
	243	break;
	244	case OptSpec::STRING:
	245	argNum ++;
	246	if (argNum >= argc) {
	247	fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
	248	return FALSE;
	249	}
	250	(const char *)(pOpt->pVar) = argv[argNum];
	251	break;
	252	case OptSpec::NUM:
	253	argNum ++;
	254	if (argNum >= argc) {
	255	fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
	256	return FALSE;
	257	}
	258	char *endp;
	259	i = strtol(argv[argNum], &endp, 0);
	260	if (endp == argv[argNum]) {
	261	fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
	262	return FALSE;
	263	}
	264	(int )(pOpt->pVar) = i;
	265	}
	266	break;
	267	}
	268	}
	269	if (pOpt->name == 0)
	270	{
	271	fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
	272	return FALSE;
	273	}
	274	}
	275	return TRUE;
	276	}
	277
	278
	279	void doForwardTest() {
	280	if (opt_terse == FALSE) {
	281	printf("Doing the forward test\n");
	282	}
	283	int32_t noBreaks = 0;
	284	int32_t i = 0;
	285	unsigned long startTime = timeGetTime();
	286	unsigned long elapsedTime = 0;
	287	if(opt_icu) {
	288	createICUBrkIt();
	289	brkit->setText(UnicodeString(text, textSize));
	290	brkit->first();
	291	if (opt_terse == FALSE) {
	292	printf("Warmup\n");
	293	}
	294	int j;
	295	while((j = brkit->next()) != BreakIterator::DONE) {
	296	noBreaks++;
	297	//fprintf(stderr, "%d ", j);
	298	}
	299
	300	if (opt_terse == FALSE) {
	301	printf("Measure\n");
	302	}
	303	startTime = timeGetTime();
	304	for(i = 0; i < opt_loopCount; i++) {
	305	brkit->first();
	306	while(brkit->next() != BreakIterator::DONE) {
	307	}
	308	}
	309
	310	elapsedTime = timeGetTime()-startTime;
	311	} else if(opt_mac) {
	312	#if U_PLATFORM_IS_DARWIN_BASED
	313	createMACBrkIt();
	314	UniChar* filePtr = text;
	315	OSStatus status = noErr;
	316	UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
	317	startOffset = 0;
	318	//printf("\t---Search forward--\n");
	319
	320	while (startOffset < numUniChars)
	321	{
	322	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
	323	startOffset, &breakOffset);
	324	//require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));
	325	//require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset));
	326
	327	// Output break
	328	//printf("\t%d\n", (int)breakOffset);
	329
	330	// Increment counters
	331	noBreaks++;
	332	startOffset = breakOffset;
	333	}
	334	startTime = timeGetTime();
	335	for(i = 0; i < opt_loopCount; i++) {
	336	startOffset = 0;
	337
	338	while (startOffset < numUniChars)
	339	{
	340	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
	341	startOffset, &breakOffset);
	342	// Increment counters
	343	startOffset = breakOffset;
	344	}
	345	}
	346	elapsedTime = timeGetTime()-startTime;
	347	UCDisposeTextBreakLocator(&breakRef);
	348	#endif
	349
	350
	351	}
	352
	353
	354	if (opt_terse == FALSE) {
	355	int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
	356	int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
	357	int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
	358	printf("forward break iteration average loop time %d\n", loopTime);
	359	printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
	360	printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
	361	} else {
	362	printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
	363	}
	364
	365
	366	}
	367
	368	void doIsBoundTest() {
	369	int32_t noBreaks = 0, hit = 0;
	370	int32_t i = 0, j = 0;
	371	unsigned long startTime = timeGetTime();
	372	unsigned long elapsedTime = 0;
	373	createICUBrkIt();
	374	brkit->setText(UnicodeString(text, textSize));
	375	brkit->first();
	376	for(j = 0; j < textSize; j++) {
	377	if(brkit->isBoundary(j)) {
	378	noBreaks++;
	379	//fprintf(stderr, "%d ", j);
	380	}
	381	}
	382	/*
	383	while(brkit->next() != BreakIterator::DONE) {
	384	noBreaks++;
	385	}
	386	*/
	387
	388	startTime = timeGetTime();
	389	for(i = 0; i < opt_loopCount; i++) {
	390	for(j = 0; j < textSize; j++) {
	391	if(brkit->isBoundary(j)) {
	392	hit++;
	393	}
	394	}
	395	}
	396
	397	elapsedTime = timeGetTime()-startTime;
	398	int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
	399	if (opt_terse == FALSE) {
	400	int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
	401	int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
	402	printf("forward break iteration average loop time %d\n", loopTime);
	403	printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
	404	printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
	405	} else {
	406	printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
	407	}
	408	}
	409
	410	//----------------------------------------------------------------------------------------
	411	//
	412	// UnixConvert -- Convert the lines of the file to the encoding for UNIX
	413	// Since it appears that Unicode support is going in the general
	414	// direction of the use of UTF-8 locales, that is the approach
	415	// that is used here.
	416	//
	417	//----------------------------------------------------------------------------------------
	// Currently a no-op: the whole body is compiled out with #if 0.
	// The disabled code converts each line of the file to UTF-8 with an ICU
	// converter. It refers to gNumFileLines / gFileLines, which are not defined
	// in the visible part of this file, so it cannot simply be re-enabled.
	void UnixConvert() {
	#if 0
	    int line;

	    UConverter *cvrtr; // An ICU code page converter.
	    UErrorCode status = U_ZERO_ERROR;

	    cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.
	    if (U_FAILURE(status)) {
	        // Print the error code itself, not the address of the variable.
	        fprintf(stderr, "ICU Converter open failed.: %d\n", (int)status);
	        exit(-1);
	    }
	    // redo for unix
	    for (line=0; line < gNumFileLines; line++) {
	        // First call with a null target only computes the required size.
	        int sizeNeeded = ucnv_fromUChars(cvrtr,
	                                         0, // ptr to target buffer.
	                                         0, // length of target buffer.
	                                         gFileLines[line].name,
	                                         -1, // source is null terminated
	                                         &status);
	        if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
	            fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
	            exit(-1);
	        }
	        status = U_ZERO_ERROR;
	        gFileLines[line].unixName = new char[sizeNeeded+1];
	        sizeNeeded = ucnv_fromUChars(cvrtr,
	                                     gFileLines[line].unixName, // ptr to target buffer.
	                                     sizeNeeded+1, // length of target buffer.
	                                     gFileLines[line].name,
	                                     -1, // source is null terminated
	                                     &status);
	        if (U_FAILURE(status)) {
	            fprintf(stderr, "ICU Conversion Failed.: %d\n", (int)status);
	            exit(-1);
	        }
	        gFileLines[line].unixName[sizeNeeded] = 0;
	    }
	    ucnv_close(cvrtr);
	#endif
	}
	460
	461
	462	//----------------------------------------------------------------------------------------
	463	//
	464	// class UCharFile Class to hide all the gorp to read a file in
	465	// and produce a stream of UChars.
	466	//
	467	//----------------------------------------------------------------------------------------
	// Reads a UTF-8 or UTF-16 (LE/BE) file that starts with a byte order mark
	// and hands its contents back one UTF-16 code unit at a time.
	// Typical use: construct, check error(), then call get() repeatedly,
	// checking eof() and error() after each call.
	468	class UCharFile {
	469	public:
	// Opens fileName and detects the encoding from its BOM; sets the error flag on failure.
	470	UCharFile(const char *fileName);
	471	~UCharFile();
	// Returns the next UTF-16 code unit; returns 0 and sets the eof or error flag when none is available.
	472	UChar get();
	473	UBool eof() {return fEof;};
	474	UBool error() {return fError;};
	// File size in bytes as reported by stat(), or -1 if stat() failed.
	475	int32_t size() { return fFileSize; };
	476
	477	private:
	478	UCharFile (const UCharFile &other) {}; // No copy constructor.
	479	UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op
	480
	// Stream opened in binary mode by the constructor.
	481	FILE *fFile;
	// File name as passed to the constructor (pointer is stored, not copied).
	482	const char *fName;
	483	UBool fEof;
	484	UBool fError;
	// Second half of a surrogate pair waiting to be returned by the next get(); 0 if none.
	485	UChar fPending2ndSurrogate;
	486	int32_t fFileSize;
	487
	// Encoding detected from the byte order mark.
	488	enum {UTF16LE, UTF16BE, UTF8} fEncoding;
	489	};
	490
	491	UCharFile::UCharFile(const char * fileName) {
	492	fEof = FALSE;
	493	fError = FALSE;
	494	fName = fileName;
	495	struct stat buf;
	496	int32_t result = stat(fileName, &buf);
	497	if(result != 0) {
	498	fprintf(stderr, "Error getting info\n");
	499	fFileSize = -1;
	500	} else {
	501	fFileSize = buf.st_size;
	502	}
	503	fFile = fopen(fName, "rb");
	504	fPending2ndSurrogate = 0;
	505	if (fFile == NULL) {
	506	fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);
	507	fError = TRUE;
	508	return;
	509	}
	510	//
	511	// Look for the byte order mark at the start of the file.
	512	//
	513	int BOMC1, BOMC2, BOMC3;
	514	BOMC1 = fgetc(fFile);
	515	BOMC2 = fgetc(fFile);
	516
	517	if (BOMC1 == 0xff && BOMC2 == 0xfe) {
	518	fEncoding = UTF16LE; }
	519	else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
	520	fEncoding = UTF16BE; }
	521	else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
	522	fEncoding = UTF8; }
	523	else
	524	{
	525	fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "
	526	"must include a BOM.\n", fileName);
	527	fError = true;
	528	return;
	529	}
	530	}
	531
	532
	533	UCharFile::~UCharFile() {
	534	fclose(fFile);
	535	}
	536
	537
	538
	539	UChar UCharFile::get() {
	540	UChar c;
	541	switch (fEncoding) {
	542	case UTF16LE:
	543	{
	544	int cL, cH;
	545	cL = fgetc(fFile);
	546	cH = fgetc(fFile);
	547	c = cL \| (cH << 8);
	548	if (cH == EOF) {
	549	c = 0;
	550	fEof = TRUE;
	551	}
	552	break;
	553	}
	554	case UTF16BE:
	555	{
	556	int cL, cH;
	557	cH = fgetc(fFile);
	558	cL = fgetc(fFile);
	559	c = cL \| (cH << 8);
	560	if (cL == EOF) {
	561	c = 0;
	562	fEof = TRUE;
	563	}
	564	break;
	565	}
	566	case UTF8:
	567	{
	568	if (fPending2ndSurrogate != 0) {
	569	c = fPending2ndSurrogate;
	570	fPending2ndSurrogate = 0;
	571	break;
	572	}
	573
	574	int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type.
	575	if (ch == EOF) {
	576	c = 0;
	577	fEof = TRUE;
	578	break;
	579	}
	580
	581	if (ch <= 0x7f) {
	582	// It's ascii. No further utf-8 conversion.
	583	c = ch;
	584	break;
	585	}
	586
	587	// Figure out the lenght of the char and read the rest of the bytes
	588	// into a temp array.
	589	int nBytes;
	590	if (ch >= 0xF0) {nBytes=4;}
	591	else if (ch >= 0xE0) {nBytes=3;}
	592	else if (ch >= 0xC0) {nBytes=2;}
	593	else {
	594	fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile));
	595	fError = TRUE;
	596	return 0;
	597	}
	598
	599	unsigned char bytes[10];
	600	bytes[0] = (unsigned char)ch;
	601	int i;
	602	for (i=1; i<nBytes; i++) {
	603	bytes[i] = fgetc(fFile);
	604	if (bytes[i] < 0x80 \|\| bytes[i] >= 0xc0) {
	605	fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch);
	606	fError = TRUE;
	607	return 0;
	608	}
	609	}
	610
	611	// Convert the bytes from the temp array to a Unicode char.
	612	i = 0;
	613	uint32_t cp;
	614	U8_NEXT_UNSAFE(bytes, i, cp);
	615	c = (UChar)cp;
	616
	617	if (cp >= 0x10000) {
	618	// The code point needs to be broken up into a utf-16 surrogate pair.
	619	// Process first half this time through the main loop, and
	620	// remember the other half for the next time through.
	621	UChar utf16Buf[3];
	622	i = 0;
	623	UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
	624	fPending2ndSurrogate = utf16Buf[1];
	625	c = utf16Buf[0];
	626	}
	627	break;
	628	};
	629	}
	630	return c;
	631	}
	632
	633
	634	//----------------------------------------------------------------------------------------
	635	//
	636	// Main -- process command line, read in and pre-process the test file,
	637	// call other functions to do the actual tests.
	638	//
	639	//----------------------------------------------------------------------------------------
	640	int main(int argc, const char** argv) {
	641	if (ProcessOptions(argc, argv, opts) != TRUE \|\| opt_help \|\| opt_fName == 0) {
	642	printf(gUsageString);
	643	exit (1);
	644	}
	645	// Make sure that we've only got one API selected.
	646	if (opt_mac \|\| opt_unix \|\| opt_win) opt_icu = FALSE;
	647	if (opt_mac \|\| opt_unix) opt_win = FALSE;
	648	if (opt_mac) opt_unix = FALSE;
	649
	650	UErrorCode status = U_ZERO_ERROR;
	651
	652
	653
	654	//
	655	// Set up a Windows LCID
	656	//
	657	/*
	658	if (opt_langid != 0) {
	659	gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
	660	}
	661	else {
	662	gWinLCID = uloc_getLCID(opt_locale);
	663	}
	664	*/
	665
	666	//
	667	// Set the UNIX locale
	668	//
	669	if (opt_unix) {
	670	if (setlocale(LC_ALL, opt_locale) == 0) {
	671	fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
	672	exit(-1);
	673	}
	674	}
	675
	676	// Read in the input file.
	677	// File assumed to be utf-16.
	678	// Lines go onto heap buffers. Global index array to line starts is created.
	679	// Lines themselves are null terminated.
	680	//
	681
	682	UCharFile f(opt_fName);
	683	if (f.error()) {
	684	exit(-1);
	685	}
	686	int32_t fileSize = f.size();
	687	const int STARTSIZE = 70000;
	688	int32_t bufSize = 0;
	689	int32_t charCount = 0;
	690	if(fileSize != -1) {
	691	text = (UChar )malloc(fileSizesizeof(UChar));
	692	bufSize = fileSize;
	693	} else {
	694	text = (UChar )malloc(STARTSIZEsizeof(UChar));
	695	bufSize = STARTSIZE;
	696	}
	697	if(text == NULL) {
	698	fprintf(stderr, "Allocating buffer failed\n");
	699	exit(-1);
	700	}
	701
	702
	703	// Read the file, split into lines, and save in memory.
	704	// Loop runs once per utf-16 value from the input file,
	705	// (The number of bytes read from file per loop iteration depends on external encoding.)
	706	for (;;) {
	707
	708	UChar c = f.get();
	709	if(f.eof()) {
	710	break;
	711	}
	712	if (f.error()){
	713	exit(-1);
	714	}
	715	// We now have a good UTF-16 value in c.
	716	text[charCount++] = c;
	717	if(charCount == bufSize) {
	718	text = (UChar )realloc(text, 2bufSize*sizeof(UChar));
	719	if(text == NULL) {
	720	fprintf(stderr, "Reallocating buffer failed\n");
	721	exit(-1);
	722	}
	723	bufSize *= 2;
	724	}
	725	}
	726
	727
	728	if (opt_terse == FALSE) {
	729	printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
	730	}
	731
	732	textSize = charCount;
	733
	734
	735
	736
	737	//
	738	// Dump file contents if requested.
	739	//
	740	if (opt_dump) {
	741	// dump file, etc... possibly
	742	}
	743
	744
	745	//
	746	// We've got the file read into memory. Go do something with it.
	747	//
	748	int32_t i = 0;
	749	for(i = 0; i < opt_passesCount; i++) {
	750	if(opt_loopCount != 0) {
	751	if(opt_next) {
	752	doForwardTest();
	753	} else if(opt_isBound) {
	754	doIsBoundTest();
	755	} else {
	756	doForwardTest();
	757	}
	758	} else if(opt_time != 0) {
	759
	760	}
	761	}
	762
	763	if(text != NULL) {
	764	free(text);
	765	}
	766	if(brkit != NULL) {
	767	delete brkit;
	768	}
	769
	770	return 0;
	771	}