]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/perf/ubrkperf/ubrkperfold.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / perf / ubrkperf / ubrkperfold.cpp
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
51004dcb 3 * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.
b75a7d8f
A
4 *
5 ********************************************************************/
6/********************************************************************************
7*
8* File ubrkperf.cpp
9*
10* Modification History:
11* Name Description
12* Vladimir Weinstein First Version, based on collperf
13*
14*********************************************************************************
15*/
16
17//
18// This program tests break iterator performance
19// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
20// (if any)
21// A text file is required as input. It must be in utf-8 or utf-16 format,
22// and include a byte order mark. Either LE or BE format is OK.
23//
24
// Usage text printed by main() for -help, -?, a missing -file, or a command
// line that ProcessOptions() rejects. The options listed here are meant to
// mirror the entries of the opts[] table: "-iloop" was advertised but is not
// accepted by the parser, while "-passes" and "-time" are accepted but were
// not listed.
const char gUsageString[] =
 "usage: ubrkperf options...\n"
 "-help Display this message.\n"
 "-file file_name utf-16/utf-8 format file.\n"
 "-locale name ICU locale to use. Default is en_US\n"
 "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
 " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
 "-win Run test using Windows native services. (currently not working) (ICU is default)\n"
 "-unix Run test using Unix word breaking services. (currently not working) \n"
 "-mac Run test using MacOSX word breaking services.\n"
 "-uselen Use API with string lengths. Default is null-terminated strings\n"
 "-char Use character break iterator\n"
 "-word Use word break iterator\n"
 "-line Use line break iterator\n"
 "-sentence Use sentence break iterator\n"
 "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
 "-passes n Number of times the whole test is repeated. Default = 1.\n"
 "-time nnnn Accepted, but timed runs are currently not implemented.\n"
 "-terse Terse numbers-only output. Intended for use by scripts.\n"
 "-dump Display stuff.\n"
 "-capi Use C APIs instead of C++ APIs (currently not working)\n"
 "-next Do the next test\n"
 "-isBound Do the isBound test\n"
 ;
49
50
51#include <stdio.h>
52#include <string.h>
53#include <stdlib.h>
54#include <math.h>
55#include <locale.h>
56#include <errno.h>
57#include <sys/stat.h>
58
59#include <unicode/utypes.h>
60#include <unicode/ucol.h>
61#include <unicode/ucoleitr.h>
62#include <unicode/uloc.h>
63#include <unicode/ustring.h>
64#include <unicode/ures.h>
65#include <unicode/uchar.h>
66#include <unicode/ucnv.h>
67#include <unicode/utf8.h>
68
69#include <unicode/brkiter.h>
70
71
4388f060 72#if U_PLATFORM_HAS_WIN32_API
b75a7d8f
A
73#include <windows.h>
74#else
75//
76// Stubs for Windows API functions when building on UNIXes.
77//
78#include <sys/time.h>
//
// timeGetTime()   Replacement for the Windows function of the same name,
//                 used when not building against the Win32 API.
//
//                 Returns the current gettimeofday() time in milliseconds.
//                 The value is allowed to wrap; callers only ever subtract
//                 two readings, which stays correct in unsigned arithmetic.
//
unsigned long timeGetTime() {
    struct timeval t;
    gettimeofday(&t, 0);
    // Convert to unsigned before scaling: the multiplication is expected to
    // wrap, and only unsigned wraparound is well defined.
    unsigned long val = (unsigned long)t.tv_sec * 1000UL;
    val += (unsigned long)t.tv_usec / 1000UL;
    return val;
}
86#define MAKELCID(a,b) 0
87#endif
88
89
90//
91// Command line option variables
92// These global variables are set according to the options specified
93// on the command line by the user.
94char * opt_fName = 0;
95char * opt_locale = "en_US";
96int opt_langid = 0; // Defaults to value corresponding to opt_locale.
97char * opt_rules = 0;
98UBool opt_help = FALSE;
99int opt_time = 0;
100int opt_loopCount = 0;
101int opt_passesCount= 1;
102UBool opt_terse = FALSE;
103UBool opt_icu = TRUE;
104UBool opt_win = FALSE; // Run with Windows native functions.
105UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.
106UBool opt_mac = FALSE; // Run with MacOSX word break services.
107UBool opt_uselen = FALSE;
108UBool opt_dump = FALSE;
109UBool opt_char = FALSE;
110UBool opt_word = FALSE;
111UBool opt_line = FALSE;
112UBool opt_sentence = FALSE;
113UBool opt_capi = FALSE;
114
115UBool opt_next = FALSE;
116UBool opt_isBound = FALSE;
117
118
119
120//
121// Definitions for the command line options
122//
// One row of the command line option table.
struct OptSpec {
    // Option spelling exactly as typed, e.g. "-file". A null name marks the
    // end of the table.
    const char *name;

    // How the option is parsed: FLAG takes no value, NUM and STRING consume
    // the following command line argument.
    enum Kind {FLAG, NUM, STRING} type;

    // Address of the variable that receives the value. ProcessOptions()
    // writes through it as UBool* (FLAG), int* (NUM) or const char** (STRING).
    void *pVar;
};
128
129OptSpec opts[] = {
130 {"-file", OptSpec::STRING, &opt_fName},
131 {"-locale", OptSpec::STRING, &opt_locale},
132 {"-langid", OptSpec::NUM, &opt_langid},
133 {"-win", OptSpec::FLAG, &opt_win},
134 {"-unix", OptSpec::FLAG, &opt_unix},
135 {"-mac", OptSpec::FLAG, &opt_mac},
136 {"-uselen", OptSpec::FLAG, &opt_uselen},
137 {"-loop", OptSpec::NUM, &opt_loopCount},
138 {"-time", OptSpec::NUM, &opt_time},
139 {"-passes", OptSpec::NUM, &opt_passesCount},
140 {"-char", OptSpec::FLAG, &opt_char},
141 {"-word", OptSpec::FLAG, &opt_word},
142 {"-line", OptSpec::FLAG, &opt_line},
143 {"-sentence", OptSpec::FLAG, &opt_sentence},
144 {"-terse", OptSpec::FLAG, &opt_terse},
145 {"-dump", OptSpec::FLAG, &opt_dump},
146 {"-capi", OptSpec::FLAG, &opt_capi},
147 {"-next", OptSpec::FLAG, &opt_next},
148 {"-isBound", OptSpec::FLAG, &opt_isBound},
149 {"-help", OptSpec::FLAG, &opt_help},
150 {"-?", OptSpec::FLAG, &opt_help},
151 {0, OptSpec::FLAG, 0}
152};
153
154
155//---------------------------------------------------------------------------
156//
157// Global variables pointing to and describing the test file
158//
159//---------------------------------------------------------------------------
160
161//DWORD gWinLCID;
162BreakIterator *brkit = NULL;
163UChar *text = NULL;
164int32_t textSize = 0;
165
166
167
4388f060 168#if U_PLATFORM_IS_DARWIN_BASED
b75a7d8f
A
169#include <ApplicationServices/ApplicationServices.h>
170enum{
171 kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask)
172 };
173UCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
174TextBreakLocatorRef breakRef;
175UCTextBreakType macBreakType;
176
177void createMACBrkIt() {
178 OSStatus status = noErr;
179 LocaleRef lref;
180 status = LocaleRefFromLocaleString(opt_locale, &lref);
181 status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
182 if(opt_char == TRUE) {
183 macBreakType = kUCTextBreakClusterMask;
184 } else if(opt_word == TRUE) {
185 macBreakType = kUCTextBreakWordMask;
186 } else if(opt_line == TRUE) {
187 macBreakType = kUCTextBreakLineMask;
188 } else if(opt_sentence == TRUE) {
189 // error
190 // brkit = BreakIterator::createSentenceInstance(opt_locale, status);
191 } else {
192 // default is character iterator
193 macBreakType = kUCTextBreakClusterMask;
194 }
195}
196#endif
197
198void createICUBrkIt() {
199 //
200 // Set up an ICU break iterator
201 //
202 UErrorCode status = U_ZERO_ERROR;
203 if(opt_char == TRUE) {
204 brkit = BreakIterator::createCharacterInstance(opt_locale, status);
205 } else if(opt_word == TRUE) {
206 brkit = BreakIterator::createWordInstance(opt_locale, status);
207 } else if(opt_line == TRUE) {
208 brkit = BreakIterator::createLineInstance(opt_locale, status);
209 } else if(opt_sentence == TRUE) {
210 brkit = BreakIterator::createSentenceInstance(opt_locale, status);
211 } else {
212 // default is character iterator
213 brkit = BreakIterator::createCharacterInstance(opt_locale, status);
214 }
215 if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
216 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
217 }
218 if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
219 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
220 }
221
222}
223
224//---------------------------------------------------------------------------
225//
226// ProcessOptions() Function to read the command line options.
227//
228//---------------------------------------------------------------------------
229UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
230{
231 int i;
232 int argNum;
233 const char *pArgName;
234 OptSpec *pOpt;
235
236 for (argNum=1; argNum<argc; argNum++) {
237 pArgName = argv[argNum];
238 for (pOpt = opts; pOpt->name != 0; pOpt++) {
239 if (strcmp(pOpt->name, pArgName) == 0) {
240 switch (pOpt->type) {
241 case OptSpec::FLAG:
242 *(UBool *)(pOpt->pVar) = TRUE;
243 break;
244 case OptSpec::STRING:
245 argNum ++;
246 if (argNum >= argc) {
247 fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
248 return FALSE;
249 }
250 *(const char **)(pOpt->pVar) = argv[argNum];
251 break;
252 case OptSpec::NUM:
253 argNum ++;
254 if (argNum >= argc) {
255 fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
256 return FALSE;
257 }
258 char *endp;
259 i = strtol(argv[argNum], &endp, 0);
260 if (endp == argv[argNum]) {
261 fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
262 return FALSE;
263 }
264 *(int *)(pOpt->pVar) = i;
265 }
266 break;
267 }
268 }
269 if (pOpt->name == 0)
270 {
271 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
272 return FALSE;
273 }
274 }
275return TRUE;
276}
277
278
279void doForwardTest() {
280 if (opt_terse == FALSE) {
281 printf("Doing the forward test\n");
282 }
283 int32_t noBreaks = 0;
284 int32_t i = 0;
285 unsigned long startTime = timeGetTime();
286 unsigned long elapsedTime = 0;
287 if(opt_icu) {
288 createICUBrkIt();
289 brkit->setText(UnicodeString(text, textSize));
290 brkit->first();
291 if (opt_terse == FALSE) {
292 printf("Warmup\n");
293 }
294 int j;
295 while((j = brkit->next()) != BreakIterator::DONE) {
296 noBreaks++;
297 //fprintf(stderr, "%d ", j);
298 }
299
300 if (opt_terse == FALSE) {
301 printf("Measure\n");
302 }
303 startTime = timeGetTime();
304 for(i = 0; i < opt_loopCount; i++) {
305 brkit->first();
306 while(brkit->next() != BreakIterator::DONE) {
307 }
308 }
309
310 elapsedTime = timeGetTime()-startTime;
311 } else if(opt_mac) {
4388f060 312#if U_PLATFORM_IS_DARWIN_BASED
b75a7d8f
A
313 createMACBrkIt();
314 UniChar* filePtr = text;
315 OSStatus status = noErr;
316 UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
317 startOffset = 0;
318 //printf("\t---Search forward--\n");
319
320 while (startOffset < numUniChars)
321 {
322 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
323 startOffset, &breakOffset);
324 //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));
325 //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset));
326
327 // Output break
328 //printf("\t%d\n", (int)breakOffset);
329
330 // Increment counters
331 noBreaks++;
332 startOffset = breakOffset;
333 }
334 startTime = timeGetTime();
335 for(i = 0; i < opt_loopCount; i++) {
336 startOffset = 0;
337
338 while (startOffset < numUniChars)
339 {
340 status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
341 startOffset, &breakOffset);
342 // Increment counters
343 startOffset = breakOffset;
344 }
345 }
346 elapsedTime = timeGetTime()-startTime;
347 UCDisposeTextBreakLocator(&breakRef);
348#endif
349
350
351 }
352
353
354 if (opt_terse == FALSE) {
355 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
356 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
357 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
358 printf("forward break iteration average loop time %d\n", loopTime);
359 printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
360 printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
361 } else {
362 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
363 }
364
365
366}
367
368void doIsBoundTest() {
369 int32_t noBreaks = 0, hit = 0;
370 int32_t i = 0, j = 0;
371 unsigned long startTime = timeGetTime();
372 unsigned long elapsedTime = 0;
373 createICUBrkIt();
374 brkit->setText(UnicodeString(text, textSize));
375 brkit->first();
376 for(j = 0; j < textSize; j++) {
377 if(brkit->isBoundary(j)) {
378 noBreaks++;
379 //fprintf(stderr, "%d ", j);
380 }
381 }
382 /*
383 while(brkit->next() != BreakIterator::DONE) {
384 noBreaks++;
385 }
386 */
387
388 startTime = timeGetTime();
389 for(i = 0; i < opt_loopCount; i++) {
390 for(j = 0; j < textSize; j++) {
391 if(brkit->isBoundary(j)) {
392 hit++;
393 }
394 }
395 }
396
397 elapsedTime = timeGetTime()-startTime;
398 int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
399 if (opt_terse == FALSE) {
400 int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
401 int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
402 printf("forward break iteration average loop time %d\n", loopTime);
403 printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
404 printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
405 } else {
406 printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
407 }
408}
409
410//----------------------------------------------------------------------------------------
411//
412// UnixConvert -- Convert the lines of the file to the encoding for UNIX
413// Since it appears that Unicode support is going in the general
414// direction of the use of UTF-8 locales, that is the approach
415// that is used here.
416//
417//----------------------------------------------------------------------------------------
// Intentionally empty. The previous body was compiled out with "#if 0" and
// operated on per-line file data (gFileLines, gNumFileLines) that this
// program does not define, so it could not have been re-enabled as written.
// The function is kept so that any existing caller still links.
void UnixConvert() {
}
460
461
462//----------------------------------------------------------------------------------------
463//
464// class UCharFile Class to hide all the gorp to read a file in
465// and produce a stream of UChars.
466//
467//----------------------------------------------------------------------------------------
468class UCharFile {
469public:
470 UCharFile(const char *fileName);
471 ~UCharFile();
472 UChar get();
473 UBool eof() {return fEof;};
474 UBool error() {return fError;};
475 int32_t size() { return fFileSize; };
476
477private:
478 UCharFile (const UCharFile &other) {}; // No copy constructor.
479 UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op
480
481 FILE *fFile;
482 const char *fName;
483 UBool fEof;
484 UBool fError;
485 UChar fPending2ndSurrogate;
486 int32_t fFileSize;
487
488 enum {UTF16LE, UTF16BE, UTF8} fEncoding;
489};
490
491UCharFile::UCharFile(const char * fileName) {
492 fEof = FALSE;
493 fError = FALSE;
494 fName = fileName;
495 struct stat buf;
496 int32_t result = stat(fileName, &buf);
497 if(result != 0) {
498 fprintf(stderr, "Error getting info\n");
499 fFileSize = -1;
500 } else {
501 fFileSize = buf.st_size;
502 }
503 fFile = fopen(fName, "rb");
504 fPending2ndSurrogate = 0;
505 if (fFile == NULL) {
506 fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);
507 fError = TRUE;
508 return;
509 }
510 //
511 // Look for the byte order mark at the start of the file.
512 //
513 int BOMC1, BOMC2, BOMC3;
514 BOMC1 = fgetc(fFile);
515 BOMC2 = fgetc(fFile);
516
517 if (BOMC1 == 0xff && BOMC2 == 0xfe) {
518 fEncoding = UTF16LE; }
519 else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
520 fEncoding = UTF16BE; }
521 else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
522 fEncoding = UTF8; }
523 else
524 {
525 fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "
526 "must include a BOM.\n", fileName);
527 fError = true;
528 return;
529 }
530}
531
532
533UCharFile::~UCharFile() {
534 fclose(fFile);
535}
536
537
538
539UChar UCharFile::get() {
540 UChar c;
541 switch (fEncoding) {
542 case UTF16LE:
543 {
544 int cL, cH;
545 cL = fgetc(fFile);
546 cH = fgetc(fFile);
547 c = cL | (cH << 8);
548 if (cH == EOF) {
549 c = 0;
550 fEof = TRUE;
551 }
552 break;
553 }
554 case UTF16BE:
555 {
556 int cL, cH;
557 cH = fgetc(fFile);
558 cL = fgetc(fFile);
559 c = cL | (cH << 8);
560 if (cL == EOF) {
561 c = 0;
562 fEof = TRUE;
563 }
564 break;
565 }
566 case UTF8:
567 {
568 if (fPending2ndSurrogate != 0) {
569 c = fPending2ndSurrogate;
570 fPending2ndSurrogate = 0;
571 break;
572 }
573
574 int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type.
575 if (ch == EOF) {
576 c = 0;
577 fEof = TRUE;
578 break;
579 }
580
581 if (ch <= 0x7f) {
582 // It's ascii. No further utf-8 conversion.
583 c = ch;
584 break;
585 }
586
587 // Figure out the lenght of the char and read the rest of the bytes
588 // into a temp array.
589 int nBytes;
590 if (ch >= 0xF0) {nBytes=4;}
591 else if (ch >= 0xE0) {nBytes=3;}
592 else if (ch >= 0xC0) {nBytes=2;}
593 else {
594 fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile));
595 fError = TRUE;
596 return 0;
597 }
598
599 unsigned char bytes[10];
600 bytes[0] = (unsigned char)ch;
601 int i;
602 for (i=1; i<nBytes; i++) {
603 bytes[i] = fgetc(fFile);
604 if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
605 fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch);
606 fError = TRUE;
607 return 0;
608 }
609 }
610
611 // Convert the bytes from the temp array to a Unicode char.
612 i = 0;
613 uint32_t cp;
51004dcb 614 U8_NEXT_UNSAFE(bytes, i, cp);
b75a7d8f
A
615 c = (UChar)cp;
616
617 if (cp >= 0x10000) {
618 // The code point needs to be broken up into a utf-16 surrogate pair.
619 // Process first half this time through the main loop, and
620 // remember the other half for the next time through.
621 UChar utf16Buf[3];
622 i = 0;
623 UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
624 fPending2ndSurrogate = utf16Buf[1];
625 c = utf16Buf[0];
626 }
627 break;
628 };
629 }
630 return c;
631}
632
633
634//----------------------------------------------------------------------------------------
635//
636// Main -- process command line, read in and pre-process the test file,
637// call other functions to do the actual tests.
638//
639//----------------------------------------------------------------------------------------
640int main(int argc, const char** argv) {
641 if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
642 printf(gUsageString);
643 exit (1);
644 }
645 // Make sure that we've only got one API selected.
646 if (opt_mac || opt_unix || opt_win) opt_icu = FALSE;
647 if (opt_mac || opt_unix) opt_win = FALSE;
648 if (opt_mac) opt_unix = FALSE;
649
650 UErrorCode status = U_ZERO_ERROR;
651
652
653
654 //
655 // Set up a Windows LCID
656 //
657 /*
658 if (opt_langid != 0) {
659 gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
660 }
661 else {
662 gWinLCID = uloc_getLCID(opt_locale);
663 }
664 */
665
666 //
667 // Set the UNIX locale
668 //
669 if (opt_unix) {
670 if (setlocale(LC_ALL, opt_locale) == 0) {
671 fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
672 exit(-1);
673 }
674 }
675
676 // Read in the input file.
677 // File assumed to be utf-16.
678 // Lines go onto heap buffers. Global index array to line starts is created.
679 // Lines themselves are null terminated.
680 //
681
682 UCharFile f(opt_fName);
683 if (f.error()) {
684 exit(-1);
685 }
686 int32_t fileSize = f.size();
687 const int STARTSIZE = 70000;
688 int32_t bufSize = 0;
689 int32_t charCount = 0;
690 if(fileSize != -1) {
691 text = (UChar *)malloc(fileSize*sizeof(UChar));
692 bufSize = fileSize;
693 } else {
694 text = (UChar *)malloc(STARTSIZE*sizeof(UChar));
695 bufSize = STARTSIZE;
696 }
697 if(text == NULL) {
698 fprintf(stderr, "Allocating buffer failed\n");
699 exit(-1);
700 }
701
702
703 // Read the file, split into lines, and save in memory.
704 // Loop runs once per utf-16 value from the input file,
705 // (The number of bytes read from file per loop iteration depends on external encoding.)
706 for (;;) {
707
708 UChar c = f.get();
709 if(f.eof()) {
710 break;
711 }
712 if (f.error()){
713 exit(-1);
714 }
715 // We now have a good UTF-16 value in c.
716 text[charCount++] = c;
717 if(charCount == bufSize) {
718 text = (UChar *)realloc(text, 2*bufSize*sizeof(UChar));
719 if(text == NULL) {
720 fprintf(stderr, "Reallocating buffer failed\n");
721 exit(-1);
722 }
723 bufSize *= 2;
724 }
725 }
726
727
728 if (opt_terse == FALSE) {
729 printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
730 }
731
732 textSize = charCount;
733
734
735
736
737 //
738 // Dump file contents if requested.
739 //
740 if (opt_dump) {
741 // dump file, etc... possibly
742 }
743
744
745 //
746 // We've got the file read into memory. Go do something with it.
747 //
748 int32_t i = 0;
749 for(i = 0; i < opt_passesCount; i++) {
750 if(opt_loopCount != 0) {
751 if(opt_next) {
752 doForwardTest();
753 } else if(opt_isBound) {
754 doIsBoundTest();
755 } else {
756 doForwardTest();
757 }
758 } else if(opt_time != 0) {
759
760 }
761 }
762
763 if(text != NULL) {
764 free(text);
765 }
766 if(brkit != NULL) {
767 delete brkit;
768 }
769
770 return 0;
771}