]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/perf/dicttrieperf/dicttrieperf.cpp
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / test / perf / dicttrieperf / dicttrieperf.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: dicttrieperf.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010dec09
12 * created by: Markus W. Scherer
13 *
14 * Performance test program for dictionary-type tries.
15 *
16 * Usage from within <ICU build tree>/test/perf/dicttrieperf/ :
17 * (Linux)
18 * make
19 * export LD_LIBRARY_PATH=../../../lib:../../../stubdata:../../../tools/ctestfw
20 * ./dicttrieperf --sourcedir <ICU build tree>/data/out/tmp --passes 3 --iterations 1000
21 * or
22 * ./dicttrieperf -f <ICU source tree>/source/data/brkitr/thaidict.txt --passes 3 --iterations 250
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include "unicode/bytestrie.h"
28 #include "unicode/bytestriebuilder.h"
29 #include "unicode/localpointer.h"
30 #include "unicode/ucharstrie.h"
31 #include "unicode/ucharstriebuilder.h"
32 #include "unicode/uperf.h"
33 #include "unicode/utext.h"
34 #include "charstr.h"
35 #include "package.h"
36 #include "toolutil.h"
37 #include "triedict.h"
38 #include "ucbuf.h" // struct ULine
39 #include "uoptions.h"
40 #include "uvectr32.h"
41
// Number of elements of a C array, as an int32_t.
// Must be given an actual array, not a pointer.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof(*(array))))
43
44 // Test object.
45 class DictionaryTriePerfTest : public UPerfTest {
46 public:
47 DictionaryTriePerfTest(int32_t argc, const char *argv[], UErrorCode &status)
48 : UPerfTest(argc, argv, NULL, 0, "", status), numTextLines(0) {
49 if(hasFile()) {
50 getLines(status);
51 for(int32_t i=0; i<numLines; ++i) {
52 // Skip comment lines (start with a character below 'A').
53 if(lines[i].name[0]>=0x41) {
54 ++numTextLines;
55 // Remove trailing CR LF.
56 int32_t len=lines[i].len;
57 UChar c;
58 while(len>0 && ((c=lines[i].name[len-1])==0xa || c==0xd)) {
59 --len;
60 }
61 lines[i].len=len;
62 }
63 }
64 }
65 }
66
67 virtual UPerfFunction *runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
68
69 const char *getSourceDir() const { return sourceDir; }
70
71 UBool hasFile() const { return ucharBuf!=NULL; }
72 const ULine *getCachedLines() const { return lines; }
73 int32_t getNumLines() const { return numLines; }
74 int32_t numTextLines; // excluding comment lines
75 };
76
77 // Performance test function object.
78 // Loads icudt46l.dat (or whatever its current versioned filename)
79 // from the -s or --sourcedir path.
80 class PackageLookup : public UPerfFunction {
81 protected:
82 PackageLookup(const DictionaryTriePerfTest &perf) {
83 IcuToolErrorCode errorCode("PackageLookup()");
84 CharString filename(perf.getSourceDir(), errorCode);
85 int32_t filenameLength=filename.length();
86 if(filenameLength>0 && filename[filenameLength-1]!=U_FILE_SEP_CHAR &&
87 filename[filenameLength-1]!=U_FILE_ALT_SEP_CHAR) {
88 filename.append(U_FILE_SEP_CHAR, errorCode);
89 }
90 filename.append(U_ICUDATA_NAME, errorCode);
91 filename.append(".dat", errorCode);
92 pkg.readPackage(filename.data());
93 }
94
95 public:
96 virtual ~PackageLookup() {}
97
98 // virtual void call(UErrorCode* pErrorCode) { ... }
99
100 virtual long getOperationsPerIteration() {
101 return pkg.getItemCount();
102 }
103
104 // virtual long getEventsPerIteration();
105
106 protected:
107 Package pkg;
108 };
109
// Table-of-contents entry: a pair of 32-bit offsets.
struct TOCEntry {
    int32_t nameOffset;
    int32_t dataOffset;
};
113
114 // Similar to ICU 4.6 offsetTOCLookupFn() (in ucmndata.c).
115 static int32_t simpleBinarySearch(const char *s, const char *names, const TOCEntry *toc, int32_t count) {
116 int32_t start=0;
117 int32_t limit=count;
118 int32_t lastNumber=limit;
119 for(;;) {
120 int32_t number=(start+limit)/2;
121 if(lastNumber==number) { // have we moved?
122 return -1; // not found
123 }
124 lastNumber=number;
125 int32_t cmp=strcmp(s, names+toc[number].nameOffset);
126 if(cmp<0) {
127 limit=number;
128 } else if(cmp>0) {
129 start=number;
130 } else { // found s
131 return number;
132 }
133 }
134 }
135
136 class BinarySearchPackageLookup : public PackageLookup {
137 public:
138 BinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
139 : PackageLookup(perf) {
140 IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
141 int32_t count=pkg.getItemCount();
142 toc=new TOCEntry[count];
143 for(int32_t i=0; i<count; ++i) {
144 toc[i].nameOffset=itemNames.length();
145 toc[i].dataOffset=i; // arbitrary value, see toc comment below
146 // The Package class removes the "icudt46l/" prefix.
147 // We restore that here for a fair performance test.
148 const char *name=pkg.getItem(i)->name;
149 itemNames.append("icudt46l/", errorCode);
150 itemNames.append(name, strlen(name)+1, errorCode);
151 }
152 printf("size of item names: %6ld\n", (long)itemNames.length());
153 printf("size of TOC: %6ld\n", (long)(count*8));
154 printf("total index size: %6ld\n", (long)(itemNames.length()+count*8));
155 }
156 virtual ~BinarySearchPackageLookup() {
157 delete[] toc;
158 }
159
160 virtual void call(UErrorCode * /*pErrorCode*/) {
161 int32_t count=pkg.getItemCount();
162 const char *itemNameChars=itemNames.data();
163 const char *name=itemNameChars;
164 for(int32_t i=0; i<count; ++i) {
165 if(simpleBinarySearch(name, itemNameChars, toc, count)<0) {
166 fprintf(stderr, "item not found: %s\n", name);
167 }
168 name=strchr(name, 0)+1;
169 }
170 }
171
172 protected:
173 CharString itemNames;
174 // toc imitates a .dat file's array of UDataOffsetTOCEntry
175 // with nameOffset and dataOffset.
176 // We don't need the dataOffsets, but we want to imitate the real
177 // memory density, to measure equivalent CPU cache usage.
178 TOCEntry *toc;
179 };
180
// Smaller of two values; yields b when the two compare equal.
// Only defined here if there is no MIN macro yet.
// An argument may be evaluated more than once.
#if !defined(MIN)
#   define MIN(a,b) ((a)<(b) ? (a) : (b))
#endif
184
// Compares two NUL-terminated strings that are known to share
// their first prefixLength bytes, starting after that shared prefix.
// Bytes are compared as unsigned values.
// On return, prefixLength is the number of leading bytes that the
// strings were found to share.
// Returns 0 if the strings are equal, otherwise the difference
// between the first pair of differing bytes.
static int32_t strcmpAfterPrefix(const char *s1, const char *s2, int32_t &prefixLength) {
    int32_t shared=prefixLength;
    const char *p1=s1+shared;
    const char *p2=s2+shared;
    int32_t diff=(int32_t)(uint8_t)*p1-(int32_t)(uint8_t)*p2;
    while(diff==0 && *p1!=0) {
        // Same non-NUL byte in both strings: one more shared byte.
        ++shared;
        ++p1;
        ++p2;
        diff=(int32_t)(uint8_t)*p1-(int32_t)(uint8_t)*p2;
    }
    prefixLength=shared;
    return diff;
}
204
205 static int32_t prefixBinarySearch(const char *s, const char *names, const TOCEntry *toc, int32_t count) {
206 if(count==0) {
207 return -1;
208 }
209 int32_t start=0;
210 int32_t limit=count;
211 // Remember the shared prefix between s, start and limit,
212 // and don't compare that shared prefix again.
213 // The shared prefix should get longer as we narrow the [start, limit[ range.
214 int32_t startPrefixLength=0;
215 int32_t limitPrefixLength=0;
216 // Prime the prefix lengths so that we don't keep prefixLength at 0 until
217 // both the start and limit indexes have moved.
218 // At the same time, we find if s is one of the start and (limit-1) names,
219 // and if not, exclude them from the actual binary search.
220 if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, startPrefixLength)) {
221 return 0;
222 }
223 ++start;
224 --limit;
225 if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, limitPrefixLength)) {
226 return limit;
227 }
228 while(start<limit) {
229 int32_t i=(start+limit)/2;
230 int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
231 int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, prefixLength);
232 if(cmp<0) {
233 limit=i;
234 limitPrefixLength=prefixLength;
235 } else if(cmp==0) {
236 return i;
237 } else {
238 start=i+1;
239 startPrefixLength=prefixLength;
240 }
241 }
242 return -1;
243 }
244
245 class PrefixBinarySearchPackageLookup : public BinarySearchPackageLookup {
246 public:
247 PrefixBinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
248 : BinarySearchPackageLookup(perf) {}
249
250 virtual void call(UErrorCode * /*pErrorCode*/) {
251 int32_t count=pkg.getItemCount();
252 const char *itemNameChars=itemNames.data();
253 const char *name=itemNameChars;
254 for(int32_t i=0; i<count; ++i) {
255 if(prefixBinarySearch(name, itemNameChars, toc, count)<0) {
256 fprintf(stderr, "item not found: %s\n", name);
257 }
258 name=strchr(name, 0)+1;
259 }
260 }
261 };
262
263 static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
264 BytesTrie trie(nameTrieBytes);
265 if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
266 return trie.getValue();
267 } else {
268 return -1;
269 }
270 }
271
272 class BytesTriePackageLookup : public PackageLookup {
273 public:
274 BytesTriePackageLookup(const DictionaryTriePerfTest &perf)
275 : PackageLookup(perf) {
276 IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
277 builder=new BytesTrieBuilder(errorCode);
278 int32_t count=pkg.getItemCount();
279 for(int32_t i=0; i<count; ++i) {
280 // The Package class removes the "icudt46l/" prefix.
281 // We restore that here for a fair performance test.
282 // We store all full names so that we do not have to reconstruct them
283 // in the call() function.
284 const char *name=pkg.getItem(i)->name;
285 int32_t offset=itemNames.length();
286 itemNames.append("icudt46l/", errorCode);
287 itemNames.append(name, -1, errorCode);
288 // As value, set the data item index.
289 // In a real implementation, we would use that to get the
290 // start and limit offset of the data item.
291 StringPiece fullName(itemNames.toStringPiece());
292 fullName.remove_prefix(offset);
293 builder->add(fullName, i, errorCode);
294 // NUL-terminate the name for call() to find the next one.
295 itemNames.append(0, errorCode);
296 }
297 int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
298 printf("size of BytesTrie: %6ld\n", (long)length);
299 // count+1: +1 for the last-item limit offset which we should have always had
300 printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
301 printf("total index size: %6ld\n", (long)(length+(count+1)*4));
302 }
303 virtual ~BytesTriePackageLookup() {
304 delete builder;
305 }
306
307 virtual void call(UErrorCode *pErrorCode) {
308 int32_t count=pkg.getItemCount();
309 const char *nameTrieBytes=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, *pErrorCode).data();
310 const char *name=itemNames.data();
311 for(int32_t i=0; i<count; ++i) {
312 if(bytesTrieLookup(name, nameTrieBytes)<0) {
313 fprintf(stderr, "item not found: %s\n", name);
314 }
315 name=strchr(name, 0)+1;
316 }
317 }
318
319 protected:
320 BytesTrieBuilder *builder;
321 CharString itemNames;
322 };
323
324 // Performance test function object.
325 // Each subclass loads a dictionary text file
326 // from the -s or --sourcedir path plus -f or --file-name.
327 // For example, <ICU source dir>/source/data/brkitr/thaidict.txt.
328 class DictLookup : public UPerfFunction {
329 public:
330 DictLookup(const DictionaryTriePerfTest &perfTest) : perf(perfTest) {}
331
332 virtual long getOperationsPerIteration() {
333 return perf.numTextLines;
334 }
335
336 protected:
337 const DictionaryTriePerfTest &perf;
338 };
339
340 class CompactTrieDictLookup : public DictLookup {
341 public:
342 CompactTrieDictLookup(const DictionaryTriePerfTest &perfTest)
343 : DictLookup(perfTest), ctd(NULL) {
344 IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
345 // U+0E1C is the median code unit, from
346 // the UCharsTrie root node (split-branch node) for thaidict.txt.
347 MutableTrieDictionary builder(0xe1c, errorCode);
348 const ULine *lines=perf.getCachedLines();
349 int32_t numLines=perf.getNumLines();
350 for(int32_t i=0; i<numLines; ++i) {
351 // Skip comment lines (start with a character below 'A').
352 if(lines[i].name[0]<0x41) {
353 continue;
354 }
355 builder.addWord(lines[i].name, lines[i].len, errorCode);
356 }
357 ctd=new CompactTrieDictionary(builder, errorCode);
358 int32_t length=(int32_t)ctd->dataSize();
359 printf("size of CompactTrieDict: %6ld bytes\n", (long)length);
360 }
361
362 virtual ~CompactTrieDictLookup() {
363 delete ctd;
364 }
365
366 virtual void call(UErrorCode *pErrorCode) {
367 UText text=UTEXT_INITIALIZER;
368 int32_t lengths[20];
369 const ULine *lines=perf.getCachedLines();
370 int32_t numLines=perf.getNumLines();
371 for(int32_t i=0; i<numLines; ++i) {
372 // Skip comment lines (start with a character below 'A').
373 if(lines[i].name[0]<0x41) {
374 continue;
375 }
376 utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
377 int32_t count;
378 ctd->matches(&text, lines[i].len,
379 lengths, count, LENGTHOF(lengths));
380 if(count==0 || lengths[count-1]!=lines[i].len) {
381 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
382 }
383 }
384 }
385
386 protected:
387 CompactTrieDictionary *ctd;
388 };
389
390 // Closely imitate CompactTrieDictionary::matches().
391 // Note: CompactTrieDictionary::matches() is part of its trie implementation,
392 // and while it loops over the text, it knows the current state.
393 // By contrast, this implementation uses UCharsTrie API functions that have to
394 // check the trie state each time and load/store state in the object.
395 // (Whether it hasNext() and whether it is in the middle of a linear-match node.)
396 static int32_t
397 ucharsTrieMatches(UCharsTrie &trie,
398 UText *text, int32_t textLimit,
399 int32_t *lengths, int &count, int limit ) {
400 UChar32 c=utext_next32(text);
401 // Notes:
402 // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
403 // b) It also ignores non-BMP code points by casting to UChar!
404 if(c<0) {
405 return 0;
406 }
407 // Should be firstForCodePoint() but CompactTrieDictionary
408 // handles only code units.
409 UStringTrieResult result=trie.first(c);
410 int32_t numChars=1;
411 count=0;
412 for(;;) {
413 if(USTRINGTRIE_HAS_VALUE(result)) {
414 if(count<limit) {
415 // lengths[count++]=(int32_t)utext_getNativeIndex(text);
416 lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
417 }
418 if(result==USTRINGTRIE_FINAL_VALUE) {
419 break;
420 }
421 } else if(result==USTRINGTRIE_NO_MATCH) {
422 break;
423 }
424 if(numChars>=textLimit) {
425 // Note: Why do we have both a text limit and a UText that knows its length?
426 break;
427 }
428 UChar32 c=utext_next32(text);
429 // Notes:
430 // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
431 // b) It also ignores non-BMP code points by casting to UChar!
432 if(c<0) {
433 break;
434 }
435 ++numChars;
436 // Should be nextForCodePoint() but CompactTrieDictionary
437 // handles only code units.
438 result=trie.next(c);
439 }
440 #if 0
441 // Note: CompactTrieDictionary::matches() comments say that it leaves the UText
442 // after the longest prefix match and returns the number of characters
443 // that were matched.
444 if(index!=lastMatch) {
445 utext_setNativeIndex(text, lastMatch);
446 }
447 return lastMatch-start;
448 // However, it does not do either of these, so I am not trying to
449 // imitate it (or its docs) 100%.
450 #endif
451 return numChars;
452 }
453
454 class UCharsTrieDictLookup : public DictLookup {
455 public:
456 UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest)
457 : DictLookup(perfTest), trie(NULL) {
458 IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
459 builder=new UCharsTrieBuilder(errorCode);
460 const ULine *lines=perf.getCachedLines();
461 int32_t numLines=perf.getNumLines();
462 for(int32_t i=0; i<numLines; ++i) {
463 // Skip comment lines (start with a character below 'A').
464 if(lines[i].name[0]<0x41) {
465 continue;
466 }
467 builder->add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
468 }
469 UnicodeString trieUChars;
470 int32_t length=builder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
471 printf("size of UCharsTrie: %6ld bytes\n", (long)length*2);
472 trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
473 }
474
475 virtual ~UCharsTrieDictLookup() {
476 delete builder;
477 delete trie;
478 }
479
480 protected:
481 UCharsTrieBuilder *builder;
482 UCharsTrie *trie;
483 };
484
485 class UCharsTrieDictMatches : public UCharsTrieDictLookup {
486 public:
487 UCharsTrieDictMatches(const DictionaryTriePerfTest &perfTest)
488 : UCharsTrieDictLookup(perfTest) {}
489
490 virtual void call(UErrorCode *pErrorCode) {
491 UText text=UTEXT_INITIALIZER;
492 int32_t lengths[20];
493 const ULine *lines=perf.getCachedLines();
494 int32_t numLines=perf.getNumLines();
495 for(int32_t i=0; i<numLines; ++i) {
496 // Skip comment lines (start with a character below 'A').
497 if(lines[i].name[0]<0x41) {
498 continue;
499 }
500 utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
501 int32_t count=0;
502 ucharsTrieMatches(*trie, &text, lines[i].len,
503 lengths, count, LENGTHOF(lengths));
504 if(count==0 || lengths[count-1]!=lines[i].len) {
505 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
506 }
507 }
508 }
509 };
510
511 class UCharsTrieDictContains : public UCharsTrieDictLookup {
512 public:
513 UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest)
514 : UCharsTrieDictLookup(perfTest) {}
515
516 virtual void call(UErrorCode * /*pErrorCode*/) {
517 const ULine *lines=perf.getCachedLines();
518 int32_t numLines=perf.getNumLines();
519 for(int32_t i=0; i<numLines; ++i) {
520 // Skip comment lines (which start with a character below 'A').
521 if(lines[i].name[0]<0x41) {
522 continue;
523 }
524 if(!USTRINGTRIE_HAS_VALUE(trie->reset().next(lines[i].name, lines[i].len))) {
525 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
526 }
527 }
528 }
529 };
530
531 static inline int32_t thaiCharToByte(UChar32 c) {
532 if(0xe00<=c && c<=0xefe) {
533 return c&0xff;
534 } else if(c==0x2e) {
535 return 0xff;
536 } else {
537 return -1;
538 }
539 }
540
541 static UBool thaiWordToBytes(const UChar *s, int32_t length,
542 CharString &str, UErrorCode &errorCode) {
543 for(int32_t i=0; i<length; ++i) {
544 UChar c=s[i];
545 int32_t b=thaiCharToByte(c);
546 if(b>=0) {
547 str.append((char)b, errorCode);
548 } else {
549 fprintf(stderr, "thaiWordToBytes(): unable to encode U+%04X as a byte\n", c);
550 return FALSE;
551 }
552 }
553 return TRUE;
554 }
555
556 class BytesTrieDictLookup : public DictLookup {
557 public:
558 BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest)
559 : DictLookup(perfTest), trie(NULL), noDict(FALSE) {
560 IcuToolErrorCode errorCode("BytesTrieDictLookup()");
561 builder=new BytesTrieBuilder(errorCode);
562 CharString str;
563 const ULine *lines=perf.getCachedLines();
564 int32_t numLines=perf.getNumLines();
565 for(int32_t i=0; i<numLines; ++i) {
566 // Skip comment lines (start with a character below 'A').
567 if(lines[i].name[0]<0x41) {
568 continue;
569 }
570 if(!thaiWordToBytes(lines[i].name, lines[i].len, str.clear(), errorCode)) {
571 fprintf(stderr, "thaiWordToBytes(): failed for word %ld (0-based)\n", (long)i);
572 noDict=TRUE;
573 break;
574 }
575 builder->add(str.toStringPiece(), 0, errorCode);
576 }
577 if(!noDict) {
578 int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
579 printf("size of BytesTrie: %6ld bytes\n", (long)length);
580 trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
581 }
582 }
583
584 virtual ~BytesTrieDictLookup() {
585 delete builder;
586 delete trie;
587 }
588
589 protected:
590 BytesTrieBuilder *builder;
591 BytesTrie *trie;
592 UBool noDict;
593 };
594
595 static int32_t
596 bytesTrieMatches(BytesTrie &trie,
597 UText *text, int32_t textLimit,
598 int32_t *lengths, int &count, int limit ) {
599 UChar32 c=utext_next32(text);
600 if(c<0) {
601 return 0;
602 }
603 UStringTrieResult result=trie.first(thaiCharToByte(c));
604 int32_t numChars=1;
605 count=0;
606 for(;;) {
607 if(USTRINGTRIE_HAS_VALUE(result)) {
608 if(count<limit) {
609 // lengths[count++]=(int32_t)utext_getNativeIndex(text);
610 lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
611 }
612 if(result==USTRINGTRIE_FINAL_VALUE) {
613 break;
614 }
615 } else if(result==USTRINGTRIE_NO_MATCH) {
616 break;
617 }
618 if(numChars>=textLimit) {
619 break;
620 }
621 UChar32 c=utext_next32(text);
622 if(c<0) {
623 break;
624 }
625 ++numChars;
626 result=trie.next(thaiCharToByte(c));
627 }
628 return numChars;
629 }
630
631 class BytesTrieDictMatches : public BytesTrieDictLookup {
632 public:
633 BytesTrieDictMatches(const DictionaryTriePerfTest &perfTest)
634 : BytesTrieDictLookup(perfTest) {}
635
636 virtual void call(UErrorCode *pErrorCode) {
637 if(noDict) {
638 return;
639 }
640 UText text=UTEXT_INITIALIZER;
641 int32_t lengths[20];
642 const ULine *lines=perf.getCachedLines();
643 int32_t numLines=perf.getNumLines();
644 for(int32_t i=0; i<numLines; ++i) {
645 // Skip comment lines (start with a character below 'A').
646 if(lines[i].name[0]<0x41) {
647 continue;
648 }
649 utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
650 int32_t count=0;
651 bytesTrieMatches(*trie, &text, lines[i].len,
652 lengths, count, LENGTHOF(lengths));
653 if(count==0 || lengths[count-1]!=lines[i].len) {
654 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
655 }
656 }
657 }
658 };
659
660 class BytesTrieDictContains : public BytesTrieDictLookup {
661 public:
662 BytesTrieDictContains(const DictionaryTriePerfTest &perfTest)
663 : BytesTrieDictLookup(perfTest) {}
664
665 virtual void call(UErrorCode * /*pErrorCode*/) {
666 if(noDict) {
667 return;
668 }
669 const ULine *lines=perf.getCachedLines();
670 int32_t numLines=perf.getNumLines();
671 for(int32_t i=0; i<numLines; ++i) {
672 const UChar *line=lines[i].name;
673 // Skip comment lines (start with a character below 'A').
674 if(line[0]<0x41) {
675 continue;
676 }
677 UStringTrieResult result=trie->first(thaiCharToByte(line[0]));
678 int32_t lineLength=lines[i].len;
679 for(int32_t j=1; j<lineLength; ++j) {
680 if(!USTRINGTRIE_HAS_NEXT(result)) {
681 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
682 break;
683 }
684 result=trie->next(thaiCharToByte(line[j]));
685 }
686 if(!USTRINGTRIE_HAS_VALUE(result)) {
687 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
688 }
689 }
690 }
691 };
692
693 UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
694 const char *&name, char * /*par*/) {
695 if(hasFile()) {
696 switch(index) {
697 case 0:
698 name="compacttriematches";
699 if(exec) {
700 return new CompactTrieDictLookup(*this);
701 }
702 break;
703 case 1:
704 name="ucharstriematches";
705 if(exec) {
706 return new UCharsTrieDictMatches(*this);
707 }
708 break;
709 case 2:
710 name="ucharstriecontains";
711 if(exec) {
712 return new UCharsTrieDictContains(*this);
713 }
714 break;
715 case 3:
716 name="bytestriematches";
717 if(exec) {
718 return new BytesTrieDictMatches(*this);
719 }
720 break;
721 case 4:
722 name="bytestriecontains";
723 if(exec) {
724 return new BytesTrieDictContains(*this);
725 }
726 break;
727 default:
728 name="";
729 break;
730 }
731 } else {
732 if(index==0 && exec) {
733 puts("Running BytesTrie perf tests on the .dat package file from the --sourcedir.\n"
734 "For UCharsTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
735 }
736 switch(index) {
737 case 0:
738 name="simplebinarysearch";
739 if(exec) {
740 return new BinarySearchPackageLookup(*this);
741 }
742 break;
743 case 1:
744 name="prefixbinarysearch";
745 if(exec) {
746 return new PrefixBinarySearchPackageLookup(*this);
747 }
748 break;
749 case 2:
750 name="bytestrie";
751 if(exec) {
752 return new BytesTriePackageLookup(*this);
753 }
754 break;
755 default:
756 name="";
757 break;
758 }
759 }
760 return NULL;
761 }
762
763 int main(int argc, const char *argv[]) {
764 IcuToolErrorCode errorCode("dicttrieperf main()");
765 DictionaryTriePerfTest test(argc, argv, errorCode);
766 if(errorCode.isFailure()) {
767 fprintf(stderr, "DictionaryTriePerfTest() failed: %s\n", errorCode.errorName());
768 test.usage();
769 return errorCode.reset();
770 }
771 if(!test.run()) {
772 fprintf(stderr, "FAILED: Tests could not be run, please check the arguments.\n");
773 return -1;
774 }
775 return 0;
776 }