2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrietest.cpp
8 * tab size: 8 (not used)
11 * created on: 2010nov16
12 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/ucharstriebuilder.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
28 struct StringAndValue
{
33 class UCharsTrieTest
: public IntlTest
{
36 virtual ~UCharsTrieTest();
38 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
43 void TestShortestBranch();
45 void TestLongSequence();
46 void TestLongBranch();
47 void TestValuesForState();
49 void TestFirstForCodePoint();
50 void TestNextForCodePoint();
52 UCharsTrie
*buildLargeTrie(int32_t numUniqueFirst
);
55 UCharsTrie
*buildMonthsTrie(UStringTrieBuildOption buildOption
);
56 void TestHasUniqueValue();
57 void TestGetNextUChars();
58 void TestIteratorFromBranch();
59 void TestIteratorFromLinearMatch();
60 void TestTruncatingIteratorFromRoot();
61 void TestTruncatingIteratorFromLinearMatchShort();
62 void TestTruncatingIteratorFromLinearMatchLong();
63 void TestIteratorFromUChars();
65 void checkData(const StringAndValue data
[], int32_t dataLength
);
66 void checkData(const StringAndValue data
[], int32_t dataLength
, UStringTrieBuildOption buildOption
);
67 UCharsTrie
*buildTrie(const StringAndValue data
[], int32_t dataLength
,
68 UStringTrieBuildOption buildOption
);
69 void checkFirst(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
70 void checkNext(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
71 void checkNextWithState(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
72 void checkNextString(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
73 void checkIterator(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
74 void checkIterator(UCharsTrie::Iterator
&iter
, const StringAndValue data
[], int32_t dataLength
);
77 UCharsTrieBuilder
*builder_
;
80 extern IntlTest
*createUCharsTrieTest() {
81 return new UCharsTrieTest();
84 UCharsTrieTest::UCharsTrieTest() : builder_(NULL
) {
85 IcuTestErrorCode
errorCode(*this, "UCharsTrieTest()");
86 builder_
=new UCharsTrieBuilder(errorCode
);
89 UCharsTrieTest::~UCharsTrieTest() {
93 void UCharsTrieTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
95 logln("TestSuite UCharsTrieTest: ");
98 TESTCASE_AUTO(TestBuilder
);
99 TESTCASE_AUTO(TestEmpty
);
100 TESTCASE_AUTO(Test_a
);
101 TESTCASE_AUTO(Test_a_ab
);
102 TESTCASE_AUTO(TestShortestBranch
);
103 TESTCASE_AUTO(TestBranches
);
104 TESTCASE_AUTO(TestLongSequence
);
105 TESTCASE_AUTO(TestLongBranch
);
106 TESTCASE_AUTO(TestValuesForState
);
107 TESTCASE_AUTO(TestCompact
);
108 TESTCASE_AUTO(TestFirstForCodePoint
);
109 TESTCASE_AUTO(TestNextForCodePoint
);
110 TESTCASE_AUTO(TestLargeTrie
);
111 TESTCASE_AUTO(TestHasUniqueValue
);
112 TESTCASE_AUTO(TestGetNextUChars
);
113 TESTCASE_AUTO(TestIteratorFromBranch
);
114 TESTCASE_AUTO(TestIteratorFromLinearMatch
);
115 TESTCASE_AUTO(TestTruncatingIteratorFromRoot
);
116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort
);
117 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong
);
118 TESTCASE_AUTO(TestIteratorFromUChars
);
122 void UCharsTrieTest::TestBuilder() {
123 IcuTestErrorCode
errorCode(*this, "TestBuilder()");
124 delete builder_
->build(USTRINGTRIE_BUILD_FAST
, errorCode
);
125 if(errorCode
.reset()!=U_INDEX_OUTOFBOUNDS_ERROR
) {
126 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
129 // TODO: remove .build(...) once add() checks for duplicates.
130 builder_
->add("=", 0, errorCode
).add("=", 1, errorCode
).build(USTRINGTRIE_BUILD_FAST
, errorCode
);
131 if(errorCode
.reset()!=U_ILLEGAL_ARGUMENT_ERROR
) {
132 errln("UCharsTrieBuilder.add() did not detect duplicates");
137 void UCharsTrieTest::TestEmpty() {
138 static const StringAndValue data
[]={
141 checkData(data
, LENGTHOF(data
));
144 void UCharsTrieTest::Test_a() {
145 static const StringAndValue data
[]={
148 checkData(data
, LENGTHOF(data
));
151 void UCharsTrieTest::Test_a_ab() {
152 static const StringAndValue data
[]={
156 checkData(data
, LENGTHOF(data
));
159 void UCharsTrieTest::TestShortestBranch() {
160 static const StringAndValue data
[]={
164 checkData(data
, LENGTHOF(data
));
167 void UCharsTrieTest::TestBranches() {
168 static const StringAndValue data
[]={
176 { "ppppp", 0x40000 },
181 { "vv", 0x7fffffff },
182 { "zz", (int32_t)0x80000000 }
184 for(int32_t length
=2; length
<=LENGTHOF(data
); ++length
) {
185 infoln("TestBranches length=%d", (int)length
);
186 checkData(data
, length
);
190 void UCharsTrieTest::TestLongSequence() {
191 static const StringAndValue data
[]={
193 // sequence of linear-match nodes
194 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
195 // more than 256 units
196 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
203 checkData(data
, LENGTHOF(data
));
206 void UCharsTrieTest::TestLongBranch() {
207 // Split-branch and interesting compact-integer values.
208 static const StringAndValue data
[]={
221 { "l234567890", 0x1b00 },
222 { "l234567890123", 0x1b01 },
223 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
224 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
225 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
227 { "s2345", 0x4444444 },
228 { "t234567890", 0x77777777 },
229 { "z", (int32_t)0x80000001 }
231 checkData(data
, LENGTHOF(data
));
234 void UCharsTrieTest::TestValuesForState() {
235 // Check that saveState() and resetToState() interact properly
236 // with next() and current().
237 static const StringAndValue data
[]={
245 checkData(data
, LENGTHOF(data
));
248 void UCharsTrieTest::TestCompact() {
249 // Duplicate trailing strings and values provide opportunities for compacting.
250 static const StringAndValue data
[]={
267 // The l+n branch (with its sub-nodes) is a duplicate but will be written
268 // both times because each time it follows a different linear-match node.
272 checkData(data
, LENGTHOF(data
));
275 void UCharsTrieTest::TestFirstForCodePoint() {
276 static const StringAndValue data
[]={
279 { "a\\U00010000", 3 },
281 { "\\U00020000\\udbff", 5 },
282 { "\\U00020000\\U0010ffff", 6 },
283 { "\\U00020000\\U0010ffffz", 7 },
284 { "\\U00050000xy", 8 },
285 { "\\U00050000xyz", 9 }
287 checkData(data
, LENGTHOF(data
));
290 void UCharsTrieTest::TestNextForCodePoint() {
291 static const StringAndValue data
[]={
292 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
293 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
294 { "\\u4dff\\U000103ff", 99999 }
296 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
298 return; // buildTrie() reported an error
300 UStringTrieResult result
;
301 if( (result
=trie
->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
302 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
303 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
304 (result
=trie
->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
305 (result
=trie
->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
306 (result
=trie
->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
307 trie
->getValue()!=2000000000
309 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[0].s
);
311 if( (result
=trie
->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
312 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
313 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
314 (result
=trie
->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
315 trie
->getValue()!=44444
317 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[1].s
);
319 if( (result
=trie
->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
320 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
321 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
322 (result
=trie
->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH
|| result
!=trie
->current() // no match for trail surrogate
324 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
326 if( (result
=trie
->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
327 (result
=trie
->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
328 trie
->getValue()!=99999
330 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[2].s
);
334 // Definitions in the anonymous namespace are invisible outside this file.
337 // Generate (string, value) pairs.
338 // The first string (before next()) will be empty.
341 Generator() : value(4711), num(0) {}
345 s
.append(c
=(UChar
)(value
>>16));
346 s
.append((UChar
)(value
>>4));
348 s
.append((UChar
)value
);
351 value
+=((value
>>5)&0x7ff)*3+1;
354 const UnicodeString
&getString() const { return s
; }
355 int32_t getValue() const { return value
; }
356 int32_t countUniqueFirstChars() const { return set
.size(); }
357 int32_t getIndex() const { return num
; }
368 UCharsTrie
*UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst
) {
369 IcuTestErrorCode
errorCode(*this, "buildLargeTrie()");
372 while(gen
.countUniqueFirstChars()<numUniqueFirst
) {
373 builder_
->add(gen
.getString(), gen
.getValue(), errorCode
);
376 infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst
, (long)gen
.getIndex());
377 UnicodeString trieUChars
;
378 builder_
->buildUnicodeString(USTRINGTRIE_BUILD_FAST
, trieUChars
, errorCode
);
379 logln("serialized trie size: %ld UChars\n", (long)trieUChars
.length());
380 return new UCharsTrie(trieUChars
.getBuffer());
383 // Exercise a large branch node.
384 void UCharsTrieTest::TestLargeTrie() {
385 LocalPointer
<UCharsTrie
> trie(buildLargeTrie(1111));
387 return; // buildTrie() reported an error
390 while(gen
.countUniqueFirstChars()<1111) {
391 UnicodeString
x(gen
.getString());
392 int32_t value
=gen
.getValue();
394 if(trie
->first(x
[0])==USTRINGTRIE_NO_MATCH
) {
395 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
396 x
[0], (long)gen
.getIndex());
401 UStringTrieResult result
=trie
->next(x
.getBuffer(), x
.length());
402 if(!USTRINGTRIE_HAS_VALUE(result
) || result
!=trie
->current() || value
!=trie
->getValue()) {
403 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
404 "next()!=current() or getValue() wrong "
405 "for string %ld\n", (int)x
.length(), x
[0], x
[1], (long)gen
.getIndex());
423 UCharsTrie
*UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption
) {
424 // All types of nodes leading to the same value,
425 // for code coverage of recursive functions.
426 // In particular, we need a lot of branches on some single level
427 // to exercise a split-branch node.
428 static const StringAndValue data
[]={
448 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
460 return buildTrie(data
, LENGTHOF(data
), buildOption
);
463 void UCharsTrieTest::TestHasUniqueValue() {
464 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
466 return; // buildTrie() reported an error
469 if(trie
->hasUniqueValue(uniqueValue
)) {
470 errln("unique value at root");
475 // hasUniqueValue() directly after next()
476 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=1) {
477 errln("not unique value 1 after \"jan\"");
481 if(trie
->hasUniqueValue(uniqueValue
)) {
482 errln("unique value after \"ju\"");
484 if(trie
->next(u_n
)!=USTRINGTRIE_INTERMEDIATE_VALUE
|| 6!=trie
->getValue()) {
485 errln("not normal value 6 after \"jun\"");
487 // hasUniqueValue() after getValue()
488 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=6) {
489 errln("not unique value 6 after \"jun\"");
491 // hasUniqueValue() from within a linear-match node
494 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=8) {
495 errln("not unique value 8 after \"au\"");
499 void UCharsTrieTest::TestGetNextUChars() {
500 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL
));
502 return; // buildTrie() reported an error
504 UnicodeString buffer
;
505 UnicodeStringAppendable
app(buffer
);
506 int32_t count
=trie
->getNextUChars(app
);
507 if(count
!=2 || buffer
.length()!=2 || buffer
[0]!=u_a
|| buffer
[1]!=u_j
) {
508 errln("months getNextUChars()!=[aj] at root");
513 // getNextUChars() directly after next()
515 count
=trie
->getNextUChars(app
);
516 if(count
!=20 || buffer
!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
517 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
519 // getNextUChars() after getValue()
520 trie
->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
522 count
=trie
->getNextUChars(app
);
523 if(count
!=20 || buffer
!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
524 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
526 // getNextUChars() from a linear-match node
529 count
=trie
->getNextUChars(app
);
530 if(count
!=1 || buffer
.length()!=1 || buffer
[0]!=u_a
) {
531 errln("months getNextUChars()!=[a] after \"janu\"");
535 count
=trie
->getNextUChars(app
);
536 if(count
!=1 || buffer
.length()!=1 || buffer
[0]!=u_r
) {
537 errln("months getNextUChars()!=[r] after \"janua\"");
541 // getNextUChars() after a final match
543 count
=trie
->getNextUChars(app
);
544 if(count
!=0 || buffer
.length()!=0) {
545 errln("months getNextUChars()!=[] after \"january\"");
549 void UCharsTrieTest::TestIteratorFromBranch() {
550 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
552 return; // buildTrie() reported an error
554 // Go to a branch node.
558 IcuTestErrorCode
errorCode(*this, "TestIteratorFromBranch()");
559 UCharsTrie::Iterator
iter(*trie
, 0, errorCode
);
560 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
563 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
565 static const StringAndValue data
[]={
584 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
592 checkIterator(iter
, data
, LENGTHOF(data
));
593 // Reset, and we should get the same result.
594 logln("after iter.reset()");
595 checkIterator(iter
.reset(), data
, LENGTHOF(data
));
598 void UCharsTrieTest::TestIteratorFromLinearMatch() {
599 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL
));
601 return; // buildTrie() reported an error
603 // Go into a linear-match node.
609 IcuTestErrorCode
errorCode(*this, "TestIteratorFromLinearMatch()");
610 UCharsTrie::Iterator
iter(*trie
, 0, errorCode
);
611 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
614 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
615 // following "janua".
616 static const StringAndValue data
[]={
620 checkIterator(iter
, data
, LENGTHOF(data
));
621 // Reset, and we should get the same result.
622 logln("after iter.reset()");
623 checkIterator(iter
.reset(), data
, LENGTHOF(data
));
626 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
627 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
629 return; // buildTrie() reported an error
631 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromRoot()");
632 UCharsTrie::Iterator
iter(*trie
, 4, errorCode
);
633 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
636 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
637 // of each string, and no string duplicates from the truncation.
638 static const StringAndValue data
[]={
666 checkIterator(iter
, data
, LENGTHOF(data
));
667 // Reset, and we should get the same result.
668 logln("after iter.reset()");
669 checkIterator(iter
.reset(), data
, LENGTHOF(data
));
672 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
673 static const StringAndValue data
[]={
678 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
680 return; // buildTrie() reported an error
682 // Go into a linear-match node.
685 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
686 // Truncate within the linear-match node.
687 UCharsTrie::Iterator
iter(*trie
, 2, errorCode
);
688 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
691 static const StringAndValue expected
[]={
694 checkIterator(iter
, expected
, LENGTHOF(expected
));
695 // Reset, and we should get the same result.
696 logln("after iter.reset()");
697 checkIterator(iter
.reset(), expected
, LENGTHOF(expected
));
700 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
701 static const StringAndValue data
[]={
706 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
708 return; // buildTrie() reported an error
710 // Go into a linear-match node.
714 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
715 // Truncate after the linear-match node.
716 UCharsTrie::Iterator
iter(*trie
, 3, errorCode
);
717 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
720 static const StringAndValue expected
[]={
725 checkIterator(iter
, expected
, LENGTHOF(expected
));
726 // Reset, and we should get the same result.
727 logln("after iter.reset()");
728 checkIterator(iter
.reset(), expected
, LENGTHOF(expected
));
731 void UCharsTrieTest::TestIteratorFromUChars() {
732 static const StringAndValue data
[]={
738 IcuTestErrorCode
errorCode(*this, "TestIteratorFromUChars()");
739 for(int32_t i
=0; i
<LENGTHOF(data
); ++i
) {
740 builder_
->add(data
[i
].s
, data
[i
].value
, errorCode
);
742 UnicodeString trieUChars
;
743 builder_
->buildUnicodeString(USTRINGTRIE_BUILD_FAST
, trieUChars
, errorCode
);
744 UCharsTrie::Iterator
iter(trieUChars
.getBuffer(), 0, errorCode
);
745 checkIterator(iter
, data
, LENGTHOF(data
));
748 void UCharsTrieTest::checkData(const StringAndValue data
[], int32_t dataLength
) {
749 logln("checkData(dataLength=%d, fast)", (int)dataLength
);
750 checkData(data
, dataLength
, USTRINGTRIE_BUILD_FAST
);
751 logln("checkData(dataLength=%d, small)", (int)dataLength
);
752 checkData(data
, dataLength
, USTRINGTRIE_BUILD_SMALL
);
755 void UCharsTrieTest::checkData(const StringAndValue data
[], int32_t dataLength
, UStringTrieBuildOption buildOption
) {
756 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, dataLength
, buildOption
));
758 return; // buildTrie() reported an error
760 checkFirst(*trie
, data
, dataLength
);
761 checkNext(*trie
, data
, dataLength
);
762 checkNextWithState(*trie
, data
, dataLength
);
763 checkNextString(*trie
, data
, dataLength
);
764 checkIterator(*trie
, data
, dataLength
);
767 UCharsTrie
*UCharsTrieTest::buildTrie(const StringAndValue data
[], int32_t dataLength
,
768 UStringTrieBuildOption buildOption
) {
769 IcuTestErrorCode
errorCode(*this, "buildTrie()");
770 // Add the items to the trie builder in an interesting (not trivial, not random) order.
773 // Odd number of items.
776 } else if((dataLength%3
)!=0) {
777 // Not a multiple of 3.
785 for(int32_t i
=0; i
<dataLength
; ++i
) {
786 builder_
->add(UnicodeString(data
[index
].s
, -1, US_INV
).unescape(),
787 data
[index
].value
, errorCode
);
788 index
=(index
+step
)%dataLength
;
790 UnicodeString trieUChars
;
791 builder_
->buildUnicodeString(buildOption
, trieUChars
, errorCode
);
792 LocalPointer
<UCharsTrie
> trie(builder_
->build(buildOption
, errorCode
));
793 if(!errorCode
.logIfFailureAndReset("add()/build()")) {
794 builder_
->add("zzz", 999, errorCode
);
795 if(errorCode
.reset()!=U_NO_WRITE_PERMISSION
) {
796 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
799 logln("serialized trie size: %ld UChars\n", (long)trieUChars
.length());
800 UnicodeString trieUChars2
;
801 builder_
->buildUnicodeString(buildOption
, trieUChars2
, errorCode
);
802 if(trieUChars
.getBuffer()==trieUChars2
.getBuffer()) {
803 errln("builder.buildUnicodeString() before & after build() returned same array");
805 if(errorCode
.isFailure()) {
808 // Tries from either build() method should be identical but
809 // UCharsTrie does not implement equals().
810 // We just return either one.
811 if((dataLength
&1)!=0) {
812 return trie
.orphan();
814 return new UCharsTrie(trieUChars2
.getBuffer());
818 void UCharsTrieTest::checkFirst(UCharsTrie
&trie
,
819 const StringAndValue data
[], int32_t dataLength
) {
820 for(int32_t i
=0; i
<dataLength
; ++i
) {
822 continue; // skip empty string
824 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
825 UChar32 c
=expectedString
[0];
826 UChar32 nextCp
=expectedString
.length()>1 ? expectedString
[1] : 0;
827 UStringTrieResult firstResult
=trie
.first(c
);
828 int32_t firstValue
=USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1;
829 UStringTrieResult nextResult
=trie
.next(nextCp
);
830 if(firstResult
!=trie
.reset().next(c
) ||
831 firstResult
!=trie
.current() ||
832 firstValue
!=(USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1) ||
833 nextResult
!=trie
.next(nextCp
)
835 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
838 c
=expectedString
.char32At(0);
839 int32_t cLength
=U16_LENGTH(c
);
840 nextCp
=expectedString
.length()>cLength
? expectedString
.char32At(cLength
) : 0;
841 firstResult
=trie
.firstForCodePoint(c
);
842 firstValue
=USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1;
843 nextResult
=trie
.nextForCodePoint(nextCp
);
844 if(firstResult
!=trie
.reset().nextForCodePoint(c
) ||
845 firstResult
!=trie
.current() ||
846 firstValue
!=(USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1) ||
847 nextResult
!=trie
.nextForCodePoint(nextCp
)
849 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
856 void UCharsTrieTest::checkNext(UCharsTrie
&trie
,
857 const StringAndValue data
[], int32_t dataLength
) {
858 UCharsTrie::State state
;
859 for(int32_t i
=0; i
<dataLength
; ++i
) {
860 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
861 int32_t stringLength
= (i
&1) ? -1 : expectedString
.length();
862 UStringTrieResult result
;
863 if( !USTRINGTRIE_HAS_VALUE(
864 result
=trie
.next(expectedString
.getTerminatedBuffer(), stringLength
)) ||
865 result
!=trie
.current()
867 errln("trie does not seem to contain %s", data
[i
].s
);
868 } else if(trie
.getValue()!=data
[i
].value
) {
869 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
871 (long)trie
.getValue(), (long)trie
.getValue(),
872 (long)data
[i
].value
, (long)data
[i
].value
);
873 } else if(result
!=trie
.current() || trie
.getValue()!=data
[i
].value
) {
874 errln("trie value for %s changes when repeating current()/getValue()", data
[i
].s
);
877 stringLength
=expectedString
.length();
878 result
=trie
.current();
879 for(int32_t j
=0; j
<stringLength
; ++j
) {
880 if(!USTRINGTRIE_HAS_NEXT(result
)) {
881 errln("trie.current()!=hasNext before end of %s (at index %d)", data
[i
].s
, j
);
884 if(result
==USTRINGTRIE_INTERMEDIATE_VALUE
) {
886 if(trie
.current()!=USTRINGTRIE_INTERMEDIATE_VALUE
) {
887 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data
[i
].s
, j
);
891 result
=trie
.next(expectedString
[j
]);
892 if(!USTRINGTRIE_MATCHES(result
)) {
893 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data
[i
].s
, j
);
896 if(result
!=trie
.current()) {
897 errln("trie.next()!=following current() before end of %s (at index %d)", data
[i
].s
, j
);
901 if(!USTRINGTRIE_HAS_VALUE(result
)) {
902 errln("trie.next()!=hasValue at the end of %s", data
[i
].s
);
906 if(result
!=trie
.current()) {
907 errln("trie.current() != current()+getValue()+current() after end of %s",
910 // Compare the final current() with whether next() can actually continue.
911 trie
.saveState(state
);
912 UBool nextContinues
=FALSE
;
913 for(int32_t c
=0x20; c
<0xe000; ++c
) {
915 c
=0xd800; // Check for ASCII and surrogates but not all of the BMP.
917 if(trie
.resetToState(state
).next(c
)) {
922 if((result
==USTRINGTRIE_INTERMEDIATE_VALUE
)!=nextContinues
) {
923 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
924 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data
[i
].s
);
930 void UCharsTrieTest::checkNextWithState(UCharsTrie
&trie
,
931 const StringAndValue data
[], int32_t dataLength
) {
932 UCharsTrie::State noState
, state
;
933 for(int32_t i
=0; i
<dataLength
; ++i
) {
935 // This should have no effect.
936 trie
.resetToState(noState
);
938 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
939 int32_t stringLength
=expectedString
.length();
940 int32_t partialLength
=stringLength
/3;
941 for(int32_t j
=0; j
<partialLength
; ++j
) {
942 if(!USTRINGTRIE_MATCHES(trie
.next(expectedString
[j
]))) {
943 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data
[i
].s
);
947 trie
.saveState(state
);
948 UStringTrieResult resultAtState
=trie
.current();
949 UStringTrieResult result
;
950 int32_t valueAtState
=-99;
951 if(USTRINGTRIE_HAS_VALUE(resultAtState
)) {
952 valueAtState
=trie
.getValue();
954 result
=trie
.next(0); // mismatch
955 if(result
!=USTRINGTRIE_NO_MATCH
|| result
!=trie
.current()) {
956 errln("trie.next(0) matched after part of %s", data
[i
].s
);
958 if( resultAtState
!=trie
.resetToState(state
).current() ||
959 (USTRINGTRIE_HAS_VALUE(resultAtState
) && valueAtState
!=trie
.getValue())
961 errln("trie.next(part of %s) changes current()/getValue() after "
962 "saveState/next(0)/resetToState",
964 } else if(!USTRINGTRIE_HAS_VALUE(
965 result
=trie
.next(expectedString
.getTerminatedBuffer()+partialLength
,
966 stringLength
-partialLength
)) ||
967 result
!=trie
.current()) {
968 errln("trie.next(rest of %s) does not seem to contain %s after "
969 "saveState/next(0)/resetToState",
970 data
[i
].s
, data
[i
].s
);
971 } else if(!USTRINGTRIE_HAS_VALUE(
972 result
=trie
.resetToState(state
).
973 next(expectedString
.getTerminatedBuffer()+partialLength
,
974 stringLength
-partialLength
)) ||
975 result
!=trie
.current()) {
976 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
978 } else if(trie
.getValue()!=data
[i
].value
) {
979 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
981 (long)trie
.getValue(), (long)trie
.getValue(),
982 (long)data
[i
].value
, (long)data
[i
].value
);
988 // next(string) is also tested in other functions,
989 // but here we try to go partway through the string, and then beyond it.
990 void UCharsTrieTest::checkNextString(UCharsTrie
&trie
,
991 const StringAndValue data
[], int32_t dataLength
) {
992 for(int32_t i
=0; i
<dataLength
; ++i
) {
993 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
994 int32_t stringLength
=expectedString
.length();
995 if(!trie
.next(expectedString
.getTerminatedBuffer(), stringLength
/2)) {
996 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data
[i
].s
);
999 // Test that we stop properly at the end of the string.
1000 if(trie
.next(expectedString
.getTerminatedBuffer()+stringLength
/2,
1001 stringLength
+1-stringLength
/2)) {
1002 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data
[i
].s
);
1008 void UCharsTrieTest::checkIterator(UCharsTrie
&trie
,
1009 const StringAndValue data
[], int32_t dataLength
) {
1010 IcuTestErrorCode
errorCode(*this, "checkIterator()");
1011 UCharsTrie::Iterator
iter(trie
, 0, errorCode
);
1012 if(errorCode
.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1015 checkIterator(iter
, data
, dataLength
);
1018 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator
&iter
,
1019 const StringAndValue data
[], int32_t dataLength
) {
1020 IcuTestErrorCode
errorCode(*this, "checkIterator()");
1021 for(int32_t i
=0; i
<dataLength
; ++i
) {
1022 if(!iter
.hasNext()) {
1023 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i
, data
[i
].s
);
1026 UBool hasNext
=iter
.next(errorCode
);
1027 if(errorCode
.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i
, data
[i
].s
)) {
1031 errln("trie iterator next()=FALSE for item %d: %s", (int)i
, data
[i
].s
);
1034 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
1035 if(iter
.getString()!=expectedString
) {
1037 UnicodeString
invString(prettify(iter
.getString()));
1038 invString
.extract(0, invString
.length(), buffer
, LENGTHOF(buffer
), US_INV
);
1039 errln("trie iterator next().getString()=%s but expected %s for item %d",
1040 buffer
, data
[i
].s
, (int)i
);
1042 if(iter
.getValue()!=data
[i
].value
) {
1043 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1044 (long)iter
.getValue(), (long)iter
.getValue(),
1045 (long)data
[i
].value
, (long)data
[i
].value
,
1049 if(iter
.hasNext()) {
1050 errln("trie iterator hasNext()=TRUE after all items");
1052 UBool hasNext
=iter
.next(errorCode
);
1053 errorCode
.logIfFailureAndReset("trie iterator next() after all items");
1055 errln("trie iterator next()=TRUE after all items");