1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucharstrietest.cpp
10 * tab size: 8 (not used)
13 * created on: 2010nov16
14 * created by: Markus W. Scherer
19 #include "unicode/utypes.h"
20 #include "unicode/appendable.h"
21 #include "unicode/localpointer.h"
22 #include "unicode/ucharstrie.h"
23 #include "unicode/ucharstriebuilder.h"
24 #include "unicode/uniset.h"
25 #include "unicode/unistr.h"
26 #include "unicode/utf16.h"
30 struct StringAndValue
{
35 class UCharsTrieTest
: public IntlTest
{
38 virtual ~UCharsTrieTest();
40 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
45 void TestShortestBranch();
47 void TestLongSequence();
48 void TestLongBranch();
49 void TestValuesForState();
51 void TestFirstForCodePoint();
52 void TestNextForCodePoint();
54 UCharsTrie
*buildLargeTrie(int32_t numUniqueFirst
);
57 UCharsTrie
*buildMonthsTrie(UStringTrieBuildOption buildOption
);
58 void TestHasUniqueValue();
59 void TestGetNextUChars();
60 void TestIteratorFromBranch();
61 void TestIteratorFromLinearMatch();
62 void TestTruncatingIteratorFromRoot();
63 void TestTruncatingIteratorFromLinearMatchShort();
64 void TestTruncatingIteratorFromLinearMatchLong();
65 void TestIteratorFromUChars();
67 void checkData(const StringAndValue data
[], int32_t dataLength
);
68 void checkData(const StringAndValue data
[], int32_t dataLength
, UStringTrieBuildOption buildOption
);
69 UCharsTrie
*buildTrie(const StringAndValue data
[], int32_t dataLength
,
70 UStringTrieBuildOption buildOption
);
71 void checkFirst(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
72 void checkNext(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
73 void checkNextWithState(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
74 void checkNextWithState64(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
75 void checkNextString(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
76 void checkIterator(UCharsTrie
&trie
, const StringAndValue data
[], int32_t dataLength
);
77 void checkIterator(UCharsTrie::Iterator
&iter
, const StringAndValue data
[], int32_t dataLength
);
80 UCharsTrieBuilder
*builder_
;
83 extern IntlTest
*createUCharsTrieTest() {
84 return new UCharsTrieTest();
87 UCharsTrieTest::UCharsTrieTest() : builder_(NULL
) {
88 IcuTestErrorCode
errorCode(*this, "UCharsTrieTest()");
89 builder_
=new UCharsTrieBuilder(errorCode
);
92 UCharsTrieTest::~UCharsTrieTest() {
96 void UCharsTrieTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
98 logln("TestSuite UCharsTrieTest: ");
101 TESTCASE_AUTO(TestBuilder
);
102 TESTCASE_AUTO(TestEmpty
);
103 TESTCASE_AUTO(Test_a
);
104 TESTCASE_AUTO(Test_a_ab
);
105 TESTCASE_AUTO(TestShortestBranch
);
106 TESTCASE_AUTO(TestBranches
);
107 TESTCASE_AUTO(TestLongSequence
);
108 TESTCASE_AUTO(TestLongBranch
);
109 TESTCASE_AUTO(TestValuesForState
);
110 TESTCASE_AUTO(TestCompact
);
111 TESTCASE_AUTO(TestFirstForCodePoint
);
112 TESTCASE_AUTO(TestNextForCodePoint
);
113 TESTCASE_AUTO(TestLargeTrie
);
114 TESTCASE_AUTO(TestHasUniqueValue
);
115 TESTCASE_AUTO(TestGetNextUChars
);
116 TESTCASE_AUTO(TestIteratorFromBranch
);
117 TESTCASE_AUTO(TestIteratorFromLinearMatch
);
118 TESTCASE_AUTO(TestTruncatingIteratorFromRoot
);
119 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort
);
120 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong
);
121 TESTCASE_AUTO(TestIteratorFromUChars
);
125 void UCharsTrieTest::TestBuilder() {
126 IcuTestErrorCode
errorCode(*this, "TestBuilder()");
127 delete builder_
->build(USTRINGTRIE_BUILD_FAST
, errorCode
);
128 if(errorCode
.reset()!=U_INDEX_OUTOFBOUNDS_ERROR
) {
129 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
132 // TODO: remove .build(...) once add() checks for duplicates.
133 builder_
->add("=", 0, errorCode
).add("=", 1, errorCode
).build(USTRINGTRIE_BUILD_FAST
, errorCode
);
134 if(errorCode
.reset()!=U_ILLEGAL_ARGUMENT_ERROR
) {
135 errln("UCharsTrieBuilder.add() did not detect duplicates");
140 void UCharsTrieTest::TestEmpty() {
141 static const StringAndValue data
[]={
144 checkData(data
, UPRV_LENGTHOF(data
));
147 void UCharsTrieTest::Test_a() {
148 static const StringAndValue data
[]={
151 checkData(data
, UPRV_LENGTHOF(data
));
154 void UCharsTrieTest::Test_a_ab() {
155 static const StringAndValue data
[]={
159 checkData(data
, UPRV_LENGTHOF(data
));
162 void UCharsTrieTest::TestShortestBranch() {
163 static const StringAndValue data
[]={
167 checkData(data
, UPRV_LENGTHOF(data
));
170 void UCharsTrieTest::TestBranches() {
171 static const StringAndValue data
[]={
179 { "ppppp", 0x40000 },
184 { "vv", 0x7fffffff },
185 { "zz", (int32_t)0x80000000 }
187 for(int32_t length
=2; length
<=UPRV_LENGTHOF(data
); ++length
) {
188 logln("TestBranches length=%d", (int)length
);
189 checkData(data
, length
);
193 void UCharsTrieTest::TestLongSequence() {
194 static const StringAndValue data
[]={
196 // sequence of linear-match nodes
197 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
198 // more than 256 units
199 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
202 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
203 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
204 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
206 checkData(data
, UPRV_LENGTHOF(data
));
209 void UCharsTrieTest::TestLongBranch() {
210 // Split-branch and interesting compact-integer values.
211 static const StringAndValue data
[]={
224 { "l234567890", 0x1b00 },
225 { "l234567890123", 0x1b01 },
226 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
227 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
228 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
230 { "s2345", 0x4444444 },
231 { "t234567890", 0x77777777 },
232 { "z", (int32_t)0x80000001 }
234 checkData(data
, UPRV_LENGTHOF(data
));
237 void UCharsTrieTest::TestValuesForState() {
238 // Check that saveState() and resetToState() interact properly
239 // with next() and current().
240 static const StringAndValue data
[]={
248 checkData(data
, UPRV_LENGTHOF(data
));
251 void UCharsTrieTest::TestCompact() {
252 // Duplicate trailing strings and values provide opportunities for compacting.
253 static const StringAndValue data
[]={
270 // The l+n branch (with its sub-nodes) is a duplicate but will be written
271 // both times because each time it follows a different linear-match node.
275 checkData(data
, UPRV_LENGTHOF(data
));
278 void UCharsTrieTest::TestFirstForCodePoint() {
279 static const StringAndValue data
[]={
282 { "a\\U00010000", 3 },
284 { "\\U00020000\\udbff", 5 },
285 { "\\U00020000\\U0010ffff", 6 },
286 { "\\U00020000\\U0010ffffz", 7 },
287 { "\\U00050000xy", 8 },
288 { "\\U00050000xyz", 9 }
290 checkData(data
, UPRV_LENGTHOF(data
));
293 void UCharsTrieTest::TestNextForCodePoint() {
294 static const StringAndValue data
[]={
295 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
296 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
297 { "\\u4dff\\U000103ff", 99999 }
299 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, UPRV_LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
301 return; // buildTrie() reported an error
303 UStringTrieResult result
;
304 if( (result
=trie
->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
305 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
306 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
307 (result
=trie
->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
308 (result
=trie
->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
309 (result
=trie
->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
310 trie
->getValue()!=2000000000
312 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[0].s
);
314 if( (result
=trie
->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
315 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
316 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
317 (result
=trie
->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
318 trie
->getValue()!=44444
320 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[1].s
);
322 if( (result
=trie
->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
323 (result
=trie
->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
324 (result
=trie
->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
325 (result
=trie
->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH
|| result
!=trie
->current() // no match for trail surrogate
327 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
329 if( (result
=trie
->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE
|| result
!=trie
->current() ||
330 (result
=trie
->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE
|| result
!=trie
->current() ||
331 trie
->getValue()!=99999
333 errln("UCharsTrie.nextForCodePoint() fails for %s", data
[2].s
);
337 // Definitions in the anonymous namespace are invisible outside this file.
340 // Generate (string, value) pairs.
341 // The first string (before next()) will be empty.
344 Generator() : value(4711), num(0) {}
348 s
.append(c
=(UChar
)(value
>>16));
349 s
.append((UChar
)(value
>>4));
351 s
.append((UChar
)value
);
354 value
+=((value
>>5)&0x7ff)*3+1;
357 const UnicodeString
&getString() const { return s
; }
358 int32_t getValue() const { return value
; }
359 int32_t countUniqueFirstChars() const { return set
.size(); }
360 int32_t getIndex() const { return num
; }
371 UCharsTrie
*UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst
) {
372 IcuTestErrorCode
errorCode(*this, "buildLargeTrie()");
375 while(gen
.countUniqueFirstChars()<numUniqueFirst
) {
376 builder_
->add(gen
.getString(), gen
.getValue(), errorCode
);
379 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst
, (long)gen
.getIndex());
380 UnicodeString trieUChars
;
381 builder_
->buildUnicodeString(USTRINGTRIE_BUILD_FAST
, trieUChars
, errorCode
);
382 logln("serialized trie size: %ld UChars\n", (long)trieUChars
.length());
383 return new UCharsTrie(trieUChars
.getBuffer());
386 // Exercise a large branch node.
387 void UCharsTrieTest::TestLargeTrie() {
388 LocalPointer
<UCharsTrie
> trie(buildLargeTrie(1111));
390 return; // buildTrie() reported an error
393 while(gen
.countUniqueFirstChars()<1111) {
394 UnicodeString
x(gen
.getString());
395 int32_t value
=gen
.getValue();
397 if(trie
->first(x
[0])==USTRINGTRIE_NO_MATCH
) {
398 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
399 x
[0], (long)gen
.getIndex());
404 UStringTrieResult result
=trie
->next(x
.getBuffer(), x
.length());
405 if(!USTRINGTRIE_HAS_VALUE(result
) || result
!=trie
->current() || value
!=trie
->getValue()) {
406 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
407 "next()!=current() or getValue() wrong "
408 "for string %ld\n", (int)x
.length(), x
[0], x
[1], (long)gen
.getIndex());
426 UCharsTrie
*UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption
) {
427 // All types of nodes leading to the same value,
428 // for code coverage of recursive functions.
429 // In particular, we need a lot of branches on some single level
430 // to exercise a split-branch node.
431 static const StringAndValue data
[]={
451 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
463 return buildTrie(data
, UPRV_LENGTHOF(data
), buildOption
);
466 void UCharsTrieTest::TestHasUniqueValue() {
467 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
469 return; // buildTrie() reported an error
472 if(trie
->hasUniqueValue(uniqueValue
)) {
473 errln("unique value at root");
478 // hasUniqueValue() directly after next()
479 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=1) {
480 errln("not unique value 1 after \"jan\"");
484 if(trie
->hasUniqueValue(uniqueValue
)) {
485 errln("unique value after \"ju\"");
487 if(trie
->next(u_n
)!=USTRINGTRIE_INTERMEDIATE_VALUE
|| 6!=trie
->getValue()) {
488 errln("not normal value 6 after \"jun\"");
490 // hasUniqueValue() after getValue()
491 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=6) {
492 errln("not unique value 6 after \"jun\"");
494 // hasUniqueValue() from within a linear-match node
497 if(!trie
->hasUniqueValue(uniqueValue
) || uniqueValue
!=8) {
498 errln("not unique value 8 after \"au\"");
502 void UCharsTrieTest::TestGetNextUChars() {
503 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL
));
505 return; // buildTrie() reported an error
507 UnicodeString buffer
;
508 UnicodeStringAppendable
app(buffer
);
509 int32_t count
=trie
->getNextUChars(app
);
510 if(count
!=2 || buffer
.length()!=2 || buffer
[0]!=u_a
|| buffer
[1]!=u_j
) {
511 errln("months getNextUChars()!=[aj] at root");
516 // getNextUChars() directly after next()
518 count
=trie
->getNextUChars(app
);
519 if(count
!=20 || buffer
!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
520 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
522 // getNextUChars() after getValue()
523 trie
->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
525 count
=trie
->getNextUChars(app
);
526 if(count
!=20 || buffer
!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
527 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
529 // getNextUChars() from a linear-match node
532 count
=trie
->getNextUChars(app
);
533 if(count
!=1 || buffer
.length()!=1 || buffer
[0]!=u_a
) {
534 errln("months getNextUChars()!=[a] after \"janu\"");
538 count
=trie
->getNextUChars(app
);
539 if(count
!=1 || buffer
.length()!=1 || buffer
[0]!=u_r
) {
540 errln("months getNextUChars()!=[r] after \"janua\"");
544 // getNextUChars() after a final match
546 count
=trie
->getNextUChars(app
);
547 if(count
!=0 || buffer
.length()!=0) {
548 errln("months getNextUChars()!=[] after \"january\"");
552 void UCharsTrieTest::TestIteratorFromBranch() {
553 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
555 return; // buildTrie() reported an error
557 // Go to a branch node.
561 IcuTestErrorCode
errorCode(*this, "TestIteratorFromBranch()");
562 UCharsTrie::Iterator
iter(*trie
, 0, errorCode
);
563 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
566 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
568 static const StringAndValue data
[]={
587 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
595 checkIterator(iter
, data
, UPRV_LENGTHOF(data
));
596 // Reset, and we should get the same result.
597 logln("after iter.reset()");
598 checkIterator(iter
.reset(), data
, UPRV_LENGTHOF(data
));
601 void UCharsTrieTest::TestIteratorFromLinearMatch() {
602 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL
));
604 return; // buildTrie() reported an error
606 // Go into a linear-match node.
612 IcuTestErrorCode
errorCode(*this, "TestIteratorFromLinearMatch()");
613 UCharsTrie::Iterator
iter(*trie
, 0, errorCode
);
614 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
617 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
618 // following "janua".
619 static const StringAndValue data
[]={
623 checkIterator(iter
, data
, UPRV_LENGTHOF(data
));
624 // Reset, and we should get the same result.
625 logln("after iter.reset()");
626 checkIterator(iter
.reset(), data
, UPRV_LENGTHOF(data
));
629 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
630 LocalPointer
<UCharsTrie
> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST
));
632 return; // buildTrie() reported an error
634 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromRoot()");
635 UCharsTrie::Iterator
iter(*trie
, 4, errorCode
);
636 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
639 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
640 // of each string, and no string duplicates from the truncation.
641 static const StringAndValue data
[]={
669 checkIterator(iter
, data
, UPRV_LENGTHOF(data
));
670 // Reset, and we should get the same result.
671 logln("after iter.reset()");
672 checkIterator(iter
.reset(), data
, UPRV_LENGTHOF(data
));
675 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
676 static const StringAndValue data
[]={
681 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, UPRV_LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
683 return; // buildTrie() reported an error
685 // Go into a linear-match node.
688 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
689 // Truncate within the linear-match node.
690 UCharsTrie::Iterator
iter(*trie
, 2, errorCode
);
691 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
694 static const StringAndValue expected
[]={
697 checkIterator(iter
, expected
, UPRV_LENGTHOF(expected
));
698 // Reset, and we should get the same result.
699 logln("after iter.reset()");
700 checkIterator(iter
.reset(), expected
, UPRV_LENGTHOF(expected
));
703 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
704 static const StringAndValue data
[]={
709 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, UPRV_LENGTHOF(data
), USTRINGTRIE_BUILD_FAST
));
711 return; // buildTrie() reported an error
713 // Go into a linear-match node.
717 IcuTestErrorCode
errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
718 // Truncate after the linear-match node.
719 UCharsTrie::Iterator
iter(*trie
, 3, errorCode
);
720 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
723 static const StringAndValue expected
[]={
728 checkIterator(iter
, expected
, UPRV_LENGTHOF(expected
));
729 // Reset, and we should get the same result.
730 logln("after iter.reset()");
731 checkIterator(iter
.reset(), expected
, UPRV_LENGTHOF(expected
));
734 void UCharsTrieTest::TestIteratorFromUChars() {
735 static const StringAndValue data
[]={
741 IcuTestErrorCode
errorCode(*this, "TestIteratorFromUChars()");
742 for(int32_t i
=0; i
<UPRV_LENGTHOF(data
); ++i
) {
743 builder_
->add(data
[i
].s
, data
[i
].value
, errorCode
);
745 UnicodeString trieUChars
;
746 builder_
->buildUnicodeString(USTRINGTRIE_BUILD_FAST
, trieUChars
, errorCode
);
747 UCharsTrie::Iterator
iter(trieUChars
.getBuffer(), 0, errorCode
);
748 checkIterator(iter
, data
, UPRV_LENGTHOF(data
));
751 void UCharsTrieTest::checkData(const StringAndValue data
[], int32_t dataLength
) {
752 logln("checkData(dataLength=%d, fast)", (int)dataLength
);
753 checkData(data
, dataLength
, USTRINGTRIE_BUILD_FAST
);
754 logln("checkData(dataLength=%d, small)", (int)dataLength
);
755 checkData(data
, dataLength
, USTRINGTRIE_BUILD_SMALL
);
758 void UCharsTrieTest::checkData(const StringAndValue data
[], int32_t dataLength
, UStringTrieBuildOption buildOption
) {
759 LocalPointer
<UCharsTrie
> trie(buildTrie(data
, dataLength
, buildOption
));
761 return; // buildTrie() reported an error
763 checkFirst(*trie
, data
, dataLength
);
764 checkNext(*trie
, data
, dataLength
);
765 checkNextWithState(*trie
, data
, dataLength
);
766 checkNextWithState64(*trie
, data
, dataLength
);
767 checkNextString(*trie
, data
, dataLength
);
768 checkIterator(*trie
, data
, dataLength
);
771 UCharsTrie
*UCharsTrieTest::buildTrie(const StringAndValue data
[], int32_t dataLength
,
772 UStringTrieBuildOption buildOption
) {
773 IcuTestErrorCode
errorCode(*this, "buildTrie()");
774 // Add the items to the trie builder in an interesting (not trivial, not random) order.
777 // Odd number of items.
780 } else if((dataLength%3
)!=0) {
781 // Not a multiple of 3.
789 for(int32_t i
=0; i
<dataLength
; ++i
) {
790 builder_
->add(UnicodeString(data
[index
].s
, -1, US_INV
).unescape(),
791 data
[index
].value
, errorCode
);
792 index
=(index
+step
)%dataLength
;
794 UnicodeString trieUChars
;
795 builder_
->buildUnicodeString(buildOption
, trieUChars
, errorCode
);
796 LocalPointer
<UCharsTrie
> trie(builder_
->build(buildOption
, errorCode
));
797 if(!errorCode
.errIfFailureAndReset("add()/build()")) {
798 builder_
->add("zzz", 999, errorCode
);
799 if(errorCode
.reset()!=U_NO_WRITE_PERMISSION
) {
800 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
803 logln("serialized trie size: %ld UChars\n", (long)trieUChars
.length());
804 UnicodeString trieUChars2
;
805 builder_
->buildUnicodeString(buildOption
, trieUChars2
, errorCode
);
806 if(trieUChars
.getBuffer()==trieUChars2
.getBuffer()) {
807 errln("builder.buildUnicodeString() before & after build() returned same array");
809 if(errorCode
.isFailure()) {
812 // Tries from either build() method should be identical but
813 // UCharsTrie does not implement equals().
814 // We just return either one.
815 if((dataLength
&1)!=0) {
816 return trie
.orphan();
818 return new UCharsTrie(trieUChars2
.getBuffer());
822 void UCharsTrieTest::checkFirst(UCharsTrie
&trie
,
823 const StringAndValue data
[], int32_t dataLength
) {
824 for(int32_t i
=0; i
<dataLength
; ++i
) {
826 continue; // skip empty string
828 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
829 UChar32 c
=expectedString
[0];
830 UChar32 nextCp
=expectedString
.length()>1 ? expectedString
[1] : 0;
831 UStringTrieResult firstResult
=trie
.first(c
);
832 int32_t firstValue
=USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1;
833 UStringTrieResult nextResult
=trie
.next(nextCp
);
834 if(firstResult
!=trie
.reset().next(c
) ||
835 firstResult
!=trie
.current() ||
836 firstValue
!=(USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1) ||
837 nextResult
!=trie
.next(nextCp
)
839 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
842 c
=expectedString
.char32At(0);
843 int32_t cLength
=U16_LENGTH(c
);
844 nextCp
=expectedString
.length()>cLength
? expectedString
.char32At(cLength
) : 0;
845 firstResult
=trie
.firstForCodePoint(c
);
846 firstValue
=USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1;
847 nextResult
=trie
.nextForCodePoint(nextCp
);
848 if(firstResult
!=trie
.reset().nextForCodePoint(c
) ||
849 firstResult
!=trie
.current() ||
850 firstValue
!=(USTRINGTRIE_HAS_VALUE(firstResult
) ? trie
.getValue() : -1) ||
851 nextResult
!=trie
.nextForCodePoint(nextCp
)
853 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
860 void UCharsTrieTest::checkNext(UCharsTrie
&trie
,
861 const StringAndValue data
[], int32_t dataLength
) {
862 UCharsTrie::State state
;
863 for(int32_t i
=0; i
<dataLength
; ++i
) {
864 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
865 int32_t stringLength
= (i
&1) ? -1 : expectedString
.length();
866 UStringTrieResult result
;
867 if( !USTRINGTRIE_HAS_VALUE(
868 result
=trie
.next(expectedString
.getTerminatedBuffer(), stringLength
)) ||
869 result
!=trie
.current()
871 errln("trie does not seem to contain %s", data
[i
].s
);
872 } else if(trie
.getValue()!=data
[i
].value
) {
873 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
875 (long)trie
.getValue(), (long)trie
.getValue(),
876 (long)data
[i
].value
, (long)data
[i
].value
);
877 } else if(result
!=trie
.current() || trie
.getValue()!=data
[i
].value
) {
878 errln("trie value for %s changes when repeating current()/getValue()", data
[i
].s
);
881 stringLength
=expectedString
.length();
882 result
=trie
.current();
883 for(int32_t j
=0; j
<stringLength
; ++j
) {
884 if(!USTRINGTRIE_HAS_NEXT(result
)) {
885 errln("trie.current()!=hasNext before end of %s (at index %d)", data
[i
].s
, j
);
888 if(result
==USTRINGTRIE_INTERMEDIATE_VALUE
) {
890 if(trie
.current()!=USTRINGTRIE_INTERMEDIATE_VALUE
) {
891 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data
[i
].s
, j
);
895 result
=trie
.next(expectedString
[j
]);
896 if(!USTRINGTRIE_MATCHES(result
)) {
897 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data
[i
].s
, j
);
900 if(result
!=trie
.current()) {
901 errln("trie.next()!=following current() before end of %s (at index %d)", data
[i
].s
, j
);
905 if(!USTRINGTRIE_HAS_VALUE(result
)) {
906 errln("trie.next()!=hasValue at the end of %s", data
[i
].s
);
910 if(result
!=trie
.current()) {
911 errln("trie.current() != current()+getValue()+current() after end of %s",
914 // Compare the final current() with whether next() can actually continue.
915 trie
.saveState(state
);
916 UBool nextContinues
=FALSE
;
917 for(int32_t c
=0x20; c
<0xe000; ++c
) {
919 c
=0xd800; // Check for ASCII and surrogates but not all of the BMP.
921 if(trie
.resetToState(state
).next(c
)) {
926 if((result
==USTRINGTRIE_INTERMEDIATE_VALUE
)!=nextContinues
) {
927 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
928 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data
[i
].s
);
934 void UCharsTrieTest::checkNextWithState(UCharsTrie
&trie
,
935 const StringAndValue data
[], int32_t dataLength
) {
936 UCharsTrie::State noState
, state
;
937 for(int32_t i
=0; i
<dataLength
; ++i
) {
939 // This should have no effect.
940 trie
.resetToState(noState
);
942 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
943 int32_t stringLength
=expectedString
.length();
944 int32_t partialLength
=stringLength
/3;
945 for(int32_t j
=0; j
<partialLength
; ++j
) {
946 if(!USTRINGTRIE_MATCHES(trie
.next(expectedString
[j
]))) {
947 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data
[i
].s
);
951 trie
.saveState(state
);
952 UStringTrieResult resultAtState
=trie
.current();
953 UStringTrieResult result
;
954 int32_t valueAtState
=-99;
955 if(USTRINGTRIE_HAS_VALUE(resultAtState
)) {
956 valueAtState
=trie
.getValue();
958 result
=trie
.next(0); // mismatch
959 if(result
!=USTRINGTRIE_NO_MATCH
|| result
!=trie
.current()) {
960 errln("trie.next(0) matched after part of %s", data
[i
].s
);
962 if( resultAtState
!=trie
.resetToState(state
).current() ||
963 (USTRINGTRIE_HAS_VALUE(resultAtState
) && valueAtState
!=trie
.getValue())
965 errln("trie.next(part of %s) changes current()/getValue() after "
966 "saveState/next(0)/resetToState",
968 } else if(!USTRINGTRIE_HAS_VALUE(
969 result
=trie
.next(expectedString
.getTerminatedBuffer()+partialLength
,
970 stringLength
-partialLength
)) ||
971 result
!=trie
.current()) {
972 errln("trie.next(rest of %s) does not seem to contain %s after "
973 "saveState/next(0)/resetToState",
974 data
[i
].s
, data
[i
].s
);
975 } else if(!USTRINGTRIE_HAS_VALUE(
976 result
=trie
.resetToState(state
).
977 next(expectedString
.getTerminatedBuffer()+partialLength
,
978 stringLength
-partialLength
)) ||
979 result
!=trie
.current()) {
980 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
982 } else if(trie
.getValue()!=data
[i
].value
) {
983 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
985 (long)trie
.getValue(), (long)trie
.getValue(),
986 (long)data
[i
].value
, (long)data
[i
].value
);
992 void UCharsTrieTest::checkNextWithState64(UCharsTrie
&trie
,
993 const StringAndValue data
[], int32_t dataLength
) {
994 assertTrue("trie(initial state).getState64()!=0", trie
.getState64() != 0);
995 for(int32_t i
=0; i
<dataLength
; ++i
) {
996 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
997 int32_t stringLength
=expectedString
.length();
998 int32_t partialLength
= stringLength
/ 3;
999 for(int32_t j
=0; j
<partialLength
; ++j
) {
1000 if(!USTRINGTRIE_MATCHES(trie
.next(expectedString
[j
]))) {
1001 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data
[i
].s
);
1005 uint64_t state
= trie
.getState64();
1006 assertTrue("trie.getState64()!=0", state
!= 0);
1007 UStringTrieResult resultAtState
=trie
.current();
1008 UStringTrieResult result
;
1009 int32_t valueAtState
=-99;
1010 if(USTRINGTRIE_HAS_VALUE(resultAtState
)) {
1011 valueAtState
=trie
.getValue();
1013 result
=trie
.next(0); // mismatch
1014 if(result
!=USTRINGTRIE_NO_MATCH
|| result
!=trie
.current()) {
1015 errln("trie.next(0) matched after part of %s", data
[i
].s
);
1017 if( resultAtState
!=trie
.resetToState64(state
).current() ||
1018 (USTRINGTRIE_HAS_VALUE(resultAtState
) && valueAtState
!=trie
.getValue())
1020 errln("trie.next(part of %s) changes current()/getValue() after "
1021 "getState64/next(0)/resetToState64",
1023 } else if(!USTRINGTRIE_HAS_VALUE(
1024 result
=trie
.next(expectedString
.getTerminatedBuffer()+partialLength
,
1025 stringLength
-partialLength
)) ||
1026 result
!=trie
.current()) {
1027 errln("trie.next(rest of %s) does not seem to contain %s after "
1028 "getState64/next(0)/resetToState64",
1029 data
[i
].s
, data
[i
].s
);
1030 } else if(!USTRINGTRIE_HAS_VALUE(
1031 result
=trie
.resetToState64(state
).
1032 next(expectedString
.getTerminatedBuffer()+partialLength
,
1033 stringLength
-partialLength
)) ||
1034 result
!=trie
.current()) {
1035 errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64",
1037 } else if(trie
.getValue()!=data
[i
].value
) {
1038 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
1040 (long)trie
.getValue(), (long)trie
.getValue(),
1041 (long)data
[i
].value
, (long)data
[i
].value
);
1047 // next(string) is also tested in other functions,
1048 // but here we try to go partway through the string, and then beyond it.
1049 void UCharsTrieTest::checkNextString(UCharsTrie
&trie
,
1050 const StringAndValue data
[], int32_t dataLength
) {
1051 for(int32_t i
=0; i
<dataLength
; ++i
) {
1052 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
1053 int32_t stringLength
=expectedString
.length();
1054 if(!trie
.next(expectedString
.getTerminatedBuffer(), stringLength
/2)) {
1055 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data
[i
].s
);
1058 // Test that we stop properly at the end of the string.
1059 if(trie
.next(expectedString
.getTerminatedBuffer()+stringLength
/2,
1060 stringLength
+1-stringLength
/2)) {
1061 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data
[i
].s
);
1067 void UCharsTrieTest::checkIterator(UCharsTrie
&trie
,
1068 const StringAndValue data
[], int32_t dataLength
) {
1069 IcuTestErrorCode
errorCode(*this, "checkIterator()");
1070 UCharsTrie::Iterator
iter(trie
, 0, errorCode
);
1071 if(errorCode
.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1074 checkIterator(iter
, data
, dataLength
);
1077 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator
&iter
,
1078 const StringAndValue data
[], int32_t dataLength
) {
1079 IcuTestErrorCode
errorCode(*this, "checkIterator()");
1080 for(int32_t i
=0; i
<dataLength
; ++i
) {
1081 if(!iter
.hasNext()) {
1082 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i
, data
[i
].s
);
1085 UBool hasNext
=iter
.next(errorCode
);
1086 if(errorCode
.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i
, data
[i
].s
)) {
1090 errln("trie iterator next()=FALSE for item %d: %s", (int)i
, data
[i
].s
);
1093 UnicodeString expectedString
=UnicodeString(data
[i
].s
, -1, US_INV
).unescape();
1094 if(iter
.getString()!=expectedString
) {
1096 UnicodeString
invString(prettify(iter
.getString()));
1097 invString
.extract(0, invString
.length(), buffer
, UPRV_LENGTHOF(buffer
), US_INV
);
1098 errln("trie iterator next().getString()=%s but expected %s for item %d",
1099 buffer
, data
[i
].s
, (int)i
);
1101 if(iter
.getValue()!=data
[i
].value
) {
1102 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1103 (long)iter
.getValue(), (long)iter
.getValue(),
1104 (long)data
[i
].value
, (long)data
[i
].value
,
1108 if(iter
.hasNext()) {
1109 errln("trie iterator hasNext()=TRUE after all items");
1111 UBool hasNext
=iter
.next(errorCode
);
1112 errorCode
.errIfFailureAndReset("trie iterator next() after all items");
1114 errln("trie iterator next()=TRUE after all items");