X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b25be06635768807f8f693286fa73bb2297bb06c..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/test/intltest/ucharstrietest.cpp diff --git a/icuSources/test/intltest/ucharstrietest.cpp b/icuSources/test/intltest/ucharstrietest.cpp new file mode 100644 index 00000000..b44789f1 --- /dev/null +++ b/icuSources/test/intltest/ucharstrietest.cpp @@ -0,0 +1,1057 @@ +/* +******************************************************************************* +* Copyright (C) 2010-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstrietest.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov16 +* created by: Markus W. Scherer +*/ + +#include + +#include "unicode/utypes.h" +#include "unicode/appendable.h" +#include "unicode/localpointer.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "intltest.h" + +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + +struct StringAndValue { + const char *s; + int32_t value; +}; + +class UCharsTrieTest : public IntlTest { +public: + UCharsTrieTest(); + virtual ~UCharsTrieTest(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); + void TestBuilder(); + void TestEmpty(); + void Test_a(); + void Test_a_ab(); + void TestShortestBranch(); + void TestBranches(); + void TestLongSequence(); + void TestLongBranch(); + void TestValuesForState(); + void TestCompact(); + void TestFirstForCodePoint(); + void TestNextForCodePoint(); + + UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); + void TestLargeTrie(); + + UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); + void TestHasUniqueValue(); + void TestGetNextUChars(); + void TestIteratorFromBranch(); + void TestIteratorFromLinearMatch(); + void TestTruncatingIteratorFromRoot(); + void TestTruncatingIteratorFromLinearMatchShort(); + void TestTruncatingIteratorFromLinearMatchLong(); + void TestIteratorFromUChars(); + + void checkData(const StringAndValue data[], int32_t dataLength); + void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); + UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption); + void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); + +private: + UCharsTrieBuilder *builder_; +}; + +extern IntlTest *createUCharsTrieTest() { + return new UCharsTrieTest(); +} + +UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { + IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); + builder_=new UCharsTrieBuilder(errorCode); +} + +UCharsTrieTest::~UCharsTrieTest() { + delete builder_; +} + +void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { + if(exec) { + logln("TestSuite UCharsTrieTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(TestBuilder); + TESTCASE_AUTO(TestEmpty); + TESTCASE_AUTO(Test_a); + TESTCASE_AUTO(Test_a_ab); + TESTCASE_AUTO(TestShortestBranch); + TESTCASE_AUTO(TestBranches); + TESTCASE_AUTO(TestLongSequence); + TESTCASE_AUTO(TestLongBranch); + TESTCASE_AUTO(TestValuesForState); + TESTCASE_AUTO(TestCompact); + TESTCASE_AUTO(TestFirstForCodePoint); + TESTCASE_AUTO(TestNextForCodePoint); + TESTCASE_AUTO(TestLargeTrie); + TESTCASE_AUTO(TestHasUniqueValue); + TESTCASE_AUTO(TestGetNextUChars); + TESTCASE_AUTO(TestIteratorFromBranch); + TESTCASE_AUTO(TestIteratorFromLinearMatch); + TESTCASE_AUTO(TestTruncatingIteratorFromRoot); + TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); + TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); + TESTCASE_AUTO(TestIteratorFromUChars); + TESTCASE_AUTO_END; +} + +void UCharsTrieTest::TestBuilder() { + IcuTestErrorCode errorCode(*this, "TestBuilder()"); + delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); + if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { + errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); + return; + } + // TODO: remove .build(...) once add() checks for duplicates. + builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); + if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { + errln("UCharsTrieBuilder.add() did not detect duplicates"); + return; + } +} + +void UCharsTrieTest::TestEmpty() { + static const StringAndValue data[]={ + { "", 0 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::Test_a() { + static const StringAndValue data[]={ + { "a", 1 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::Test_a_ab() { + static const StringAndValue data[]={ + { "a", 1 }, + { "ab", 100 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestShortestBranch() { + static const StringAndValue data[]={ + { "a", 1000 }, + { "b", 2000 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestBranches() { + static const StringAndValue data[]={ + { "a", 0x10 }, + { "cc", 0x40 }, + { "e", 0x100 }, + { "ggg", 0x400 }, + { "i", 0x1000 }, + { "kkkk", 0x4000 }, + { "n", 0x10000 }, + { "ppppp", 0x40000 }, + { "r", 0x100000 }, + { "sss", 0x200000 }, + { "t", 0x400000 }, + { "uu", 0x800000 }, + { "vv", 0x7fffffff }, + { "zz", 0x80000000 } + }; + for(int32_t length=2; length<=LENGTHOF(data); ++length) { + infoln("TestBranches length=%d", (int)length); + checkData(data, length); + } +} + +void UCharsTrieTest::TestLongSequence() { + static const StringAndValue data[]={ + { "a", -1 }, + // sequence of linear-match nodes + { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, + // more than 256 units + { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestLongBranch() { + // Split-branch and interesting compact-integer values. + static const StringAndValue data[]={ + { "a", -2 }, + { "b", -1 }, + { "c", 0 }, + { "d2", 1 }, + { "f", 0x3f }, + { "g", 0x40 }, + { "h", 0x41 }, + { "j23", 0x1900 }, + { "j24", 0x19ff }, + { "j25", 0x1a00 }, + { "k2", 0x1a80 }, + { "k3", 0x1aff }, + { "l234567890", 0x1b00 }, + { "l234567890123", 0x1b01 }, + { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, + { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, + { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, + { "r", 0x333333 }, + { "s2345", 0x4444444 }, + { "t234567890", 0x77777777 }, + { "z", 0x80000001 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestValuesForState() { + // Check that saveState() and resetToState() interact properly + // with next() and current(). + static const StringAndValue data[]={ + { "a", -1 }, + { "ab", -2 }, + { "abc", -3 }, + { "abcd", -4 }, + { "abcde", -5 }, + { "abcdef", -6 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestCompact() { + // Duplicate trailing strings and values provide opportunities for compacting. + static const StringAndValue data[]={ + { "+", 0 }, + { "+august", 8 }, + { "+december", 12 }, + { "+july", 7 }, + { "+june", 6 }, + { "+november", 11 }, + { "+october", 10 }, + { "+september", 9 }, + { "-", 0 }, + { "-august", 8 }, + { "-december", 12 }, + { "-july", 7 }, + { "-june", 6 }, + { "-november", 11 }, + { "-october", 10 }, + { "-september", 9 }, + // The l+n branch (with its sub-nodes) is a duplicate but will be written + // both times because each time it follows a different linear-match node. + { "xjuly", 7 }, + { "xjune", 6 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestFirstForCodePoint() { + static const StringAndValue data[]={ + { "a", 1 }, + { "a\\ud800", 2 }, + { "a\\U00010000", 3 }, + { "\\ud840", 4 }, + { "\\U00020000\\udbff", 5 }, + { "\\U00020000\\U0010ffff", 6 }, + { "\\U00020000\\U0010ffffz", 7 }, + { "\\U00050000xy", 8 }, + { "\\U00050000xyz", 9 } + }; + checkData(data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestNextForCodePoint() { + static const StringAndValue data[]={ + { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, + { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, + { "\\u4dff\\U000103ff", 99999 } + }; + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + UStringTrieResult result; + if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=2000000000 + ) { + errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); + } + if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=44444 + ) { + errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); + } + if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate + ) { + errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); + } + if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || + (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || + trie->getValue()!=99999 + ) { + errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); + } +} + +// Definitions in the anonymous namespace are invisible outside this file. +namespace { + +// Generate (string, value) pairs. +// The first string (before next()) will be empty. +class Generator { +public: + Generator() : value(4711), num(0) {} + void next() { + UChar c; + s.truncate(0); + s.append(c=(UChar)(value>>16)); + s.append((UChar)(value>>4)); + if(value&1) { + s.append((UChar)value); + } + set.add(c); + value+=((value>>5)&0x7ff)*3+1; + ++num; + } + const UnicodeString &getString() const { return s; } + int32_t getValue() const { return value; } + int32_t countUniqueFirstChars() const { return set.size(); } + int32_t getIndex() const { return num; } + +private: + UnicodeString s; + UnicodeSet set; + int32_t value; + int32_t num; +}; + +} // end namespace + +UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { + IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); + Generator gen; + builder_->clear(); + while(gen.countUniqueFirstChars()add(gen.getString(), gen.getValue(), errorCode); + gen.next(); + } + infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); + UnicodeString trieUChars; + builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); + return new UCharsTrie(trieUChars.getBuffer()); +} + +// Exercise a large branch node. +void UCharsTrieTest::TestLargeTrie() { + LocalPointer trie(buildLargeTrie(1111)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + Generator gen; + while(gen.countUniqueFirstChars()<1111) { + UnicodeString x(gen.getString()); + int32_t value=gen.getValue(); + if(!x.isEmpty()) { + if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { + errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", + x[0], (long)gen.getIndex()); + break; + } + x.remove(0, 1); + } + UStringTrieResult result=trie->next(x.getBuffer(), x.length()); + if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { + errln("next(%d chars U+%04X U+%04X)!=hasValue or " + "next()!=current() or getValue() wrong " + "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); + break; + } + gen.next(); + } +} + +enum { + u_a=0x61, + u_b=0x62, + u_c=0x63, + u_j=0x6a, + u_n=0x6e, + u_r=0x72, + u_u=0x75, + u_y=0x79 +}; + +UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { + // All types of nodes leading to the same value, + // for code coverage of recursive functions. + // In particular, we need a lot of branches on some single level + // to exercise a split-branch node. + static const StringAndValue data[]={ + { "august", 8 }, + { "jan", 1 }, + { "jan.", 1 }, + { "jana", 1 }, + { "janbb", 1 }, + { "janc", 1 }, + { "janddd", 1 }, + { "janee", 1 }, + { "janef", 1 }, + { "janf", 1 }, + { "jangg", 1 }, + { "janh", 1 }, + { "janiiii", 1 }, + { "janj", 1 }, + { "jankk", 1 }, + { "jankl", 1 }, + { "jankmm", 1 }, + { "janl", 1 }, + { "janm", 1 }, + { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, + { "jano", 1 }, + { "janpp", 1 }, + { "janqqq", 1 }, + { "janr", 1 }, + { "januar", 1 }, + { "january", 1 }, + { "july", 7 }, + { "jun", 6 }, + { "jun.", 6 }, + { "june", 6 } + }; + return buildTrie(data, LENGTHOF(data), buildOption); +} + +void UCharsTrieTest::TestHasUniqueValue() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + int32_t uniqueValue; + if(trie->hasUniqueValue(uniqueValue)) { + errln("unique value at root"); + } + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); + // hasUniqueValue() directly after next() + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { + errln("not unique value 1 after \"jan\""); + } + trie->first(u_j); + trie->next(u_u); + if(trie->hasUniqueValue(uniqueValue)) { + errln("unique value after \"ju\""); + } + if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { + errln("not normal value 6 after \"jun\""); + } + // hasUniqueValue() after getValue() + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { + errln("not unique value 6 after \"jun\""); + } + // hasUniqueValue() from within a linear-match node + trie->first(u_a); + trie->next(u_u); + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { + errln("not unique value 8 after \"au\""); + } +} + +void UCharsTrieTest::TestGetNextUChars() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + UnicodeString buffer; + UnicodeStringAppendable app(buffer); + int32_t count=trie->getNextUChars(app); + if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { + errln("months getNextUChars()!=[aj] at root"); + } + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); + // getNextUChars() directly after next() + buffer.remove(); + count=trie->getNextUChars(app); + if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { + errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); + } + // getNextUChars() after getValue() + trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. + buffer.remove(); + count=trie->getNextUChars(app); + if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { + errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); + } + // getNextUChars() from a linear-match node + trie->next(u_u); + buffer.remove(); + count=trie->getNextUChars(app); + if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { + errln("months getNextUChars()!=[a] after \"janu\""); + } + trie->next(u_a); + buffer.remove(); + count=trie->getNextUChars(app); + if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { + errln("months getNextUChars()!=[r] after \"janua\""); + } + trie->next(u_r); + trie->next(u_y); + // getNextUChars() after a final match + buffer.remove(); + count=trie->getNextUChars(app); + if(count!=0 || buffer.length()!=0) { + errln("months getNextUChars()!=[] after \"january\""); + } +} + +void UCharsTrieTest::TestIteratorFromBranch() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go to a branch node. + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); + IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); + UCharsTrie::Iterator iter(*trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the suffixes + // following "jan". + static const StringAndValue data[]={ + { "", 1 }, + { ".", 1 }, + { "a", 1 }, + { "bb", 1 }, + { "c", 1 }, + { "ddd", 1 }, + { "ee", 1 }, + { "ef", 1 }, + { "f", 1 }, + { "gg", 1 }, + { "h", 1 }, + { "iiii", 1 }, + { "j", 1 }, + { "kk", 1 }, + { "kl", 1 }, + { "kmm", 1 }, + { "l", 1 }, + { "m", 1 }, + { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, + { "o", 1 }, + { "pp", 1 }, + { "qqq", 1 }, + { "r", 1 }, + { "uar", 1 }, + { "uary", 1 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestIteratorFromLinearMatch() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next(u_j); + trie->next(u_a); + trie->next(u_n); + trie->next(u_u); + trie->next(u_a); + IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); + UCharsTrie::Iterator iter(*trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the suffixes + // following "janua". + static const StringAndValue data[]={ + { "r", 1 }, + { "ry", 1 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestTruncatingIteratorFromRoot() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); + UCharsTrie::Iterator iter(*trie, 4, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters + // of each string, and no string duplicates from the truncation. + static const StringAndValue data[]={ + { "augu", -1 }, + { "jan", 1 }, + { "jan.", 1 }, + { "jana", 1 }, + { "janb", -1 }, + { "janc", 1 }, + { "jand", -1 }, + { "jane", -1 }, + { "janf", 1 }, + { "jang", -1 }, + { "janh", 1 }, + { "jani", -1 }, + { "janj", 1 }, + { "jank", -1 }, + { "janl", 1 }, + { "janm", 1 }, + { "jann", -1 }, + { "jano", 1 }, + { "janp", -1 }, + { "janq", -1 }, + { "janr", 1 }, + { "janu", -1 }, + { "july", 7 }, + { "jun", 6 }, + { "jun.", 6 }, + { "june", 6 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { + static const StringAndValue data[]={ + { "abcdef", 10 }, + { "abcdepq", 200 }, + { "abcdeyz", 3000 } + }; + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next(u_a); + trie->next(u_b); + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); + // Truncate within the linear-match node. + UCharsTrie::Iterator iter(*trie, 2, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { + return; + } + static const StringAndValue expected[]={ + { "cd", -1 } + }; + checkIterator(iter, expected, LENGTHOF(expected)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), expected, LENGTHOF(expected)); +} + +void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { + static const StringAndValue data[]={ + { "abcdef", 10 }, + { "abcdepq", 200 }, + { "abcdeyz", 3000 } + }; + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next(u_a); + trie->next(u_b); + trie->next(u_c); + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); + // Truncate after the linear-match node. + UCharsTrie::Iterator iter(*trie, 3, errorCode); + if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { + return; + } + static const StringAndValue expected[]={ + { "def", 10 }, + { "dep", -1 }, + { "dey", -1 } + }; + checkIterator(iter, expected, LENGTHOF(expected)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), expected, LENGTHOF(expected)); +} + +void UCharsTrieTest::TestIteratorFromUChars() { + static const StringAndValue data[]={ + { "mm", 3 }, + { "mmm", 33 }, + { "mmnop", 333 } + }; + builder_->clear(); + IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); + for(int32_t i=0; iadd(data[i].s, data[i].value, errorCode); + } + UnicodeString trieUChars; + builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); + UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); + checkIterator(iter, data, LENGTHOF(data)); +} + +void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { + logln("checkData(dataLength=%d, fast)", (int)dataLength); + checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); + logln("checkData(dataLength=%d, small)", (int)dataLength); + checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); +} + +void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { + LocalPointer trie(buildTrie(data, dataLength, buildOption)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + checkFirst(*trie, data, dataLength); + checkNext(*trie, data, dataLength); + checkNextWithState(*trie, data, dataLength); + checkNextString(*trie, data, dataLength); + checkIterator(*trie, data, dataLength); +} + +UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption) { + IcuTestErrorCode errorCode(*this, "buildTrie()"); + // Add the items to the trie builder in an interesting (not trivial, not random) order. + int32_t index, step; + if(dataLength&1) { + // Odd number of items. + index=dataLength/2; + step=2; + } else if((dataLength%3)!=0) { + // Not a multiple of 3. + index=dataLength/5; + step=3; + } else { + index=dataLength-1; + step=-1; + } + builder_->clear(); + for(int32_t i=0; iadd(UnicodeString(data[index].s, -1, US_INV).unescape(), + data[index].value, errorCode); + index=(index+step)%dataLength; + } + UnicodeString trieUChars; + builder_->buildUnicodeString(buildOption, trieUChars, errorCode); + LocalPointer trie(builder_->build(buildOption, errorCode)); + if(!errorCode.logIfFailureAndReset("add()/build()")) { + builder_->add("zzz", 999, errorCode); + if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { + errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); + } + } + logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); + UnicodeString trieUChars2; + builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); + if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { + errln("builder.buildUnicodeString() before & after build() returned same array"); + } + if(errorCode.isFailure()) { + return NULL; + } + // Tries from either build() method should be identical but + // UCharsTrie does not implement equals(). + // We just return either one. + if((dataLength&1)!=0) { + return trie.orphan(); + } else { + return new UCharsTrie(trieUChars2.getBuffer()); + } +} + +void UCharsTrieTest::checkFirst(UCharsTrie &trie, + const StringAndValue data[], int32_t dataLength) { + for(int32_t i=0; i1 ? expectedString[1] : 0; + UStringTrieResult firstResult=trie.first(c); + int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; + UStringTrieResult nextResult=trie.next(nextCp); + if(firstResult!=trie.reset().next(c) || + firstResult!=trie.current() || + firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || + nextResult!=trie.next(nextCp) + ) { + errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", + c, data[i].s); + } + c=expectedString.char32At(0); + int32_t cLength=U16_LENGTH(c); + nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; + firstResult=trie.firstForCodePoint(c); + firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; + nextResult=trie.nextForCodePoint(nextCp); + if(firstResult!=trie.reset().nextForCodePoint(c) || + firstResult!=trie.current() || + firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || + nextResult!=trie.nextForCodePoint(nextCp) + ) { + errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", + c, data[i].s); + } + } + trie.reset(); +} + +void UCharsTrieTest::checkNext(UCharsTrie &trie, + const StringAndValue data[], int32_t dataLength) { + UCharsTrie::State state; + for(int32_t i=0; i