X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b25be06635768807f8f693286fa73bb2297bb06c..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/test/intltest/bytestrietest.cpp diff --git a/icuSources/test/intltest/bytestrietest.cpp b/icuSources/test/intltest/bytestrietest.cpp new file mode 100644 index 00000000..12101d2f --- /dev/null +++ b/icuSources/test/intltest/bytestrietest.cpp @@ -0,0 +1,880 @@ +/* +******************************************************************************* +* Copyright (C) 2010-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytetrietest.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov16 +* created by: Markus W. Scherer +*/ + +#include + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/bytestriebuilder.h" +#include "unicode/localpointer.h" +#include "unicode/stringpiece.h" +#include "intltest.h" + +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + +struct StringAndValue { + const char *s; + int32_t value; +}; + +class BytesTrieTest : public IntlTest { +public: + BytesTrieTest(); + virtual ~BytesTrieTest(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); + void TestBuilder(); + void TestEmpty(); + void Test_a(); + void Test_a_ab(); + void TestShortestBranch(); + void TestBranches(); + void TestLongSequence(); + void TestLongBranch(); + void TestValuesForState(); + void TestCompact(); + + BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); + void TestHasUniqueValue(); + void TestGetNextBytes(); + void TestIteratorFromBranch(); + void TestIteratorFromLinearMatch(); + void TestTruncatingIteratorFromRoot(); + void TestTruncatingIteratorFromLinearMatchShort(); + void TestTruncatingIteratorFromLinearMatchLong(); + void TestIteratorFromBytes(); + + void checkData(const StringAndValue data[], int32_t dataLength); + void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); + BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption); + void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength); + void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); + +private: + BytesTrieBuilder *builder_; +}; + +extern IntlTest *createBytesTrieTest() { + return new BytesTrieTest(); +} + +BytesTrieTest::BytesTrieTest() : builder_(NULL) { + IcuTestErrorCode errorCode(*this, "BytesTrieTest()"); + builder_=new BytesTrieBuilder(errorCode); +} + +BytesTrieTest::~BytesTrieTest() { + delete builder_; +} + +void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { + if(exec) { + logln("TestSuite BytesTrieTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(TestBuilder); + TESTCASE_AUTO(TestEmpty); + TESTCASE_AUTO(Test_a); + TESTCASE_AUTO(Test_a_ab); + TESTCASE_AUTO(TestShortestBranch); + TESTCASE_AUTO(TestBranches); + TESTCASE_AUTO(TestLongSequence); + TESTCASE_AUTO(TestLongBranch); + TESTCASE_AUTO(TestValuesForState); + TESTCASE_AUTO(TestCompact); + TESTCASE_AUTO(TestHasUniqueValue); + TESTCASE_AUTO(TestGetNextBytes); + TESTCASE_AUTO(TestIteratorFromBranch); + TESTCASE_AUTO(TestIteratorFromLinearMatch); + TESTCASE_AUTO(TestTruncatingIteratorFromRoot); + TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); + TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); + TESTCASE_AUTO(TestIteratorFromBytes); + TESTCASE_AUTO_END; +} + +void BytesTrieTest::TestBuilder() { + IcuTestErrorCode errorCode(*this, "TestBuilder()"); + builder_->clear(); + delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); + if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { + errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); + return; + } + // TODO: remove .build(...) once add() checks for duplicates. + builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); + if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { + errln("BytesTrieBuilder.add() did not detect duplicates"); + return; + } +} + +void BytesTrieTest::TestEmpty() { + static const StringAndValue data[]={ + { "", 0 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::Test_a() { + static const StringAndValue data[]={ + { "a", 1 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::Test_a_ab() { + static const StringAndValue data[]={ + { "a", 1 }, + { "ab", 100 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::TestShortestBranch() { + static const StringAndValue data[]={ + { "a", 1000 }, + { "b", 2000 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::TestBranches() { + static const StringAndValue data[]={ + { "a", 0x10 }, + { "cc", 0x40 }, + { "e", 0x100 }, + { "ggg", 0x400 }, + { "i", 0x1000 }, + { "kkkk", 0x4000 }, + { "n", 0x10000 }, + { "ppppp", 0x40000 }, + { "r", 0x100000 }, + { "sss", 0x200000 }, + { "t", 0x400000 }, + { "uu", 0x800000 }, + { "vv", 0x7fffffff }, + { "zz", 0x80000000 } + }; + for(int32_t length=2; length<=LENGTHOF(data); ++length) { + infoln("TestBranches length=%d", (int)length); + checkData(data, length); + } +} + +void BytesTrieTest::TestLongSequence() { + static const StringAndValue data[]={ + { "a", -1 }, + // sequence of linear-match nodes + { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, + // more than 256 bytes + { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::TestLongBranch() { + // Split-branch and interesting compact-integer values. + static const StringAndValue data[]={ + { "a", -2 }, + { "b", -1 }, + { "c", 0 }, + { "d2", 1 }, + { "f", 0x3f }, + { "g", 0x40 }, + { "h", 0x41 }, + { "j23", 0x1900 }, + { "j24", 0x19ff }, + { "j25", 0x1a00 }, + { "k2", 0x1a80 }, + { "k3", 0x1aff }, + { "l234567890", 0x1b00 }, + { "l234567890123", 0x1b01 }, + { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, + { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, + { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, + { "r", 0x333333 }, + { "s2345", 0x4444444 }, + { "t234567890", 0x77777777 }, + { "z", 0x80000001 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::TestValuesForState() { + // Check that saveState() and resetToState() interact properly + // with next() and current(). + static const StringAndValue data[]={ + { "a", -1 }, + { "ab", -2 }, + { "abc", -3 }, + { "abcd", -4 }, + { "abcde", -5 }, + { "abcdef", -6 } + }; + checkData(data, LENGTHOF(data)); +} + +void BytesTrieTest::TestCompact() { + // Duplicate trailing strings and values provide opportunities for compacting. + static const StringAndValue data[]={ + { "+", 0 }, + { "+august", 8 }, + { "+december", 12 }, + { "+july", 7 }, + { "+june", 6 }, + { "+november", 11 }, + { "+october", 10 }, + { "+september", 9 }, + { "-", 0 }, + { "-august", 8 }, + { "-december", 12 }, + { "-july", 7 }, + { "-june", 6 }, + { "-november", 11 }, + { "-october", 10 }, + { "-september", 9 }, + // The l+n branch (with its sub-nodes) is a duplicate but will be written + // both times because each time it follows a different linear-match node. + { "xjuly", 7 }, + { "xjune", 6 } + }; + checkData(data, LENGTHOF(data)); +} + +BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { + // All types of nodes leading to the same value, + // for code coverage of recursive functions. + // In particular, we need a lot of branches on some single level + // to exercise a split-branch node. + static const StringAndValue data[]={ + { "august", 8 }, + { "jan", 1 }, + { "jan.", 1 }, + { "jana", 1 }, + { "janbb", 1 }, + { "janc", 1 }, + { "janddd", 1 }, + { "janee", 1 }, + { "janef", 1 }, + { "janf", 1 }, + { "jangg", 1 }, + { "janh", 1 }, + { "janiiii", 1 }, + { "janj", 1 }, + { "jankk", 1 }, + { "jankl", 1 }, + { "jankmm", 1 }, + { "janl", 1 }, + { "janm", 1 }, + { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, + { "jano", 1 }, + { "janpp", 1 }, + { "janqqq", 1 }, + { "janr", 1 }, + { "januar", 1 }, + { "january", 1 }, + { "july", 7 }, + { "jun", 6 }, + { "jun.", 6 }, + { "june", 6 } + }; + return buildTrie(data, LENGTHOF(data), buildOption); +} + +void BytesTrieTest::TestHasUniqueValue() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + int32_t uniqueValue; + if(trie->hasUniqueValue(uniqueValue)) { + errln("unique value at root"); + } + trie->next('j'); + trie->next('a'); + trie->next('n'); + // hasUniqueValue() directly after next() + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { + errln("not unique value 1 after \"jan\""); + } + trie->first('j'); + trie->next('u'); + if(trie->hasUniqueValue(uniqueValue)) { + errln("unique value after \"ju\""); + } + if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { + errln("not normal value 6 after \"jun\""); + } + // hasUniqueValue() after getValue() + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { + errln("not unique value 6 after \"jun\""); + } + // hasUniqueValue() from within a linear-match node + trie->first('a'); + trie->next('u'); + if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { + errln("not unique value 8 after \"au\""); + } +} + +void BytesTrieTest::TestGetNextBytes() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + char buffer[40]; + CheckedArrayByteSink sink(buffer, LENGTHOF(buffer)); + int32_t count=trie->getNextBytes(sink); + if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { + errln("months getNextBytes()!=[aj] at root"); + } + trie->next('j'); + trie->next('a'); + trie->next('n'); + // getNextBytes() directly after next() + count=trie->getNextBytes(sink.Reset()); + buffer[count]=0; + if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { + errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); + } + // getNextBytes() after getValue() + trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. + memset(buffer, 0, sizeof(buffer)); + count=trie->getNextBytes(sink.Reset()); + if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { + errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); + } + // getNextBytes() from a linear-match node + trie->next('u'); + memset(buffer, 0, sizeof(buffer)); + count=trie->getNextBytes(sink.Reset()); + if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { + errln("months getNextBytes()!=[a] after \"janu\""); + } + trie->next('a'); + memset(buffer, 0, sizeof(buffer)); + count=trie->getNextBytes(sink.Reset()); + if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { + errln("months getNextBytes()!=[r] after \"janua\""); + } + trie->next('r'); + trie->next('y'); + // getNextBytes() after a final match + count=trie->getNextBytes(sink.Reset()); + if(count!=0 || sink.NumberOfBytesAppended()!=0) { + errln("months getNextBytes()!=[] after \"january\""); + } +} + +void BytesTrieTest::TestIteratorFromBranch() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go to a branch node. + trie->next('j'); + trie->next('a'); + trie->next('n'); + IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); + BytesTrie::Iterator iter(*trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the suffixes + // following "jan". + static const StringAndValue data[]={ + { "", 1 }, + { ".", 1 }, + { "a", 1 }, + { "bb", 1 }, + { "c", 1 }, + { "ddd", 1 }, + { "ee", 1 }, + { "ef", 1 }, + { "f", 1 }, + { "gg", 1 }, + { "h", 1 }, + { "iiii", 1 }, + { "j", 1 }, + { "kk", 1 }, + { "kl", 1 }, + { "kmm", 1 }, + { "l", 1 }, + { "m", 1 }, + { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, + { "o", 1 }, + { "pp", 1 }, + { "qqq", 1 }, + { "r", 1 }, + { "uar", 1 }, + { "uary", 1 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void BytesTrieTest::TestIteratorFromLinearMatch() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next('j'); + trie->next('a'); + trie->next('n'); + trie->next('u'); + trie->next('a'); + IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); + BytesTrie::Iterator iter(*trie, 0, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the suffixes + // following "janua". + static const StringAndValue data[]={ + { "r", 1 }, + { "ry", 1 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void BytesTrieTest::TestTruncatingIteratorFromRoot() { + LocalPointer trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); + BytesTrie::Iterator iter(*trie, 4, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { + return; + } + // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters + // of each string, and no string duplicates from the truncation. + static const StringAndValue data[]={ + { "augu", -1 }, + { "jan", 1 }, + { "jan.", 1 }, + { "jana", 1 }, + { "janb", -1 }, + { "janc", 1 }, + { "jand", -1 }, + { "jane", -1 }, + { "janf", 1 }, + { "jang", -1 }, + { "janh", 1 }, + { "jani", -1 }, + { "janj", 1 }, + { "jank", -1 }, + { "janl", 1 }, + { "janm", 1 }, + { "jann", -1 }, + { "jano", 1 }, + { "janp", -1 }, + { "janq", -1 }, + { "janr", 1 }, + { "janu", -1 }, + { "july", 7 }, + { "jun", 6 }, + { "jun.", 6 }, + { "june", 6 } + }; + checkIterator(iter, data, LENGTHOF(data)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), data, LENGTHOF(data)); +} + +void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { + static const StringAndValue data[]={ + { "abcdef", 10 }, + { "abcdepq", 200 }, + { "abcdeyz", 3000 } + }; + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next('a'); + trie->next('b'); + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); + // Truncate within the linear-match node. + BytesTrie::Iterator iter(*trie, 2, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { + return; + } + static const StringAndValue expected[]={ + { "cd", -1 } + }; + checkIterator(iter, expected, LENGTHOF(expected)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), expected, LENGTHOF(expected)); +} + +void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { + static const StringAndValue data[]={ + { "abcdef", 10 }, + { "abcdepq", 200 }, + { "abcdeyz", 3000 } + }; + LocalPointer trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + // Go into a linear-match node. + trie->next('a'); + trie->next('b'); + trie->next('c'); + IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); + // Truncate after the linear-match node. + BytesTrie::Iterator iter(*trie, 3, errorCode); + if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { + return; + } + static const StringAndValue expected[]={ + { "def", 10 }, + { "dep", -1 }, + { "dey", -1 } + }; + checkIterator(iter, expected, LENGTHOF(expected)); + // Reset, and we should get the same result. + logln("after iter.reset()"); + checkIterator(iter.reset(), expected, LENGTHOF(expected)); +} + +void BytesTrieTest::TestIteratorFromBytes() { + static const StringAndValue data[]={ + { "mm", 3 }, + { "mmm", 33 }, + { "mmnop", 333 } + }; + builder_->clear(); + IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()"); + for(int32_t i=0; iadd(data[i].s, data[i].value, errorCode); + } + StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode); + BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode); + checkIterator(iter, data, LENGTHOF(data)); +} + +void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { + logln("checkData(dataLength=%d, fast)", (int)dataLength); + checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); + logln("checkData(dataLength=%d, small)", (int)dataLength); + checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); +} + +void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { + LocalPointer trie(buildTrie(data, dataLength, buildOption)); + if(trie.isNull()) { + return; // buildTrie() reported an error + } + checkFirst(*trie, data, dataLength); + checkNext(*trie, data, dataLength); + checkNextWithState(*trie, data, dataLength); + checkNextString(*trie, data, dataLength); + checkIterator(*trie, data, dataLength); +} + +BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, + UStringTrieBuildOption buildOption) { + IcuTestErrorCode errorCode(*this, "buildTrie()"); + // Add the items to the trie builder in an interesting (not trivial, not random) order. + int32_t index, step; + if(dataLength&1) { + // Odd number of items. + index=dataLength/2; + step=2; + } else if((dataLength%3)!=0) { + // Not a multiple of 3. + index=dataLength/5; + step=3; + } else { + index=dataLength-1; + step=-1; + } + builder_->clear(); + for(int32_t i=0; iadd(data[index].s, data[index].value, errorCode); + index=(index+step)%dataLength; + } + StringPiece sp=builder_->buildStringPiece(buildOption, errorCode); + LocalPointer trie(builder_->build(buildOption, errorCode)); + if(!errorCode.logIfFailureAndReset("add()/build()")) { + builder_->add("zzz", 999, errorCode); + if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { + errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); + } + } + logln("serialized trie size: %ld bytes\n", (long)sp.length()); + StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode); + if(sp.data()==sp2.data()) { + errln("builder.buildStringPiece() before & after build() returned same array"); + } + if(errorCode.isFailure()) { + return NULL; + } + // Tries from either build() method should be identical but + // BytesTrie does not implement equals(). + // We just return either one. + if((dataLength&1)!=0) { + return trie.orphan(); + } else { + return new BytesTrie(sp2.data()); + } +} + +void BytesTrieTest::checkFirst(BytesTrie &trie, + const StringAndValue data[], int32_t dataLength) { + for(int32_t i=0; i