// Source: apple/icu.git (ICU-59131.0.1), icuSources/test/intltest/ucharstrietest.cpp
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucharstrietest.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010nov16
*   created by: Markus W. Scherer
*/
16
17#include <string.h>
18
19#include "unicode/utypes.h"
20#include "unicode/appendable.h"
21#include "unicode/localpointer.h"
22#include "unicode/ucharstrie.h"
23#include "unicode/ucharstriebuilder.h"
24#include "unicode/uniset.h"
25#include "unicode/unistr.h"
26#include "intltest.h"
b331163b 27#include "cmemory.h"
4388f060
A
28
// One test item: a string and the value that the trie is expected to map it to.
// The string is given as a char * literal; the helpers in this file convert it
// with UnicodeString(s, -1, US_INV).unescape() before using it.
struct StringAndValue {
    const char *s;   // the (possibly escaped) string
    int32_t value;   // the value expected for s
};
33
34class UCharsTrieTest : public IntlTest {
35public:
36 UCharsTrieTest();
37 virtual ~UCharsTrieTest();
38
39 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
40 void TestBuilder();
41 void TestEmpty();
42 void Test_a();
43 void Test_a_ab();
44 void TestShortestBranch();
45 void TestBranches();
46 void TestLongSequence();
47 void TestLongBranch();
48 void TestValuesForState();
49 void TestCompact();
50 void TestFirstForCodePoint();
51 void TestNextForCodePoint();
52
53 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
54 void TestLargeTrie();
55
56 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
57 void TestHasUniqueValue();
58 void TestGetNextUChars();
59 void TestIteratorFromBranch();
60 void TestIteratorFromLinearMatch();
61 void TestTruncatingIteratorFromRoot();
62 void TestTruncatingIteratorFromLinearMatchShort();
63 void TestTruncatingIteratorFromLinearMatchLong();
64 void TestIteratorFromUChars();
65
66 void checkData(const StringAndValue data[], int32_t dataLength);
67 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
68 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
69 UStringTrieBuildOption buildOption);
70 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
75 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
76
77private:
78 UCharsTrieBuilder *builder_;
79};
80
81extern IntlTest *createUCharsTrieTest() {
82 return new UCharsTrieTest();
83}
84
85UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
86 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
87 builder_=new UCharsTrieBuilder(errorCode);
88}
89
90UCharsTrieTest::~UCharsTrieTest() {
91 delete builder_;
92}
93
94void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
95 if(exec) {
96 logln("TestSuite UCharsTrieTest: ");
97 }
98 TESTCASE_AUTO_BEGIN;
99 TESTCASE_AUTO(TestBuilder);
100 TESTCASE_AUTO(TestEmpty);
101 TESTCASE_AUTO(Test_a);
102 TESTCASE_AUTO(Test_a_ab);
103 TESTCASE_AUTO(TestShortestBranch);
104 TESTCASE_AUTO(TestBranches);
105 TESTCASE_AUTO(TestLongSequence);
106 TESTCASE_AUTO(TestLongBranch);
107 TESTCASE_AUTO(TestValuesForState);
108 TESTCASE_AUTO(TestCompact);
109 TESTCASE_AUTO(TestFirstForCodePoint);
110 TESTCASE_AUTO(TestNextForCodePoint);
111 TESTCASE_AUTO(TestLargeTrie);
112 TESTCASE_AUTO(TestHasUniqueValue);
113 TESTCASE_AUTO(TestGetNextUChars);
114 TESTCASE_AUTO(TestIteratorFromBranch);
115 TESTCASE_AUTO(TestIteratorFromLinearMatch);
116 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
117 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
118 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
119 TESTCASE_AUTO(TestIteratorFromUChars);
120 TESTCASE_AUTO_END;
121}
122
123void UCharsTrieTest::TestBuilder() {
124 IcuTestErrorCode errorCode(*this, "TestBuilder()");
125 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
126 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
127 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
128 return;
129 }
130 // TODO: remove .build(...) once add() checks for duplicates.
131 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
132 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
133 errln("UCharsTrieBuilder.add() did not detect duplicates");
134 return;
135 }
136}
137
138void UCharsTrieTest::TestEmpty() {
139 static const StringAndValue data[]={
140 { "", 0 }
141 };
b331163b 142 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
143}
144
145void UCharsTrieTest::Test_a() {
146 static const StringAndValue data[]={
147 { "a", 1 }
148 };
b331163b 149 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
150}
151
152void UCharsTrieTest::Test_a_ab() {
153 static const StringAndValue data[]={
154 { "a", 1 },
155 { "ab", 100 }
156 };
b331163b 157 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
158}
159
160void UCharsTrieTest::TestShortestBranch() {
161 static const StringAndValue data[]={
162 { "a", 1000 },
163 { "b", 2000 }
164 };
b331163b 165 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
166}
167
168void UCharsTrieTest::TestBranches() {
169 static const StringAndValue data[]={
170 { "a", 0x10 },
171 { "cc", 0x40 },
172 { "e", 0x100 },
173 { "ggg", 0x400 },
174 { "i", 0x1000 },
175 { "kkkk", 0x4000 },
176 { "n", 0x10000 },
177 { "ppppp", 0x40000 },
178 { "r", 0x100000 },
179 { "sss", 0x200000 },
180 { "t", 0x400000 },
181 { "uu", 0x800000 },
182 { "vv", 0x7fffffff },
51004dcb 183 { "zz", (int32_t)0x80000000 }
4388f060 184 };
b331163b 185 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
57a6839d 186 logln("TestBranches length=%d", (int)length);
4388f060
A
187 checkData(data, length);
188 }
189}
190
191void UCharsTrieTest::TestLongSequence() {
192 static const StringAndValue data[]={
193 { "a", -1 },
194 // sequence of linear-match nodes
195 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
196 // more than 256 units
197 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
202 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
203 };
b331163b 204 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
205}
206
207void UCharsTrieTest::TestLongBranch() {
208 // Split-branch and interesting compact-integer values.
209 static const StringAndValue data[]={
210 { "a", -2 },
211 { "b", -1 },
212 { "c", 0 },
213 { "d2", 1 },
214 { "f", 0x3f },
215 { "g", 0x40 },
216 { "h", 0x41 },
217 { "j23", 0x1900 },
218 { "j24", 0x19ff },
219 { "j25", 0x1a00 },
220 { "k2", 0x1a80 },
221 { "k3", 0x1aff },
222 { "l234567890", 0x1b00 },
223 { "l234567890123", 0x1b01 },
224 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
225 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
226 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
227 { "r", 0x333333 },
228 { "s2345", 0x4444444 },
229 { "t234567890", 0x77777777 },
51004dcb 230 { "z", (int32_t)0x80000001 }
4388f060 231 };
b331163b 232 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
233}
234
235void UCharsTrieTest::TestValuesForState() {
236 // Check that saveState() and resetToState() interact properly
237 // with next() and current().
238 static const StringAndValue data[]={
239 { "a", -1 },
240 { "ab", -2 },
241 { "abc", -3 },
242 { "abcd", -4 },
243 { "abcde", -5 },
244 { "abcdef", -6 }
245 };
b331163b 246 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
247}
248
249void UCharsTrieTest::TestCompact() {
250 // Duplicate trailing strings and values provide opportunities for compacting.
251 static const StringAndValue data[]={
252 { "+", 0 },
253 { "+august", 8 },
254 { "+december", 12 },
255 { "+july", 7 },
256 { "+june", 6 },
257 { "+november", 11 },
258 { "+october", 10 },
259 { "+september", 9 },
260 { "-", 0 },
261 { "-august", 8 },
262 { "-december", 12 },
263 { "-july", 7 },
264 { "-june", 6 },
265 { "-november", 11 },
266 { "-october", 10 },
267 { "-september", 9 },
268 // The l+n branch (with its sub-nodes) is a duplicate but will be written
269 // both times because each time it follows a different linear-match node.
270 { "xjuly", 7 },
271 { "xjune", 6 }
272 };
b331163b 273 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
274}
275
276void UCharsTrieTest::TestFirstForCodePoint() {
277 static const StringAndValue data[]={
278 { "a", 1 },
279 { "a\\ud800", 2 },
280 { "a\\U00010000", 3 },
281 { "\\ud840", 4 },
282 { "\\U00020000\\udbff", 5 },
283 { "\\U00020000\\U0010ffff", 6 },
284 { "\\U00020000\\U0010ffffz", 7 },
285 { "\\U00050000xy", 8 },
286 { "\\U00050000xyz", 9 }
287 };
b331163b 288 checkData(data, UPRV_LENGTHOF(data));
4388f060
A
289}
290
291void UCharsTrieTest::TestNextForCodePoint() {
292 static const StringAndValue data[]={
293 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
294 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
295 { "\\u4dff\\U000103ff", 99999 }
296 };
b331163b 297 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
4388f060
A
298 if(trie.isNull()) {
299 return; // buildTrie() reported an error
300 }
301 UStringTrieResult result;
302 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
307 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
308 trie->getValue()!=2000000000
309 ) {
310 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
311 }
312 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
315 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
316 trie->getValue()!=44444
317 ) {
318 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
319 }
320 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
323 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate
324 ) {
325 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
326 }
327 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
328 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
329 trie->getValue()!=99999
330 ) {
331 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
332 }
333}
334
335// Definitions in the anonymous namespace are invisible outside this file.
336namespace {
337
338// Generate (string, value) pairs.
339// The first string (before next()) will be empty.
340class Generator {
341public:
342 Generator() : value(4711), num(0) {}
343 void next() {
344 UChar c;
345 s.truncate(0);
346 s.append(c=(UChar)(value>>16));
347 s.append((UChar)(value>>4));
348 if(value&1) {
349 s.append((UChar)value);
350 }
351 set.add(c);
352 value+=((value>>5)&0x7ff)*3+1;
353 ++num;
354 }
355 const UnicodeString &getString() const { return s; }
356 int32_t getValue() const { return value; }
357 int32_t countUniqueFirstChars() const { return set.size(); }
358 int32_t getIndex() const { return num; }
359
360private:
361 UnicodeString s;
362 UnicodeSet set;
363 int32_t value;
364 int32_t num;
365};
366
367} // end namespace
368
369UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
370 IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
371 Generator gen;
372 builder_->clear();
373 while(gen.countUniqueFirstChars()<numUniqueFirst) {
374 builder_->add(gen.getString(), gen.getValue(), errorCode);
375 gen.next();
376 }
57a6839d 377 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
4388f060
A
378 UnicodeString trieUChars;
379 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
380 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
381 return new UCharsTrie(trieUChars.getBuffer());
382}
383
384// Exercise a large branch node.
385void UCharsTrieTest::TestLargeTrie() {
386 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
387 if(trie.isNull()) {
388 return; // buildTrie() reported an error
389 }
390 Generator gen;
391 while(gen.countUniqueFirstChars()<1111) {
392 UnicodeString x(gen.getString());
393 int32_t value=gen.getValue();
394 if(!x.isEmpty()) {
395 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
396 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
397 x[0], (long)gen.getIndex());
398 break;
399 }
400 x.remove(0, 1);
401 }
402 UStringTrieResult result=trie->next(x.getBuffer(), x.length());
403 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
404 errln("next(%d chars U+%04X U+%04X)!=hasValue or "
405 "next()!=current() or getValue() wrong "
406 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
407 break;
408 }
409 gen.next();
410 }
411}
412
// Code unit values of the lowercase letters that the month-name tests
// pass to first()/next().
enum {
    u_a=0x61,
    u_b=0x62,
    u_c=0x63,
    u_j=0x6a,
    u_n=0x6e,
    u_r=0x72,
    u_u=0x75,
    u_y=0x79
};
423
424UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
425 // All types of nodes leading to the same value,
426 // for code coverage of recursive functions.
427 // In particular, we need a lot of branches on some single level
428 // to exercise a split-branch node.
429 static const StringAndValue data[]={
430 { "august", 8 },
431 { "jan", 1 },
432 { "jan.", 1 },
433 { "jana", 1 },
434 { "janbb", 1 },
435 { "janc", 1 },
436 { "janddd", 1 },
437 { "janee", 1 },
438 { "janef", 1 },
439 { "janf", 1 },
440 { "jangg", 1 },
441 { "janh", 1 },
442 { "janiiii", 1 },
443 { "janj", 1 },
444 { "jankk", 1 },
445 { "jankl", 1 },
446 { "jankmm", 1 },
447 { "janl", 1 },
448 { "janm", 1 },
449 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
450 { "jano", 1 },
451 { "janpp", 1 },
452 { "janqqq", 1 },
453 { "janr", 1 },
454 { "januar", 1 },
455 { "january", 1 },
456 { "july", 7 },
457 { "jun", 6 },
458 { "jun.", 6 },
459 { "june", 6 }
460 };
b331163b 461 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
4388f060
A
462}
463
464void UCharsTrieTest::TestHasUniqueValue() {
465 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
466 if(trie.isNull()) {
467 return; // buildTrie() reported an error
468 }
469 int32_t uniqueValue;
470 if(trie->hasUniqueValue(uniqueValue)) {
471 errln("unique value at root");
472 }
473 trie->next(u_j);
474 trie->next(u_a);
475 trie->next(u_n);
476 // hasUniqueValue() directly after next()
477 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
478 errln("not unique value 1 after \"jan\"");
479 }
480 trie->first(u_j);
481 trie->next(u_u);
482 if(trie->hasUniqueValue(uniqueValue)) {
483 errln("unique value after \"ju\"");
484 }
485 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
486 errln("not normal value 6 after \"jun\"");
487 }
488 // hasUniqueValue() after getValue()
489 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
490 errln("not unique value 6 after \"jun\"");
491 }
492 // hasUniqueValue() from within a linear-match node
493 trie->first(u_a);
494 trie->next(u_u);
495 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
496 errln("not unique value 8 after \"au\"");
497 }
498}
499
500void UCharsTrieTest::TestGetNextUChars() {
501 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
502 if(trie.isNull()) {
503 return; // buildTrie() reported an error
504 }
505 UnicodeString buffer;
506 UnicodeStringAppendable app(buffer);
507 int32_t count=trie->getNextUChars(app);
508 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
509 errln("months getNextUChars()!=[aj] at root");
510 }
511 trie->next(u_j);
512 trie->next(u_a);
513 trie->next(u_n);
514 // getNextUChars() directly after next()
515 buffer.remove();
516 count=trie->getNextUChars(app);
517 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
518 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
519 }
520 // getNextUChars() after getValue()
521 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
522 buffer.remove();
523 count=trie->getNextUChars(app);
524 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
525 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
526 }
527 // getNextUChars() from a linear-match node
528 trie->next(u_u);
529 buffer.remove();
530 count=trie->getNextUChars(app);
531 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
532 errln("months getNextUChars()!=[a] after \"janu\"");
533 }
534 trie->next(u_a);
535 buffer.remove();
536 count=trie->getNextUChars(app);
537 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
538 errln("months getNextUChars()!=[r] after \"janua\"");
539 }
540 trie->next(u_r);
541 trie->next(u_y);
542 // getNextUChars() after a final match
543 buffer.remove();
544 count=trie->getNextUChars(app);
545 if(count!=0 || buffer.length()!=0) {
546 errln("months getNextUChars()!=[] after \"january\"");
547 }
548}
549
550void UCharsTrieTest::TestIteratorFromBranch() {
551 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
552 if(trie.isNull()) {
553 return; // buildTrie() reported an error
554 }
555 // Go to a branch node.
556 trie->next(u_j);
557 trie->next(u_a);
558 trie->next(u_n);
559 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
560 UCharsTrie::Iterator iter(*trie, 0, errorCode);
561 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
562 return;
563 }
564 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
565 // following "jan".
566 static const StringAndValue data[]={
567 { "", 1 },
568 { ".", 1 },
569 { "a", 1 },
570 { "bb", 1 },
571 { "c", 1 },
572 { "ddd", 1 },
573 { "ee", 1 },
574 { "ef", 1 },
575 { "f", 1 },
576 { "gg", 1 },
577 { "h", 1 },
578 { "iiii", 1 },
579 { "j", 1 },
580 { "kk", 1 },
581 { "kl", 1 },
582 { "kmm", 1 },
583 { "l", 1 },
584 { "m", 1 },
585 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
586 { "o", 1 },
587 { "pp", 1 },
588 { "qqq", 1 },
589 { "r", 1 },
590 { "uar", 1 },
591 { "uary", 1 }
592 };
b331163b 593 checkIterator(iter, data, UPRV_LENGTHOF(data));
4388f060
A
594 // Reset, and we should get the same result.
595 logln("after iter.reset()");
b331163b 596 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
4388f060
A
597}
598
599void UCharsTrieTest::TestIteratorFromLinearMatch() {
600 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
601 if(trie.isNull()) {
602 return; // buildTrie() reported an error
603 }
604 // Go into a linear-match node.
605 trie->next(u_j);
606 trie->next(u_a);
607 trie->next(u_n);
608 trie->next(u_u);
609 trie->next(u_a);
610 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
611 UCharsTrie::Iterator iter(*trie, 0, errorCode);
612 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
613 return;
614 }
615 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
616 // following "janua".
617 static const StringAndValue data[]={
618 { "r", 1 },
619 { "ry", 1 }
620 };
b331163b 621 checkIterator(iter, data, UPRV_LENGTHOF(data));
4388f060
A
622 // Reset, and we should get the same result.
623 logln("after iter.reset()");
b331163b 624 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
4388f060
A
625}
626
627void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
628 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
629 if(trie.isNull()) {
630 return; // buildTrie() reported an error
631 }
632 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
633 UCharsTrie::Iterator iter(*trie, 4, errorCode);
634 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
635 return;
636 }
637 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
638 // of each string, and no string duplicates from the truncation.
639 static const StringAndValue data[]={
640 { "augu", -1 },
641 { "jan", 1 },
642 { "jan.", 1 },
643 { "jana", 1 },
644 { "janb", -1 },
645 { "janc", 1 },
646 { "jand", -1 },
647 { "jane", -1 },
648 { "janf", 1 },
649 { "jang", -1 },
650 { "janh", 1 },
651 { "jani", -1 },
652 { "janj", 1 },
653 { "jank", -1 },
654 { "janl", 1 },
655 { "janm", 1 },
656 { "jann", -1 },
657 { "jano", 1 },
658 { "janp", -1 },
659 { "janq", -1 },
660 { "janr", 1 },
661 { "janu", -1 },
662 { "july", 7 },
663 { "jun", 6 },
664 { "jun.", 6 },
665 { "june", 6 }
666 };
b331163b 667 checkIterator(iter, data, UPRV_LENGTHOF(data));
4388f060
A
668 // Reset, and we should get the same result.
669 logln("after iter.reset()");
b331163b 670 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
4388f060
A
671}
672
673void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
674 static const StringAndValue data[]={
675 { "abcdef", 10 },
676 { "abcdepq", 200 },
677 { "abcdeyz", 3000 }
678 };
b331163b 679 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
4388f060
A
680 if(trie.isNull()) {
681 return; // buildTrie() reported an error
682 }
683 // Go into a linear-match node.
684 trie->next(u_a);
685 trie->next(u_b);
686 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
687 // Truncate within the linear-match node.
688 UCharsTrie::Iterator iter(*trie, 2, errorCode);
689 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
690 return;
691 }
692 static const StringAndValue expected[]={
693 { "cd", -1 }
694 };
b331163b 695 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
4388f060
A
696 // Reset, and we should get the same result.
697 logln("after iter.reset()");
b331163b 698 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
4388f060
A
699}
700
701void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
702 static const StringAndValue data[]={
703 { "abcdef", 10 },
704 { "abcdepq", 200 },
705 { "abcdeyz", 3000 }
706 };
b331163b 707 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
4388f060
A
708 if(trie.isNull()) {
709 return; // buildTrie() reported an error
710 }
711 // Go into a linear-match node.
712 trie->next(u_a);
713 trie->next(u_b);
714 trie->next(u_c);
715 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
716 // Truncate after the linear-match node.
717 UCharsTrie::Iterator iter(*trie, 3, errorCode);
718 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
719 return;
720 }
721 static const StringAndValue expected[]={
722 { "def", 10 },
723 { "dep", -1 },
724 { "dey", -1 }
725 };
b331163b 726 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
4388f060
A
727 // Reset, and we should get the same result.
728 logln("after iter.reset()");
b331163b 729 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
4388f060
A
730}
731
732void UCharsTrieTest::TestIteratorFromUChars() {
733 static const StringAndValue data[]={
734 { "mm", 3 },
735 { "mmm", 33 },
736 { "mmnop", 333 }
737 };
738 builder_->clear();
739 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
b331163b 740 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
4388f060
A
741 builder_->add(data[i].s, data[i].value, errorCode);
742 }
743 UnicodeString trieUChars;
744 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
745 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
b331163b 746 checkIterator(iter, data, UPRV_LENGTHOF(data));
4388f060
A
747}
748
749void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
750 logln("checkData(dataLength=%d, fast)", (int)dataLength);
751 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
752 logln("checkData(dataLength=%d, small)", (int)dataLength);
753 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
754}
755
756void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
757 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
758 if(trie.isNull()) {
759 return; // buildTrie() reported an error
760 }
761 checkFirst(*trie, data, dataLength);
762 checkNext(*trie, data, dataLength);
763 checkNextWithState(*trie, data, dataLength);
764 checkNextString(*trie, data, dataLength);
765 checkIterator(*trie, data, dataLength);
766}
767
768UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
769 UStringTrieBuildOption buildOption) {
770 IcuTestErrorCode errorCode(*this, "buildTrie()");
771 // Add the items to the trie builder in an interesting (not trivial, not random) order.
772 int32_t index, step;
773 if(dataLength&1) {
774 // Odd number of items.
775 index=dataLength/2;
776 step=2;
777 } else if((dataLength%3)!=0) {
778 // Not a multiple of 3.
779 index=dataLength/5;
780 step=3;
781 } else {
782 index=dataLength-1;
783 step=-1;
784 }
785 builder_->clear();
786 for(int32_t i=0; i<dataLength; ++i) {
787 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
788 data[index].value, errorCode);
789 index=(index+step)%dataLength;
790 }
791 UnicodeString trieUChars;
792 builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
793 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
794 if(!errorCode.logIfFailureAndReset("add()/build()")) {
795 builder_->add("zzz", 999, errorCode);
796 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
797 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
798 }
799 }
800 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
801 UnicodeString trieUChars2;
802 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
803 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
804 errln("builder.buildUnicodeString() before & after build() returned same array");
805 }
806 if(errorCode.isFailure()) {
807 return NULL;
808 }
809 // Tries from either build() method should be identical but
810 // UCharsTrie does not implement equals().
811 // We just return either one.
812 if((dataLength&1)!=0) {
813 return trie.orphan();
814 } else {
815 return new UCharsTrie(trieUChars2.getBuffer());
816 }
817}
818
819void UCharsTrieTest::checkFirst(UCharsTrie &trie,
820 const StringAndValue data[], int32_t dataLength) {
821 for(int32_t i=0; i<dataLength; ++i) {
822 if(*data[i].s==0) {
823 continue; // skip empty string
824 }
825 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
826 UChar32 c=expectedString[0];
827 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
828 UStringTrieResult firstResult=trie.first(c);
829 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
830 UStringTrieResult nextResult=trie.next(nextCp);
831 if(firstResult!=trie.reset().next(c) ||
832 firstResult!=trie.current() ||
833 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
834 nextResult!=trie.next(nextCp)
835 ) {
836 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
837 c, data[i].s);
838 }
839 c=expectedString.char32At(0);
840 int32_t cLength=U16_LENGTH(c);
841 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
842 firstResult=trie.firstForCodePoint(c);
843 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
844 nextResult=trie.nextForCodePoint(nextCp);
845 if(firstResult!=trie.reset().nextForCodePoint(c) ||
846 firstResult!=trie.current() ||
847 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
848 nextResult!=trie.nextForCodePoint(nextCp)
849 ) {
850 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
851 c, data[i].s);
852 }
853 }
854 trie.reset();
855}
856
857void UCharsTrieTest::checkNext(UCharsTrie &trie,
858 const StringAndValue data[], int32_t dataLength) {
859 UCharsTrie::State state;
860 for(int32_t i=0; i<dataLength; ++i) {
861 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
862 int32_t stringLength= (i&1) ? -1 : expectedString.length();
863 UStringTrieResult result;
864 if( !USTRINGTRIE_HAS_VALUE(
865 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
866 result!=trie.current()
867 ) {
868 errln("trie does not seem to contain %s", data[i].s);
869 } else if(trie.getValue()!=data[i].value) {
870 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
871 data[i].s,
872 (long)trie.getValue(), (long)trie.getValue(),
873 (long)data[i].value, (long)data[i].value);
874 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
875 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
876 }
877 trie.reset();
878 stringLength=expectedString.length();
879 result=trie.current();
880 for(int32_t j=0; j<stringLength; ++j) {
881 if(!USTRINGTRIE_HAS_NEXT(result)) {
882 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
883 break;
884 }
885 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
886 trie.getValue();
887 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
888 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
889 break;
890 }
891 }
892 result=trie.next(expectedString[j]);
893 if(!USTRINGTRIE_MATCHES(result)) {
894 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
895 break;
896 }
897 if(result!=trie.current()) {
898 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
899 break;
900 }
901 }
902 if(!USTRINGTRIE_HAS_VALUE(result)) {
903 errln("trie.next()!=hasValue at the end of %s", data[i].s);
904 continue;
905 }
906 trie.getValue();
907 if(result!=trie.current()) {
908 errln("trie.current() != current()+getValue()+current() after end of %s",
909 data[i].s);
910 }
911 // Compare the final current() with whether next() can actually continue.
912 trie.saveState(state);
913 UBool nextContinues=FALSE;
914 for(int32_t c=0x20; c<0xe000; ++c) {
915 if(c==0x80) {
916 c=0xd800; // Check for ASCII and surrogates but not all of the BMP.
917 }
918 if(trie.resetToState(state).next(c)) {
919 nextContinues=TRUE;
920 break;
921 }
922 }
923 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
924 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
925 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
926 }
927 trie.reset();
928 }
929}
930
931void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
932 const StringAndValue data[], int32_t dataLength) {
933 UCharsTrie::State noState, state;
934 for(int32_t i=0; i<dataLength; ++i) {
935 if((i&1)==0) {
936 // This should have no effect.
937 trie.resetToState(noState);
938 }
939 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
940 int32_t stringLength=expectedString.length();
941 int32_t partialLength=stringLength/3;
942 for(int32_t j=0; j<partialLength; ++j) {
943 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
944 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
945 return;
946 }
947 }
948 trie.saveState(state);
949 UStringTrieResult resultAtState=trie.current();
950 UStringTrieResult result;
951 int32_t valueAtState=-99;
952 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
953 valueAtState=trie.getValue();
954 }
955 result=trie.next(0); // mismatch
956 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
957 errln("trie.next(0) matched after part of %s", data[i].s);
958 }
959 if( resultAtState!=trie.resetToState(state).current() ||
960 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
961 ) {
962 errln("trie.next(part of %s) changes current()/getValue() after "
963 "saveState/next(0)/resetToState",
964 data[i].s);
965 } else if(!USTRINGTRIE_HAS_VALUE(
966 result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
967 stringLength-partialLength)) ||
968 result!=trie.current()) {
969 errln("trie.next(rest of %s) does not seem to contain %s after "
970 "saveState/next(0)/resetToState",
971 data[i].s, data[i].s);
972 } else if(!USTRINGTRIE_HAS_VALUE(
973 result=trie.resetToState(state).
974 next(expectedString.getTerminatedBuffer()+partialLength,
975 stringLength-partialLength)) ||
976 result!=trie.current()) {
977 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
978 data[i].s);
979 } else if(trie.getValue()!=data[i].value) {
980 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
981 data[i].s,
982 (long)trie.getValue(), (long)trie.getValue(),
983 (long)data[i].value, (long)data[i].value);
984 }
985 trie.reset();
986 }
987}
988
989// next(string) is also tested in other functions,
990// but here we try to go partway through the string, and then beyond it.
991void UCharsTrieTest::checkNextString(UCharsTrie &trie,
992 const StringAndValue data[], int32_t dataLength) {
993 for(int32_t i=0; i<dataLength; ++i) {
994 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
995 int32_t stringLength=expectedString.length();
996 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
997 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
998 continue;
999 }
1000 // Test that we stop properly at the end of the string.
1001 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1002 stringLength+1-stringLength/2)) {
1003 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1004 }
1005 trie.reset();
1006 }
1007}
1008
1009void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1010 const StringAndValue data[], int32_t dataLength) {
1011 IcuTestErrorCode errorCode(*this, "checkIterator()");
1012 UCharsTrie::Iterator iter(trie, 0, errorCode);
1013 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1014 return;
1015 }
1016 checkIterator(iter, data, dataLength);
1017}
1018
1019void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1020 const StringAndValue data[], int32_t dataLength) {
1021 IcuTestErrorCode errorCode(*this, "checkIterator()");
1022 for(int32_t i=0; i<dataLength; ++i) {
1023 if(!iter.hasNext()) {
1024 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1025 break;
1026 }
1027 UBool hasNext=iter.next(errorCode);
1028 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1029 break;
1030 }
1031 if(!hasNext) {
1032 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1033 break;
1034 }
1035 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1036 if(iter.getString()!=expectedString) {
1037 char buffer[1000];
1038 UnicodeString invString(prettify(iter.getString()));
b331163b 1039 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
4388f060
A
1040 errln("trie iterator next().getString()=%s but expected %s for item %d",
1041 buffer, data[i].s, (int)i);
1042 }
1043 if(iter.getValue()!=data[i].value) {
1044 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1045 (long)iter.getValue(), (long)iter.getValue(),
1046 (long)data[i].value, (long)data[i].value,
1047 (int)i, data[i].s);
1048 }
1049 }
1050 if(iter.hasNext()) {
1051 errln("trie iterator hasNext()=TRUE after all items");
1052 }
1053 UBool hasNext=iter.next(errorCode);
1054 errorCode.logIfFailureAndReset("trie iterator next() after all items");
1055 if(hasNext) {
1056 errln("trie iterator next()=TRUE after all items");
1057 }
1058}