]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/bytestrietest.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / bytestrietest.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytetrietest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010nov16
14 * created by: Markus W. Scherer
15 */
16
17 #include <string.h>
18
19 #include "unicode/utypes.h"
20 #include "unicode/bytestrie.h"
21 #include "unicode/bytestriebuilder.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/stringpiece.h"
24 #include "intltest.h"
25 #include "cmemory.h"
26
27 struct StringAndValue {
28 const char *s;
29 int32_t value;
30 };
31
32 class BytesTrieTest : public IntlTest {
33 public:
34 BytesTrieTest();
35 virtual ~BytesTrieTest();
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38 void TestBuilder();
39 void TestEmpty();
40 void Test_a();
41 void Test_a_ab();
42 void TestShortestBranch();
43 void TestBranches();
44 void TestLongSequence();
45 void TestLongBranch();
46 void TestValuesForState();
47 void TestCompact();
48
49 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50 void TestHasUniqueValue();
51 void TestGetNextBytes();
52 void TestIteratorFromBranch();
53 void TestIteratorFromLinearMatch();
54 void TestTruncatingIteratorFromRoot();
55 void TestTruncatingIteratorFromLinearMatchShort();
56 void TestTruncatingIteratorFromLinearMatchLong();
57 void TestIteratorFromBytes();
58 void TestFailedIterator();
59
60 void checkData(const StringAndValue data[], int32_t dataLength);
61 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
62 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
63 UStringTrieBuildOption buildOption);
64 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
70
71 private:
72 BytesTrieBuilder *builder_;
73 };
74
75 extern IntlTest *createBytesTrieTest() {
76 return new BytesTrieTest();
77 }
78
79 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
80 IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
81 builder_=new BytesTrieBuilder(errorCode);
82 }
83
84 BytesTrieTest::~BytesTrieTest() {
85 delete builder_;
86 }
87
88 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
89 if(exec) {
90 logln("TestSuite BytesTrieTest: ");
91 }
92 TESTCASE_AUTO_BEGIN;
93 TESTCASE_AUTO(TestBuilder);
94 TESTCASE_AUTO(TestEmpty);
95 TESTCASE_AUTO(Test_a);
96 TESTCASE_AUTO(Test_a_ab);
97 TESTCASE_AUTO(TestShortestBranch);
98 TESTCASE_AUTO(TestBranches);
99 TESTCASE_AUTO(TestLongSequence);
100 TESTCASE_AUTO(TestLongBranch);
101 TESTCASE_AUTO(TestValuesForState);
102 TESTCASE_AUTO(TestCompact);
103 TESTCASE_AUTO(TestHasUniqueValue);
104 TESTCASE_AUTO(TestGetNextBytes);
105 TESTCASE_AUTO(TestIteratorFromBranch);
106 TESTCASE_AUTO(TestIteratorFromLinearMatch);
107 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
108 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
109 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
110 TESTCASE_AUTO(TestIteratorFromBytes);
111 TESTCASE_AUTO(TestFailedIterator);
112 TESTCASE_AUTO_END;
113 }
114
115 void BytesTrieTest::TestBuilder() {
116 IcuTestErrorCode errorCode(*this, "TestBuilder()");
117 builder_->clear();
118 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
119 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
120 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
121 return;
122 }
123 // TODO: remove .build(...) once add() checks for duplicates.
124 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
125 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
126 errln("BytesTrieBuilder.add() did not detect duplicates");
127 return;
128 }
129 }
130
131 void BytesTrieTest::TestEmpty() {
132 static const StringAndValue data[]={
133 { "", 0 }
134 };
135 checkData(data, UPRV_LENGTHOF(data));
136 }
137
138 void BytesTrieTest::Test_a() {
139 static const StringAndValue data[]={
140 { "a", 1 }
141 };
142 checkData(data, UPRV_LENGTHOF(data));
143 }
144
145 void BytesTrieTest::Test_a_ab() {
146 static const StringAndValue data[]={
147 { "a", 1 },
148 { "ab", 100 }
149 };
150 checkData(data, UPRV_LENGTHOF(data));
151 }
152
153 void BytesTrieTest::TestShortestBranch() {
154 static const StringAndValue data[]={
155 { "a", 1000 },
156 { "b", 2000 }
157 };
158 checkData(data, UPRV_LENGTHOF(data));
159 }
160
161 void BytesTrieTest::TestBranches() {
162 static const StringAndValue data[]={
163 { "a", 0x10 },
164 { "cc", 0x40 },
165 { "e", 0x100 },
166 { "ggg", 0x400 },
167 { "i", 0x1000 },
168 { "kkkk", 0x4000 },
169 { "n", 0x10000 },
170 { "ppppp", 0x40000 },
171 { "r", 0x100000 },
172 { "sss", 0x200000 },
173 { "t", 0x400000 },
174 { "uu", 0x800000 },
175 { "vv", 0x7fffffff },
176 { "zz", (int32_t)0x80000000 }
177 };
178 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
179 logln("TestBranches length=%d", (int)length);
180 checkData(data, length);
181 }
182 }
183
184 void BytesTrieTest::TestLongSequence() {
185 static const StringAndValue data[]={
186 { "a", -1 },
187 // sequence of linear-match nodes
188 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
189 // more than 256 bytes
190 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
193 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
196 };
197 checkData(data, UPRV_LENGTHOF(data));
198 }
199
200 void BytesTrieTest::TestLongBranch() {
201 // Split-branch and interesting compact-integer values.
202 static const StringAndValue data[]={
203 { "a", -2 },
204 { "b", -1 },
205 { "c", 0 },
206 { "d2", 1 },
207 { "f", 0x3f },
208 { "g", 0x40 },
209 { "h", 0x41 },
210 { "j23", 0x1900 },
211 { "j24", 0x19ff },
212 { "j25", 0x1a00 },
213 { "k2", 0x1a80 },
214 { "k3", 0x1aff },
215 { "l234567890", 0x1b00 },
216 { "l234567890123", 0x1b01 },
217 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
218 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
219 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
220 { "r", 0x333333 },
221 { "s2345", 0x4444444 },
222 { "t234567890", 0x77777777 },
223 { "z", (int32_t)0x80000001 }
224 };
225 checkData(data, UPRV_LENGTHOF(data));
226 }
227
228 void BytesTrieTest::TestValuesForState() {
229 // Check that saveState() and resetToState() interact properly
230 // with next() and current().
231 static const StringAndValue data[]={
232 { "a", -1 },
233 { "ab", -2 },
234 { "abc", -3 },
235 { "abcd", -4 },
236 { "abcde", -5 },
237 { "abcdef", -6 }
238 };
239 checkData(data, UPRV_LENGTHOF(data));
240 }
241
242 void BytesTrieTest::TestCompact() {
243 // Duplicate trailing strings and values provide opportunities for compacting.
244 static const StringAndValue data[]={
245 { "+", 0 },
246 { "+august", 8 },
247 { "+december", 12 },
248 { "+july", 7 },
249 { "+june", 6 },
250 { "+november", 11 },
251 { "+october", 10 },
252 { "+september", 9 },
253 { "-", 0 },
254 { "-august", 8 },
255 { "-december", 12 },
256 { "-july", 7 },
257 { "-june", 6 },
258 { "-november", 11 },
259 { "-october", 10 },
260 { "-september", 9 },
261 // The l+n branch (with its sub-nodes) is a duplicate but will be written
262 // both times because each time it follows a different linear-match node.
263 { "xjuly", 7 },
264 { "xjune", 6 }
265 };
266 checkData(data, UPRV_LENGTHOF(data));
267 }
268
269 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
270 // All types of nodes leading to the same value,
271 // for code coverage of recursive functions.
272 // In particular, we need a lot of branches on some single level
273 // to exercise a split-branch node.
274 static const StringAndValue data[]={
275 { "august", 8 },
276 { "jan", 1 },
277 { "jan.", 1 },
278 { "jana", 1 },
279 { "janbb", 1 },
280 { "janc", 1 },
281 { "janddd", 1 },
282 { "janee", 1 },
283 { "janef", 1 },
284 { "janf", 1 },
285 { "jangg", 1 },
286 { "janh", 1 },
287 { "janiiii", 1 },
288 { "janj", 1 },
289 { "jankk", 1 },
290 { "jankl", 1 },
291 { "jankmm", 1 },
292 { "janl", 1 },
293 { "janm", 1 },
294 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
295 { "jano", 1 },
296 { "janpp", 1 },
297 { "janqqq", 1 },
298 { "janr", 1 },
299 { "januar", 1 },
300 { "january", 1 },
301 { "july", 7 },
302 { "jun", 6 },
303 { "jun.", 6 },
304 { "june", 6 }
305 };
306 return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
307 }
308
309 void BytesTrieTest::TestHasUniqueValue() {
310 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
311 if(trie.isNull()) {
312 return; // buildTrie() reported an error
313 }
314 int32_t uniqueValue;
315 if(trie->hasUniqueValue(uniqueValue)) {
316 errln("unique value at root");
317 }
318 trie->next('j');
319 trie->next('a');
320 trie->next('n');
321 // hasUniqueValue() directly after next()
322 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
323 errln("not unique value 1 after \"jan\"");
324 }
325 trie->first('j');
326 trie->next('u');
327 if(trie->hasUniqueValue(uniqueValue)) {
328 errln("unique value after \"ju\"");
329 }
330 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
331 errln("not normal value 6 after \"jun\"");
332 }
333 // hasUniqueValue() after getValue()
334 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
335 errln("not unique value 6 after \"jun\"");
336 }
337 // hasUniqueValue() from within a linear-match node
338 trie->first('a');
339 trie->next('u');
340 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
341 errln("not unique value 8 after \"au\"");
342 }
343 }
344
345 void BytesTrieTest::TestGetNextBytes() {
346 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
347 if(trie.isNull()) {
348 return; // buildTrie() reported an error
349 }
350 char buffer[40];
351 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
352 int32_t count=trie->getNextBytes(sink);
353 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
354 errln("months getNextBytes()!=[aj] at root");
355 }
356 trie->next('j');
357 trie->next('a');
358 trie->next('n');
359 // getNextBytes() directly after next()
360 count=trie->getNextBytes(sink.Reset());
361 buffer[count]=0;
362 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
363 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
364 }
365 // getNextBytes() after getValue()
366 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
367 memset(buffer, 0, sizeof(buffer));
368 count=trie->getNextBytes(sink.Reset());
369 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
370 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
371 }
372 // getNextBytes() from a linear-match node
373 trie->next('u');
374 memset(buffer, 0, sizeof(buffer));
375 count=trie->getNextBytes(sink.Reset());
376 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
377 errln("months getNextBytes()!=[a] after \"janu\"");
378 }
379 trie->next('a');
380 memset(buffer, 0, sizeof(buffer));
381 count=trie->getNextBytes(sink.Reset());
382 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
383 errln("months getNextBytes()!=[r] after \"janua\"");
384 }
385 trie->next('r');
386 trie->next('y');
387 // getNextBytes() after a final match
388 count=trie->getNextBytes(sink.Reset());
389 if(count!=0 || sink.NumberOfBytesAppended()!=0) {
390 errln("months getNextBytes()!=[] after \"january\"");
391 }
392 }
393
394 void BytesTrieTest::TestIteratorFromBranch() {
395 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
396 if(trie.isNull()) {
397 return; // buildTrie() reported an error
398 }
399 // Go to a branch node.
400 trie->next('j');
401 trie->next('a');
402 trie->next('n');
403 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
404 BytesTrie::Iterator iter(*trie, 0, errorCode);
405 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
406 return;
407 }
408 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
409 // following "jan".
410 static const StringAndValue data[]={
411 { "", 1 },
412 { ".", 1 },
413 { "a", 1 },
414 { "bb", 1 },
415 { "c", 1 },
416 { "ddd", 1 },
417 { "ee", 1 },
418 { "ef", 1 },
419 { "f", 1 },
420 { "gg", 1 },
421 { "h", 1 },
422 { "iiii", 1 },
423 { "j", 1 },
424 { "kk", 1 },
425 { "kl", 1 },
426 { "kmm", 1 },
427 { "l", 1 },
428 { "m", 1 },
429 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
430 { "o", 1 },
431 { "pp", 1 },
432 { "qqq", 1 },
433 { "r", 1 },
434 { "uar", 1 },
435 { "uary", 1 }
436 };
437 checkIterator(iter, data, UPRV_LENGTHOF(data));
438 // Reset, and we should get the same result.
439 logln("after iter.reset()");
440 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
441 }
442
443 void BytesTrieTest::TestIteratorFromLinearMatch() {
444 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
445 if(trie.isNull()) {
446 return; // buildTrie() reported an error
447 }
448 // Go into a linear-match node.
449 trie->next('j');
450 trie->next('a');
451 trie->next('n');
452 trie->next('u');
453 trie->next('a');
454 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
455 BytesTrie::Iterator iter(*trie, 0, errorCode);
456 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
457 return;
458 }
459 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
460 // following "janua".
461 static const StringAndValue data[]={
462 { "r", 1 },
463 { "ry", 1 }
464 };
465 checkIterator(iter, data, UPRV_LENGTHOF(data));
466 // Reset, and we should get the same result.
467 logln("after iter.reset()");
468 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
469 }
470
471 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
472 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
473 if(trie.isNull()) {
474 return; // buildTrie() reported an error
475 }
476 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
477 BytesTrie::Iterator iter(*trie, 4, errorCode);
478 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
479 return;
480 }
481 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
482 // of each string, and no string duplicates from the truncation.
483 static const StringAndValue data[]={
484 { "augu", -1 },
485 { "jan", 1 },
486 { "jan.", 1 },
487 { "jana", 1 },
488 { "janb", -1 },
489 { "janc", 1 },
490 { "jand", -1 },
491 { "jane", -1 },
492 { "janf", 1 },
493 { "jang", -1 },
494 { "janh", 1 },
495 { "jani", -1 },
496 { "janj", 1 },
497 { "jank", -1 },
498 { "janl", 1 },
499 { "janm", 1 },
500 { "jann", -1 },
501 { "jano", 1 },
502 { "janp", -1 },
503 { "janq", -1 },
504 { "janr", 1 },
505 { "janu", -1 },
506 { "july", 7 },
507 { "jun", 6 },
508 { "jun.", 6 },
509 { "june", 6 }
510 };
511 checkIterator(iter, data, UPRV_LENGTHOF(data));
512 // Reset, and we should get the same result.
513 logln("after iter.reset()");
514 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
515 }
516
517 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
518 static const StringAndValue data[]={
519 { "abcdef", 10 },
520 { "abcdepq", 200 },
521 { "abcdeyz", 3000 }
522 };
523 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
524 if(trie.isNull()) {
525 return; // buildTrie() reported an error
526 }
527 // Go into a linear-match node.
528 trie->next('a');
529 trie->next('b');
530 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
531 // Truncate within the linear-match node.
532 BytesTrie::Iterator iter(*trie, 2, errorCode);
533 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
534 return;
535 }
536 static const StringAndValue expected[]={
537 { "cd", -1 }
538 };
539 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
540 // Reset, and we should get the same result.
541 logln("after iter.reset()");
542 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
543 }
544
545 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
546 static const StringAndValue data[]={
547 { "abcdef", 10 },
548 { "abcdepq", 200 },
549 { "abcdeyz", 3000 }
550 };
551 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
552 if(trie.isNull()) {
553 return; // buildTrie() reported an error
554 }
555 // Go into a linear-match node.
556 trie->next('a');
557 trie->next('b');
558 trie->next('c');
559 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
560 // Truncate after the linear-match node.
561 BytesTrie::Iterator iter(*trie, 3, errorCode);
562 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
563 return;
564 }
565 static const StringAndValue expected[]={
566 { "def", 10 },
567 { "dep", -1 },
568 { "dey", -1 }
569 };
570 checkIterator(iter, expected, UPRV_LENGTHOF(expected));
571 // Reset, and we should get the same result.
572 logln("after iter.reset()");
573 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
574 }
575
576 void BytesTrieTest::TestIteratorFromBytes() {
577 static const StringAndValue data[]={
578 { "mm", 3 },
579 { "mmm", 33 },
580 { "mmnop", 333 }
581 };
582 builder_->clear();
583 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
584 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
585 builder_->add(data[i].s, data[i].value, errorCode);
586 }
587 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
588 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
589 checkIterator(iter, data, UPRV_LENGTHOF(data));
590 }
591
592 void BytesTrieTest::TestFailedIterator() {
593 UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
594 BytesTrie::Iterator iter(NULL, 0, failure);
595 StringPiece sp = iter.getString();
596 if (!sp.empty()) {
597 errln("failed iterator returned garbage data");
598 }
599 }
600
601 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
602 logln("checkData(dataLength=%d, fast)", (int)dataLength);
603 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
604 logln("checkData(dataLength=%d, small)", (int)dataLength);
605 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
606 }
607
608 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
609 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
610 if(trie.isNull()) {
611 return; // buildTrie() reported an error
612 }
613 checkFirst(*trie, data, dataLength);
614 checkNext(*trie, data, dataLength);
615 checkNextWithState(*trie, data, dataLength);
616 checkNextString(*trie, data, dataLength);
617 checkIterator(*trie, data, dataLength);
618 }
619
620 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
621 UStringTrieBuildOption buildOption) {
622 IcuTestErrorCode errorCode(*this, "buildTrie()");
623 // Add the items to the trie builder in an interesting (not trivial, not random) order.
624 int32_t index, step;
625 if(dataLength&1) {
626 // Odd number of items.
627 index=dataLength/2;
628 step=2;
629 } else if((dataLength%3)!=0) {
630 // Not a multiple of 3.
631 index=dataLength/5;
632 step=3;
633 } else {
634 index=dataLength-1;
635 step=-1;
636 }
637 builder_->clear();
638 for(int32_t i=0; i<dataLength; ++i) {
639 builder_->add(data[index].s, data[index].value, errorCode);
640 index=(index+step)%dataLength;
641 }
642 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
643 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
644 if(!errorCode.errIfFailureAndReset("add()/build()")) {
645 builder_->add("zzz", 999, errorCode);
646 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
647 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
648 }
649 }
650 logln("serialized trie size: %ld bytes\n", (long)sp.length());
651 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
652 if(sp.data()==sp2.data()) {
653 errln("builder.buildStringPiece() before & after build() returned same array");
654 }
655 if(errorCode.isFailure()) {
656 return NULL;
657 }
658 // Tries from either build() method should be identical but
659 // BytesTrie does not implement equals().
660 // We just return either one.
661 if((dataLength&1)!=0) {
662 return trie.orphan();
663 } else {
664 return new BytesTrie(sp2.data());
665 }
666 }
667
668 void BytesTrieTest::checkFirst(BytesTrie &trie,
669 const StringAndValue data[], int32_t dataLength) {
670 for(int32_t i=0; i<dataLength; ++i) {
671 int c=*data[i].s;
672 if(c==0) {
673 continue; // skip empty string
674 }
675 UStringTrieResult firstResult=trie.first(c);
676 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
677 UStringTrieResult nextResult=trie.next(data[i].s[1]);
678 if(firstResult!=trie.reset().next(c) ||
679 firstResult!=trie.current() ||
680 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
681 nextResult!=trie.next(data[i].s[1])
682 ) {
683 errln("trie.first(%c)!=trie.reset().next(same) for %s",
684 c, data[i].s);
685 }
686 }
687 trie.reset();
688 }
689
690 void BytesTrieTest::checkNext(BytesTrie &trie,
691 const StringAndValue data[], int32_t dataLength) {
692 BytesTrie::State state;
693 for(int32_t i=0; i<dataLength; ++i) {
694 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
695 UStringTrieResult result;
696 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
697 result!=trie.current()
698 ) {
699 errln("trie does not seem to contain %s", data[i].s);
700 } else if(trie.getValue()!=data[i].value) {
701 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
702 data[i].s,
703 (long)trie.getValue(), (long)trie.getValue(),
704 (long)data[i].value, (long)data[i].value);
705 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
706 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
707 }
708 trie.reset();
709 stringLength=strlen(data[i].s);
710 result=trie.current();
711 for(int32_t j=0; j<stringLength; ++j) {
712 if(!USTRINGTRIE_HAS_NEXT(result)) {
713 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
714 break;
715 }
716 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
717 trie.getValue();
718 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
719 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
720 break;
721 }
722 }
723 result=trie.next(data[i].s[j]);
724 if(!USTRINGTRIE_MATCHES(result)) {
725 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
726 break;
727 }
728 if(result!=trie.current()) {
729 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
730 break;
731 }
732 }
733 if(!USTRINGTRIE_HAS_VALUE(result)) {
734 errln("trie.next()!=hasValue at the end of %s", data[i].s);
735 continue;
736 }
737 trie.getValue();
738 if(result!=trie.current()) {
739 errln("trie.current() != current()+getValue()+current() after end of %s",
740 data[i].s);
741 }
742 // Compare the final current() with whether next() can actually continue.
743 trie.saveState(state);
744 UBool nextContinues=FALSE;
745 // Try all graphic characters; we only use those in test strings in this file.
746 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
747 const int32_t minChar=0x20;
748 const int32_t maxChar=0x7e;
749 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
750 const int32_t minChar=0x40;
751 const int32_t maxChar=0xfe;
752 #else
753 const int32_t minChar=0;
754 const int32_t maxChar=0xff;
755 #endif
756 for(int32_t c=minChar; c<=maxChar; ++c) {
757 if(trie.resetToState(state).next(c)) {
758 nextContinues=TRUE;
759 break;
760 }
761 }
762 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
763 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
764 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
765 }
766 trie.reset();
767 }
768 }
769
770 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
771 const StringAndValue data[], int32_t dataLength) {
772 BytesTrie::State noState, state;
773 for(int32_t i=0; i<dataLength; ++i) {
774 if((i&1)==0) {
775 // This should have no effect.
776 trie.resetToState(noState);
777 }
778 const char *expectedString=data[i].s;
779 int32_t stringLength=strlen(expectedString);
780 int32_t partialLength=stringLength/3;
781 for(int32_t j=0; j<partialLength; ++j) {
782 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
783 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
784 return;
785 }
786 }
787 trie.saveState(state);
788 UStringTrieResult resultAtState=trie.current();
789 UStringTrieResult result;
790 int32_t valueAtState=-99;
791 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
792 valueAtState=trie.getValue();
793 }
794 result=trie.next(0); // mismatch
795 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
796 errln("trie.next(0) matched after part of %s", data[i].s);
797 }
798 if( resultAtState!=trie.resetToState(state).current() ||
799 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
800 ) {
801 errln("trie.next(part of %s) changes current()/getValue() after "
802 "saveState/next(0)/resetToState",
803 data[i].s);
804 } else if(!USTRINGTRIE_HAS_VALUE(
805 result=trie.next(expectedString+partialLength,
806 stringLength-partialLength)) ||
807 result!=trie.current()) {
808 errln("trie.next(rest of %s) does not seem to contain %s after "
809 "saveState/next(0)/resetToState",
810 data[i].s, data[i].s);
811 } else if(!USTRINGTRIE_HAS_VALUE(
812 result=trie.resetToState(state).
813 next(expectedString+partialLength,
814 stringLength-partialLength)) ||
815 result!=trie.current()) {
816 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
817 data[i].s);
818 } else if(trie.getValue()!=data[i].value) {
819 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
820 data[i].s,
821 (long)trie.getValue(), (long)trie.getValue(),
822 (long)data[i].value, (long)data[i].value);
823 }
824 trie.reset();
825 }
826 }
827
828 // next(string) is also tested in other functions,
829 // but here we try to go partway through the string, and then beyond it.
830 void BytesTrieTest::checkNextString(BytesTrie &trie,
831 const StringAndValue data[], int32_t dataLength) {
832 for(int32_t i=0; i<dataLength; ++i) {
833 const char *expectedString=data[i].s;
834 int32_t stringLength=strlen(expectedString);
835 if(!trie.next(expectedString, stringLength/2)) {
836 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
837 continue;
838 }
839 // Test that we stop properly at the end of the string.
840 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
841 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
842 }
843 trie.reset();
844 }
845 }
846
847 void BytesTrieTest::checkIterator(const BytesTrie &trie,
848 const StringAndValue data[], int32_t dataLength) {
849 IcuTestErrorCode errorCode(*this, "checkIterator()");
850 BytesTrie::Iterator iter(trie, 0, errorCode);
851 if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
852 return;
853 }
854 checkIterator(iter, data, dataLength);
855 }
856
857 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
858 const StringAndValue data[], int32_t dataLength) {
859 IcuTestErrorCode errorCode(*this, "checkIterator()");
860 for(int32_t i=0; i<dataLength; ++i) {
861 if(!iter.hasNext()) {
862 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
863 break;
864 }
865 UBool hasNext=iter.next(errorCode);
866 if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
867 break;
868 }
869 if(!hasNext) {
870 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
871 break;
872 }
873 if(iter.getString()!=StringPiece(data[i].s)) {
874 errln("trie iterator next().getString()=%s but expected %s for item %d",
875 iter.getString().data(), data[i].s, (int)i);
876 }
877 if(iter.getValue()!=data[i].value) {
878 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
879 (long)iter.getValue(), (long)iter.getValue(),
880 (long)data[i].value, (long)data[i].value,
881 (int)i, data[i].s);
882 }
883 }
884 if(iter.hasNext()) {
885 errln("trie iterator hasNext()=TRUE after all items");
886 }
887 UBool hasNext=iter.next(errorCode);
888 errorCode.errIfFailureAndReset("trie iterator next() after all items");
889 if(hasNext) {
890 errln("trie iterator next()=TRUE after all items");
891 }
892 }