]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/bytestrietest.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / bytestrietest.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytetrietest.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov16
12 * created by: Markus W. Scherer
13 */
14
15 #include <string.h>
16
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/bytestriebuilder.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/stringpiece.h"
22 #include "intltest.h"
23
24 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25
26 struct StringAndValue {
27 const char *s;
28 int32_t value;
29 };
30
31 class BytesTrieTest : public IntlTest {
32 public:
33 BytesTrieTest();
34 virtual ~BytesTrieTest();
35
36 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
37 void TestBuilder();
38 void TestEmpty();
39 void Test_a();
40 void Test_a_ab();
41 void TestShortestBranch();
42 void TestBranches();
43 void TestLongSequence();
44 void TestLongBranch();
45 void TestValuesForState();
46 void TestCompact();
47
48 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
49 void TestHasUniqueValue();
50 void TestGetNextBytes();
51 void TestIteratorFromBranch();
52 void TestIteratorFromLinearMatch();
53 void TestTruncatingIteratorFromRoot();
54 void TestTruncatingIteratorFromLinearMatchShort();
55 void TestTruncatingIteratorFromLinearMatchLong();
56 void TestIteratorFromBytes();
57
58 void checkData(const StringAndValue data[], int32_t dataLength);
59 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
60 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
61 UStringTrieBuildOption buildOption);
62 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
63 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
64 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
68
69 private:
70 BytesTrieBuilder *builder_;
71 };
72
73 extern IntlTest *createBytesTrieTest() {
74 return new BytesTrieTest();
75 }
76
77 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
78 IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
79 builder_=new BytesTrieBuilder(errorCode);
80 }
81
82 BytesTrieTest::~BytesTrieTest() {
83 delete builder_;
84 }
85
86 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
87 if(exec) {
88 logln("TestSuite BytesTrieTest: ");
89 }
90 TESTCASE_AUTO_BEGIN;
91 TESTCASE_AUTO(TestBuilder);
92 TESTCASE_AUTO(TestEmpty);
93 TESTCASE_AUTO(Test_a);
94 TESTCASE_AUTO(Test_a_ab);
95 TESTCASE_AUTO(TestShortestBranch);
96 TESTCASE_AUTO(TestBranches);
97 TESTCASE_AUTO(TestLongSequence);
98 TESTCASE_AUTO(TestLongBranch);
99 TESTCASE_AUTO(TestValuesForState);
100 TESTCASE_AUTO(TestCompact);
101 TESTCASE_AUTO(TestHasUniqueValue);
102 TESTCASE_AUTO(TestGetNextBytes);
103 TESTCASE_AUTO(TestIteratorFromBranch);
104 TESTCASE_AUTO(TestIteratorFromLinearMatch);
105 TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
106 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
107 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
108 TESTCASE_AUTO(TestIteratorFromBytes);
109 TESTCASE_AUTO_END;
110 }
111
112 void BytesTrieTest::TestBuilder() {
113 IcuTestErrorCode errorCode(*this, "TestBuilder()");
114 builder_->clear();
115 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
116 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
117 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
118 return;
119 }
120 // TODO: remove .build(...) once add() checks for duplicates.
121 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
122 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
123 errln("BytesTrieBuilder.add() did not detect duplicates");
124 return;
125 }
126 }
127
128 void BytesTrieTest::TestEmpty() {
129 static const StringAndValue data[]={
130 { "", 0 }
131 };
132 checkData(data, LENGTHOF(data));
133 }
134
135 void BytesTrieTest::Test_a() {
136 static const StringAndValue data[]={
137 { "a", 1 }
138 };
139 checkData(data, LENGTHOF(data));
140 }
141
142 void BytesTrieTest::Test_a_ab() {
143 static const StringAndValue data[]={
144 { "a", 1 },
145 { "ab", 100 }
146 };
147 checkData(data, LENGTHOF(data));
148 }
149
150 void BytesTrieTest::TestShortestBranch() {
151 static const StringAndValue data[]={
152 { "a", 1000 },
153 { "b", 2000 }
154 };
155 checkData(data, LENGTHOF(data));
156 }
157
158 void BytesTrieTest::TestBranches() {
159 static const StringAndValue data[]={
160 { "a", 0x10 },
161 { "cc", 0x40 },
162 { "e", 0x100 },
163 { "ggg", 0x400 },
164 { "i", 0x1000 },
165 { "kkkk", 0x4000 },
166 { "n", 0x10000 },
167 { "ppppp", 0x40000 },
168 { "r", 0x100000 },
169 { "sss", 0x200000 },
170 { "t", 0x400000 },
171 { "uu", 0x800000 },
172 { "vv", 0x7fffffff },
173 { "zz", 0x80000000 }
174 };
175 for(int32_t length=2; length<=LENGTHOF(data); ++length) {
176 infoln("TestBranches length=%d", (int)length);
177 checkData(data, length);
178 }
179 }
180
181 void BytesTrieTest::TestLongSequence() {
182 static const StringAndValue data[]={
183 { "a", -1 },
184 // sequence of linear-match nodes
185 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
186 // more than 256 bytes
187 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
188 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
189 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
190 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
193 };
194 checkData(data, LENGTHOF(data));
195 }
196
197 void BytesTrieTest::TestLongBranch() {
198 // Split-branch and interesting compact-integer values.
199 static const StringAndValue data[]={
200 { "a", -2 },
201 { "b", -1 },
202 { "c", 0 },
203 { "d2", 1 },
204 { "f", 0x3f },
205 { "g", 0x40 },
206 { "h", 0x41 },
207 { "j23", 0x1900 },
208 { "j24", 0x19ff },
209 { "j25", 0x1a00 },
210 { "k2", 0x1a80 },
211 { "k3", 0x1aff },
212 { "l234567890", 0x1b00 },
213 { "l234567890123", 0x1b01 },
214 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
215 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
216 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
217 { "r", 0x333333 },
218 { "s2345", 0x4444444 },
219 { "t234567890", 0x77777777 },
220 { "z", 0x80000001 }
221 };
222 checkData(data, LENGTHOF(data));
223 }
224
225 void BytesTrieTest::TestValuesForState() {
226 // Check that saveState() and resetToState() interact properly
227 // with next() and current().
228 static const StringAndValue data[]={
229 { "a", -1 },
230 { "ab", -2 },
231 { "abc", -3 },
232 { "abcd", -4 },
233 { "abcde", -5 },
234 { "abcdef", -6 }
235 };
236 checkData(data, LENGTHOF(data));
237 }
238
239 void BytesTrieTest::TestCompact() {
240 // Duplicate trailing strings and values provide opportunities for compacting.
241 static const StringAndValue data[]={
242 { "+", 0 },
243 { "+august", 8 },
244 { "+december", 12 },
245 { "+july", 7 },
246 { "+june", 6 },
247 { "+november", 11 },
248 { "+october", 10 },
249 { "+september", 9 },
250 { "-", 0 },
251 { "-august", 8 },
252 { "-december", 12 },
253 { "-july", 7 },
254 { "-june", 6 },
255 { "-november", 11 },
256 { "-october", 10 },
257 { "-september", 9 },
258 // The l+n branch (with its sub-nodes) is a duplicate but will be written
259 // both times because each time it follows a different linear-match node.
260 { "xjuly", 7 },
261 { "xjune", 6 }
262 };
263 checkData(data, LENGTHOF(data));
264 }
265
266 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
267 // All types of nodes leading to the same value,
268 // for code coverage of recursive functions.
269 // In particular, we need a lot of branches on some single level
270 // to exercise a split-branch node.
271 static const StringAndValue data[]={
272 { "august", 8 },
273 { "jan", 1 },
274 { "jan.", 1 },
275 { "jana", 1 },
276 { "janbb", 1 },
277 { "janc", 1 },
278 { "janddd", 1 },
279 { "janee", 1 },
280 { "janef", 1 },
281 { "janf", 1 },
282 { "jangg", 1 },
283 { "janh", 1 },
284 { "janiiii", 1 },
285 { "janj", 1 },
286 { "jankk", 1 },
287 { "jankl", 1 },
288 { "jankmm", 1 },
289 { "janl", 1 },
290 { "janm", 1 },
291 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
292 { "jano", 1 },
293 { "janpp", 1 },
294 { "janqqq", 1 },
295 { "janr", 1 },
296 { "januar", 1 },
297 { "january", 1 },
298 { "july", 7 },
299 { "jun", 6 },
300 { "jun.", 6 },
301 { "june", 6 }
302 };
303 return buildTrie(data, LENGTHOF(data), buildOption);
304 }
305
306 void BytesTrieTest::TestHasUniqueValue() {
307 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
308 if(trie.isNull()) {
309 return; // buildTrie() reported an error
310 }
311 int32_t uniqueValue;
312 if(trie->hasUniqueValue(uniqueValue)) {
313 errln("unique value at root");
314 }
315 trie->next('j');
316 trie->next('a');
317 trie->next('n');
318 // hasUniqueValue() directly after next()
319 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
320 errln("not unique value 1 after \"jan\"");
321 }
322 trie->first('j');
323 trie->next('u');
324 if(trie->hasUniqueValue(uniqueValue)) {
325 errln("unique value after \"ju\"");
326 }
327 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
328 errln("not normal value 6 after \"jun\"");
329 }
330 // hasUniqueValue() after getValue()
331 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
332 errln("not unique value 6 after \"jun\"");
333 }
334 // hasUniqueValue() from within a linear-match node
335 trie->first('a');
336 trie->next('u');
337 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
338 errln("not unique value 8 after \"au\"");
339 }
340 }
341
342 void BytesTrieTest::TestGetNextBytes() {
343 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
344 if(trie.isNull()) {
345 return; // buildTrie() reported an error
346 }
347 char buffer[40];
348 CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
349 int32_t count=trie->getNextBytes(sink);
350 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
351 errln("months getNextBytes()!=[aj] at root");
352 }
353 trie->next('j');
354 trie->next('a');
355 trie->next('n');
356 // getNextBytes() directly after next()
357 count=trie->getNextBytes(sink.Reset());
358 buffer[count]=0;
359 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
360 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
361 }
362 // getNextBytes() after getValue()
363 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
364 memset(buffer, 0, sizeof(buffer));
365 count=trie->getNextBytes(sink.Reset());
366 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
367 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
368 }
369 // getNextBytes() from a linear-match node
370 trie->next('u');
371 memset(buffer, 0, sizeof(buffer));
372 count=trie->getNextBytes(sink.Reset());
373 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
374 errln("months getNextBytes()!=[a] after \"janu\"");
375 }
376 trie->next('a');
377 memset(buffer, 0, sizeof(buffer));
378 count=trie->getNextBytes(sink.Reset());
379 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
380 errln("months getNextBytes()!=[r] after \"janua\"");
381 }
382 trie->next('r');
383 trie->next('y');
384 // getNextBytes() after a final match
385 count=trie->getNextBytes(sink.Reset());
386 if(count!=0 || sink.NumberOfBytesAppended()!=0) {
387 errln("months getNextBytes()!=[] after \"january\"");
388 }
389 }
390
391 void BytesTrieTest::TestIteratorFromBranch() {
392 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
393 if(trie.isNull()) {
394 return; // buildTrie() reported an error
395 }
396 // Go to a branch node.
397 trie->next('j');
398 trie->next('a');
399 trie->next('n');
400 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
401 BytesTrie::Iterator iter(*trie, 0, errorCode);
402 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
403 return;
404 }
405 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
406 // following "jan".
407 static const StringAndValue data[]={
408 { "", 1 },
409 { ".", 1 },
410 { "a", 1 },
411 { "bb", 1 },
412 { "c", 1 },
413 { "ddd", 1 },
414 { "ee", 1 },
415 { "ef", 1 },
416 { "f", 1 },
417 { "gg", 1 },
418 { "h", 1 },
419 { "iiii", 1 },
420 { "j", 1 },
421 { "kk", 1 },
422 { "kl", 1 },
423 { "kmm", 1 },
424 { "l", 1 },
425 { "m", 1 },
426 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
427 { "o", 1 },
428 { "pp", 1 },
429 { "qqq", 1 },
430 { "r", 1 },
431 { "uar", 1 },
432 { "uary", 1 }
433 };
434 checkIterator(iter, data, LENGTHOF(data));
435 // Reset, and we should get the same result.
436 logln("after iter.reset()");
437 checkIterator(iter.reset(), data, LENGTHOF(data));
438 }
439
440 void BytesTrieTest::TestIteratorFromLinearMatch() {
441 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
442 if(trie.isNull()) {
443 return; // buildTrie() reported an error
444 }
445 // Go into a linear-match node.
446 trie->next('j');
447 trie->next('a');
448 trie->next('n');
449 trie->next('u');
450 trie->next('a');
451 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
452 BytesTrie::Iterator iter(*trie, 0, errorCode);
453 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
454 return;
455 }
456 // Expected data: Same as in buildMonthsTrie(), except only the suffixes
457 // following "janua".
458 static const StringAndValue data[]={
459 { "r", 1 },
460 { "ry", 1 }
461 };
462 checkIterator(iter, data, LENGTHOF(data));
463 // Reset, and we should get the same result.
464 logln("after iter.reset()");
465 checkIterator(iter.reset(), data, LENGTHOF(data));
466 }
467
468 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
469 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
470 if(trie.isNull()) {
471 return; // buildTrie() reported an error
472 }
473 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
474 BytesTrie::Iterator iter(*trie, 4, errorCode);
475 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
476 return;
477 }
478 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
479 // of each string, and no string duplicates from the truncation.
480 static const StringAndValue data[]={
481 { "augu", -1 },
482 { "jan", 1 },
483 { "jan.", 1 },
484 { "jana", 1 },
485 { "janb", -1 },
486 { "janc", 1 },
487 { "jand", -1 },
488 { "jane", -1 },
489 { "janf", 1 },
490 { "jang", -1 },
491 { "janh", 1 },
492 { "jani", -1 },
493 { "janj", 1 },
494 { "jank", -1 },
495 { "janl", 1 },
496 { "janm", 1 },
497 { "jann", -1 },
498 { "jano", 1 },
499 { "janp", -1 },
500 { "janq", -1 },
501 { "janr", 1 },
502 { "janu", -1 },
503 { "july", 7 },
504 { "jun", 6 },
505 { "jun.", 6 },
506 { "june", 6 }
507 };
508 checkIterator(iter, data, LENGTHOF(data));
509 // Reset, and we should get the same result.
510 logln("after iter.reset()");
511 checkIterator(iter.reset(), data, LENGTHOF(data));
512 }
513
514 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
515 static const StringAndValue data[]={
516 { "abcdef", 10 },
517 { "abcdepq", 200 },
518 { "abcdeyz", 3000 }
519 };
520 LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
521 if(trie.isNull()) {
522 return; // buildTrie() reported an error
523 }
524 // Go into a linear-match node.
525 trie->next('a');
526 trie->next('b');
527 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
528 // Truncate within the linear-match node.
529 BytesTrie::Iterator iter(*trie, 2, errorCode);
530 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
531 return;
532 }
533 static const StringAndValue expected[]={
534 { "cd", -1 }
535 };
536 checkIterator(iter, expected, LENGTHOF(expected));
537 // Reset, and we should get the same result.
538 logln("after iter.reset()");
539 checkIterator(iter.reset(), expected, LENGTHOF(expected));
540 }
541
542 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
543 static const StringAndValue data[]={
544 { "abcdef", 10 },
545 { "abcdepq", 200 },
546 { "abcdeyz", 3000 }
547 };
548 LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
549 if(trie.isNull()) {
550 return; // buildTrie() reported an error
551 }
552 // Go into a linear-match node.
553 trie->next('a');
554 trie->next('b');
555 trie->next('c');
556 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
557 // Truncate after the linear-match node.
558 BytesTrie::Iterator iter(*trie, 3, errorCode);
559 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
560 return;
561 }
562 static const StringAndValue expected[]={
563 { "def", 10 },
564 { "dep", -1 },
565 { "dey", -1 }
566 };
567 checkIterator(iter, expected, LENGTHOF(expected));
568 // Reset, and we should get the same result.
569 logln("after iter.reset()");
570 checkIterator(iter.reset(), expected, LENGTHOF(expected));
571 }
572
573 void BytesTrieTest::TestIteratorFromBytes() {
574 static const StringAndValue data[]={
575 { "mm", 3 },
576 { "mmm", 33 },
577 { "mmnop", 333 }
578 };
579 builder_->clear();
580 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
581 for(int32_t i=0; i<LENGTHOF(data); ++i) {
582 builder_->add(data[i].s, data[i].value, errorCode);
583 }
584 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
585 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
586 checkIterator(iter, data, LENGTHOF(data));
587 }
588
589 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
590 logln("checkData(dataLength=%d, fast)", (int)dataLength);
591 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
592 logln("checkData(dataLength=%d, small)", (int)dataLength);
593 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
594 }
595
596 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
597 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
598 if(trie.isNull()) {
599 return; // buildTrie() reported an error
600 }
601 checkFirst(*trie, data, dataLength);
602 checkNext(*trie, data, dataLength);
603 checkNextWithState(*trie, data, dataLength);
604 checkNextString(*trie, data, dataLength);
605 checkIterator(*trie, data, dataLength);
606 }
607
608 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
609 UStringTrieBuildOption buildOption) {
610 IcuTestErrorCode errorCode(*this, "buildTrie()");
611 // Add the items to the trie builder in an interesting (not trivial, not random) order.
612 int32_t index, step;
613 if(dataLength&1) {
614 // Odd number of items.
615 index=dataLength/2;
616 step=2;
617 } else if((dataLength%3)!=0) {
618 // Not a multiple of 3.
619 index=dataLength/5;
620 step=3;
621 } else {
622 index=dataLength-1;
623 step=-1;
624 }
625 builder_->clear();
626 for(int32_t i=0; i<dataLength; ++i) {
627 builder_->add(data[index].s, data[index].value, errorCode);
628 index=(index+step)%dataLength;
629 }
630 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
631 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
632 if(!errorCode.logIfFailureAndReset("add()/build()")) {
633 builder_->add("zzz", 999, errorCode);
634 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
635 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
636 }
637 }
638 logln("serialized trie size: %ld bytes\n", (long)sp.length());
639 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
640 if(sp.data()==sp2.data()) {
641 errln("builder.buildStringPiece() before & after build() returned same array");
642 }
643 if(errorCode.isFailure()) {
644 return NULL;
645 }
646 // Tries from either build() method should be identical but
647 // BytesTrie does not implement equals().
648 // We just return either one.
649 if((dataLength&1)!=0) {
650 return trie.orphan();
651 } else {
652 return new BytesTrie(sp2.data());
653 }
654 }
655
656 void BytesTrieTest::checkFirst(BytesTrie &trie,
657 const StringAndValue data[], int32_t dataLength) {
658 for(int32_t i=0; i<dataLength; ++i) {
659 int c=*data[i].s;
660 if(c==0) {
661 continue; // skip empty string
662 }
663 UStringTrieResult firstResult=trie.first(c);
664 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
665 UStringTrieResult nextResult=trie.next(data[i].s[1]);
666 if(firstResult!=trie.reset().next(c) ||
667 firstResult!=trie.current() ||
668 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
669 nextResult!=trie.next(data[i].s[1])
670 ) {
671 errln("trie.first(%c)!=trie.reset().next(same) for %s",
672 c, data[i].s);
673 }
674 }
675 trie.reset();
676 }
677
678 void BytesTrieTest::checkNext(BytesTrie &trie,
679 const StringAndValue data[], int32_t dataLength) {
680 BytesTrie::State state;
681 for(int32_t i=0; i<dataLength; ++i) {
682 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
683 UStringTrieResult result;
684 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
685 result!=trie.current()
686 ) {
687 errln("trie does not seem to contain %s", data[i].s);
688 } else if(trie.getValue()!=data[i].value) {
689 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
690 data[i].s,
691 (long)trie.getValue(), (long)trie.getValue(),
692 (long)data[i].value, (long)data[i].value);
693 } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
694 errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
695 }
696 trie.reset();
697 stringLength=strlen(data[i].s);
698 result=trie.current();
699 for(int32_t j=0; j<stringLength; ++j) {
700 if(!USTRINGTRIE_HAS_NEXT(result)) {
701 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
702 break;
703 }
704 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
705 trie.getValue();
706 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
707 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
708 break;
709 }
710 }
711 result=trie.next(data[i].s[j]);
712 if(!USTRINGTRIE_MATCHES(result)) {
713 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
714 break;
715 }
716 if(result!=trie.current()) {
717 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
718 break;
719 }
720 }
721 if(!USTRINGTRIE_HAS_VALUE(result)) {
722 errln("trie.next()!=hasValue at the end of %s", data[i].s);
723 continue;
724 }
725 trie.getValue();
726 if(result!=trie.current()) {
727 errln("trie.current() != current()+getValue()+current() after end of %s",
728 data[i].s);
729 }
730 // Compare the final current() with whether next() can actually continue.
731 trie.saveState(state);
732 UBool nextContinues=FALSE;
733 // Try all graphic characters; we only use those in test strings in this file.
734 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
735 const int32_t minChar=0x20;
736 const int32_t maxChar=0x7e;
737 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
738 const int32_t minChar=0x40;
739 const int32_t maxChar=0xfe;
740 #else
741 const int32_t minChar=0;
742 const int32_t maxChar=0xff;
743 #endif
744 for(int32_t c=minChar; c<=maxChar; ++c) {
745 if(trie.resetToState(state).next(c)) {
746 nextContinues=TRUE;
747 break;
748 }
749 }
750 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
751 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
752 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
753 }
754 trie.reset();
755 }
756 }
757
758 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
759 const StringAndValue data[], int32_t dataLength) {
760 BytesTrie::State noState, state;
761 for(int32_t i=0; i<dataLength; ++i) {
762 if((i&1)==0) {
763 // This should have no effect.
764 trie.resetToState(noState);
765 }
766 const char *expectedString=data[i].s;
767 int32_t stringLength=strlen(expectedString);
768 int32_t partialLength=stringLength/3;
769 for(int32_t j=0; j<partialLength; ++j) {
770 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
771 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
772 return;
773 }
774 }
775 trie.saveState(state);
776 UStringTrieResult resultAtState=trie.current();
777 UStringTrieResult result;
778 int32_t valueAtState=-99;
779 if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
780 valueAtState=trie.getValue();
781 }
782 result=trie.next(0); // mismatch
783 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
784 errln("trie.next(0) matched after part of %s", data[i].s);
785 }
786 if( resultAtState!=trie.resetToState(state).current() ||
787 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
788 ) {
789 errln("trie.next(part of %s) changes current()/getValue() after "
790 "saveState/next(0)/resetToState",
791 data[i].s);
792 } else if(!USTRINGTRIE_HAS_VALUE(
793 result=trie.next(expectedString+partialLength,
794 stringLength-partialLength)) ||
795 result!=trie.current()) {
796 errln("trie.next(rest of %s) does not seem to contain %s after "
797 "saveState/next(0)/resetToState",
798 data[i].s, data[i].s);
799 } else if(!USTRINGTRIE_HAS_VALUE(
800 result=trie.resetToState(state).
801 next(expectedString+partialLength,
802 stringLength-partialLength)) ||
803 result!=trie.current()) {
804 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
805 data[i].s);
806 } else if(trie.getValue()!=data[i].value) {
807 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
808 data[i].s,
809 (long)trie.getValue(), (long)trie.getValue(),
810 (long)data[i].value, (long)data[i].value);
811 }
812 trie.reset();
813 }
814 }
815
816 // next(string) is also tested in other functions,
817 // but here we try to go partway through the string, and then beyond it.
818 void BytesTrieTest::checkNextString(BytesTrie &trie,
819 const StringAndValue data[], int32_t dataLength) {
820 for(int32_t i=0; i<dataLength; ++i) {
821 const char *expectedString=data[i].s;
822 int32_t stringLength=strlen(expectedString);
823 if(!trie.next(expectedString, stringLength/2)) {
824 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
825 continue;
826 }
827 // Test that we stop properly at the end of the string.
828 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
829 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
830 }
831 trie.reset();
832 }
833 }
834
835 void BytesTrieTest::checkIterator(const BytesTrie &trie,
836 const StringAndValue data[], int32_t dataLength) {
837 IcuTestErrorCode errorCode(*this, "checkIterator()");
838 BytesTrie::Iterator iter(trie, 0, errorCode);
839 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
840 return;
841 }
842 checkIterator(iter, data, dataLength);
843 }
844
845 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
846 const StringAndValue data[], int32_t dataLength) {
847 IcuTestErrorCode errorCode(*this, "checkIterator()");
848 for(int32_t i=0; i<dataLength; ++i) {
849 if(!iter.hasNext()) {
850 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
851 break;
852 }
853 UBool hasNext=iter.next(errorCode);
854 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
855 break;
856 }
857 if(!hasNext) {
858 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
859 break;
860 }
861 if(iter.getString()!=StringPiece(data[i].s)) {
862 errln("trie iterator next().getString()=%s but expected %s for item %d",
863 iter.getString().data(), data[i].s, (int)i);
864 }
865 if(iter.getValue()!=data[i].value) {
866 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
867 (long)iter.getValue(), (long)iter.getValue(),
868 (long)data[i].value, (long)data[i].value,
869 (int)i, data[i].s);
870 }
871 }
872 if(iter.hasNext()) {
873 errln("trie iterator hasNext()=TRUE after all items");
874 }
875 UBool hasNext=iter.next(errorCode);
876 errorCode.logIfFailureAndReset("trie iterator next() after all items");
877 if(hasNext) {
878 errln("trie iterator next()=TRUE after all items");
879 }
880 }