]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * Copyright (C) 2010-2014, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ******************************************************************************* | |
8 | * file name: ucharstrietest.cpp | |
9 | * encoding: UTF-8 | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2010nov16 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #include <string.h> | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "unicode/appendable.h" | |
21 | #include "unicode/localpointer.h" | |
22 | #include "unicode/ucharstrie.h" | |
23 | #include "unicode/ucharstriebuilder.h" | |
24 | #include "unicode/uniset.h" | |
25 | #include "unicode/unistr.h" | |
26 | #include "intltest.h" | |
27 | #include "cmemory.h" | |
28 | ||
29 | struct StringAndValue { | |
30 | const char *s; | |
31 | int32_t value; | |
32 | }; | |
33 | ||
34 | class UCharsTrieTest : public IntlTest { | |
35 | public: | |
36 | UCharsTrieTest(); | |
37 | virtual ~UCharsTrieTest(); | |
38 | ||
39 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
40 | void TestBuilder(); | |
41 | void TestEmpty(); | |
42 | void Test_a(); | |
43 | void Test_a_ab(); | |
44 | void TestShortestBranch(); | |
45 | void TestBranches(); | |
46 | void TestLongSequence(); | |
47 | void TestLongBranch(); | |
48 | void TestValuesForState(); | |
49 | void TestCompact(); | |
50 | void TestFirstForCodePoint(); | |
51 | void TestNextForCodePoint(); | |
52 | ||
53 | UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); | |
54 | void TestLargeTrie(); | |
55 | ||
56 | UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); | |
57 | void TestHasUniqueValue(); | |
58 | void TestGetNextUChars(); | |
59 | void TestIteratorFromBranch(); | |
60 | void TestIteratorFromLinearMatch(); | |
61 | void TestTruncatingIteratorFromRoot(); | |
62 | void TestTruncatingIteratorFromLinearMatchShort(); | |
63 | void TestTruncatingIteratorFromLinearMatchLong(); | |
64 | void TestIteratorFromUChars(); | |
65 | ||
66 | void checkData(const StringAndValue data[], int32_t dataLength); | |
67 | void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); | |
68 | UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, | |
69 | UStringTrieBuildOption buildOption); | |
70 | void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
71 | void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
72 | void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
73 | void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
74 | void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
75 | void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); | |
76 | ||
77 | private: | |
78 | UCharsTrieBuilder *builder_; | |
79 | }; | |
80 | ||
81 | extern IntlTest *createUCharsTrieTest() { | |
82 | return new UCharsTrieTest(); | |
83 | } | |
84 | ||
85 | UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { | |
86 | IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); | |
87 | builder_=new UCharsTrieBuilder(errorCode); | |
88 | } | |
89 | ||
90 | UCharsTrieTest::~UCharsTrieTest() { | |
91 | delete builder_; | |
92 | } | |
93 | ||
94 | void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
95 | if(exec) { | |
96 | logln("TestSuite UCharsTrieTest: "); | |
97 | } | |
98 | TESTCASE_AUTO_BEGIN; | |
99 | TESTCASE_AUTO(TestBuilder); | |
100 | TESTCASE_AUTO(TestEmpty); | |
101 | TESTCASE_AUTO(Test_a); | |
102 | TESTCASE_AUTO(Test_a_ab); | |
103 | TESTCASE_AUTO(TestShortestBranch); | |
104 | TESTCASE_AUTO(TestBranches); | |
105 | TESTCASE_AUTO(TestLongSequence); | |
106 | TESTCASE_AUTO(TestLongBranch); | |
107 | TESTCASE_AUTO(TestValuesForState); | |
108 | TESTCASE_AUTO(TestCompact); | |
109 | TESTCASE_AUTO(TestFirstForCodePoint); | |
110 | TESTCASE_AUTO(TestNextForCodePoint); | |
111 | TESTCASE_AUTO(TestLargeTrie); | |
112 | TESTCASE_AUTO(TestHasUniqueValue); | |
113 | TESTCASE_AUTO(TestGetNextUChars); | |
114 | TESTCASE_AUTO(TestIteratorFromBranch); | |
115 | TESTCASE_AUTO(TestIteratorFromLinearMatch); | |
116 | TESTCASE_AUTO(TestTruncatingIteratorFromRoot); | |
117 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); | |
118 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); | |
119 | TESTCASE_AUTO(TestIteratorFromUChars); | |
120 | TESTCASE_AUTO_END; | |
121 | } | |
122 | ||
123 | void UCharsTrieTest::TestBuilder() { | |
124 | IcuTestErrorCode errorCode(*this, "TestBuilder()"); | |
125 | delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); | |
126 | if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { | |
127 | errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); | |
128 | return; | |
129 | } | |
130 | // TODO: remove .build(...) once add() checks for duplicates. | |
131 | builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); | |
132 | if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { | |
133 | errln("UCharsTrieBuilder.add() did not detect duplicates"); | |
134 | return; | |
135 | } | |
136 | } | |
137 | ||
138 | void UCharsTrieTest::TestEmpty() { | |
139 | static const StringAndValue data[]={ | |
140 | { "", 0 } | |
141 | }; | |
142 | checkData(data, UPRV_LENGTHOF(data)); | |
143 | } | |
144 | ||
145 | void UCharsTrieTest::Test_a() { | |
146 | static const StringAndValue data[]={ | |
147 | { "a", 1 } | |
148 | }; | |
149 | checkData(data, UPRV_LENGTHOF(data)); | |
150 | } | |
151 | ||
152 | void UCharsTrieTest::Test_a_ab() { | |
153 | static const StringAndValue data[]={ | |
154 | { "a", 1 }, | |
155 | { "ab", 100 } | |
156 | }; | |
157 | checkData(data, UPRV_LENGTHOF(data)); | |
158 | } | |
159 | ||
160 | void UCharsTrieTest::TestShortestBranch() { | |
161 | static const StringAndValue data[]={ | |
162 | { "a", 1000 }, | |
163 | { "b", 2000 } | |
164 | }; | |
165 | checkData(data, UPRV_LENGTHOF(data)); | |
166 | } | |
167 | ||
168 | void UCharsTrieTest::TestBranches() { | |
169 | static const StringAndValue data[]={ | |
170 | { "a", 0x10 }, | |
171 | { "cc", 0x40 }, | |
172 | { "e", 0x100 }, | |
173 | { "ggg", 0x400 }, | |
174 | { "i", 0x1000 }, | |
175 | { "kkkk", 0x4000 }, | |
176 | { "n", 0x10000 }, | |
177 | { "ppppp", 0x40000 }, | |
178 | { "r", 0x100000 }, | |
179 | { "sss", 0x200000 }, | |
180 | { "t", 0x400000 }, | |
181 | { "uu", 0x800000 }, | |
182 | { "vv", 0x7fffffff }, | |
183 | { "zz", (int32_t)0x80000000 } | |
184 | }; | |
185 | for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { | |
186 | logln("TestBranches length=%d", (int)length); | |
187 | checkData(data, length); | |
188 | } | |
189 | } | |
190 | ||
191 | void UCharsTrieTest::TestLongSequence() { | |
192 | static const StringAndValue data[]={ | |
193 | { "a", -1 }, | |
194 | // sequence of linear-match nodes | |
195 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, | |
196 | // more than 256 units | |
197 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
198 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
199 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
200 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
201 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
202 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } | |
203 | }; | |
204 | checkData(data, UPRV_LENGTHOF(data)); | |
205 | } | |
206 | ||
207 | void UCharsTrieTest::TestLongBranch() { | |
208 | // Split-branch and interesting compact-integer values. | |
209 | static const StringAndValue data[]={ | |
210 | { "a", -2 }, | |
211 | { "b", -1 }, | |
212 | { "c", 0 }, | |
213 | { "d2", 1 }, | |
214 | { "f", 0x3f }, | |
215 | { "g", 0x40 }, | |
216 | { "h", 0x41 }, | |
217 | { "j23", 0x1900 }, | |
218 | { "j24", 0x19ff }, | |
219 | { "j25", 0x1a00 }, | |
220 | { "k2", 0x1a80 }, | |
221 | { "k3", 0x1aff }, | |
222 | { "l234567890", 0x1b00 }, | |
223 | { "l234567890123", 0x1b01 }, | |
224 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, | |
225 | { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, | |
226 | { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, | |
227 | { "r", 0x333333 }, | |
228 | { "s2345", 0x4444444 }, | |
229 | { "t234567890", 0x77777777 }, | |
230 | { "z", (int32_t)0x80000001 } | |
231 | }; | |
232 | checkData(data, UPRV_LENGTHOF(data)); | |
233 | } | |
234 | ||
235 | void UCharsTrieTest::TestValuesForState() { | |
236 | // Check that saveState() and resetToState() interact properly | |
237 | // with next() and current(). | |
238 | static const StringAndValue data[]={ | |
239 | { "a", -1 }, | |
240 | { "ab", -2 }, | |
241 | { "abc", -3 }, | |
242 | { "abcd", -4 }, | |
243 | { "abcde", -5 }, | |
244 | { "abcdef", -6 } | |
245 | }; | |
246 | checkData(data, UPRV_LENGTHOF(data)); | |
247 | } | |
248 | ||
249 | void UCharsTrieTest::TestCompact() { | |
250 | // Duplicate trailing strings and values provide opportunities for compacting. | |
251 | static const StringAndValue data[]={ | |
252 | { "+", 0 }, | |
253 | { "+august", 8 }, | |
254 | { "+december", 12 }, | |
255 | { "+july", 7 }, | |
256 | { "+june", 6 }, | |
257 | { "+november", 11 }, | |
258 | { "+october", 10 }, | |
259 | { "+september", 9 }, | |
260 | { "-", 0 }, | |
261 | { "-august", 8 }, | |
262 | { "-december", 12 }, | |
263 | { "-july", 7 }, | |
264 | { "-june", 6 }, | |
265 | { "-november", 11 }, | |
266 | { "-october", 10 }, | |
267 | { "-september", 9 }, | |
268 | // The l+n branch (with its sub-nodes) is a duplicate but will be written | |
269 | // both times because each time it follows a different linear-match node. | |
270 | { "xjuly", 7 }, | |
271 | { "xjune", 6 } | |
272 | }; | |
273 | checkData(data, UPRV_LENGTHOF(data)); | |
274 | } | |
275 | ||
276 | void UCharsTrieTest::TestFirstForCodePoint() { | |
277 | static const StringAndValue data[]={ | |
278 | { "a", 1 }, | |
279 | { "a\\ud800", 2 }, | |
280 | { "a\\U00010000", 3 }, | |
281 | { "\\ud840", 4 }, | |
282 | { "\\U00020000\\udbff", 5 }, | |
283 | { "\\U00020000\\U0010ffff", 6 }, | |
284 | { "\\U00020000\\U0010ffffz", 7 }, | |
285 | { "\\U00050000xy", 8 }, | |
286 | { "\\U00050000xyz", 9 } | |
287 | }; | |
288 | checkData(data, UPRV_LENGTHOF(data)); | |
289 | } | |
290 | ||
291 | void UCharsTrieTest::TestNextForCodePoint() { | |
292 | static const StringAndValue data[]={ | |
293 | { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, | |
294 | { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, | |
295 | { "\\u4dff\\U000103ff", 99999 } | |
296 | }; | |
297 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); | |
298 | if(trie.isNull()) { | |
299 | return; // buildTrie() reported an error | |
300 | } | |
301 | UStringTrieResult result; | |
302 | if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
303 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
304 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
305 | (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
306 | (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
307 | (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
308 | trie->getValue()!=2000000000 | |
309 | ) { | |
310 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); | |
311 | } | |
312 | if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
313 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
314 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
315 | (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
316 | trie->getValue()!=44444 | |
317 | ) { | |
318 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); | |
319 | } | |
320 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
321 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
322 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
323 | (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate | |
324 | ) { | |
325 | errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); | |
326 | } | |
327 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
328 | (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
329 | trie->getValue()!=99999 | |
330 | ) { | |
331 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); | |
332 | } | |
333 | } | |
334 | ||
335 | // Definitions in the anonymous namespace are invisible outside this file. | |
336 | namespace { | |
337 | ||
338 | // Generate (string, value) pairs. | |
339 | // The first string (before next()) will be empty. | |
340 | class Generator { | |
341 | public: | |
342 | Generator() : value(4711), num(0) {} | |
343 | void next() { | |
344 | UChar c; | |
345 | s.truncate(0); | |
346 | s.append(c=(UChar)(value>>16)); | |
347 | s.append((UChar)(value>>4)); | |
348 | if(value&1) { | |
349 | s.append((UChar)value); | |
350 | } | |
351 | set.add(c); | |
352 | value+=((value>>5)&0x7ff)*3+1; | |
353 | ++num; | |
354 | } | |
355 | const UnicodeString &getString() const { return s; } | |
356 | int32_t getValue() const { return value; } | |
357 | int32_t countUniqueFirstChars() const { return set.size(); } | |
358 | int32_t getIndex() const { return num; } | |
359 | ||
360 | private: | |
361 | UnicodeString s; | |
362 | UnicodeSet set; | |
363 | int32_t value; | |
364 | int32_t num; | |
365 | }; | |
366 | ||
367 | } // end namespace | |
368 | ||
369 | UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { | |
370 | IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); | |
371 | Generator gen; | |
372 | builder_->clear(); | |
373 | while(gen.countUniqueFirstChars()<numUniqueFirst) { | |
374 | builder_->add(gen.getString(), gen.getValue(), errorCode); | |
375 | gen.next(); | |
376 | } | |
377 | logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); | |
378 | UnicodeString trieUChars; | |
379 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
380 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
381 | return new UCharsTrie(trieUChars.getBuffer()); | |
382 | } | |
383 | ||
384 | // Exercise a large branch node. | |
385 | void UCharsTrieTest::TestLargeTrie() { | |
386 | LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); | |
387 | if(trie.isNull()) { | |
388 | return; // buildTrie() reported an error | |
389 | } | |
390 | Generator gen; | |
391 | while(gen.countUniqueFirstChars()<1111) { | |
392 | UnicodeString x(gen.getString()); | |
393 | int32_t value=gen.getValue(); | |
394 | if(!x.isEmpty()) { | |
395 | if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { | |
396 | errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", | |
397 | x[0], (long)gen.getIndex()); | |
398 | break; | |
399 | } | |
400 | x.remove(0, 1); | |
401 | } | |
402 | UStringTrieResult result=trie->next(x.getBuffer(), x.length()); | |
403 | if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { | |
404 | errln("next(%d chars U+%04X U+%04X)!=hasValue or " | |
405 | "next()!=current() or getValue() wrong " | |
406 | "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); | |
407 | break; | |
408 | } | |
409 | gen.next(); | |
410 | } | |
411 | } | |
412 | ||
413 | enum { | |
414 | u_a=0x61, | |
415 | u_b=0x62, | |
416 | u_c=0x63, | |
417 | u_j=0x6a, | |
418 | u_n=0x6e, | |
419 | u_r=0x72, | |
420 | u_u=0x75, | |
421 | u_y=0x79 | |
422 | }; | |
423 | ||
424 | UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { | |
425 | // All types of nodes leading to the same value, | |
426 | // for code coverage of recursive functions. | |
427 | // In particular, we need a lot of branches on some single level | |
428 | // to exercise a split-branch node. | |
429 | static const StringAndValue data[]={ | |
430 | { "august", 8 }, | |
431 | { "jan", 1 }, | |
432 | { "jan.", 1 }, | |
433 | { "jana", 1 }, | |
434 | { "janbb", 1 }, | |
435 | { "janc", 1 }, | |
436 | { "janddd", 1 }, | |
437 | { "janee", 1 }, | |
438 | { "janef", 1 }, | |
439 | { "janf", 1 }, | |
440 | { "jangg", 1 }, | |
441 | { "janh", 1 }, | |
442 | { "janiiii", 1 }, | |
443 | { "janj", 1 }, | |
444 | { "jankk", 1 }, | |
445 | { "jankl", 1 }, | |
446 | { "jankmm", 1 }, | |
447 | { "janl", 1 }, | |
448 | { "janm", 1 }, | |
449 | { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
450 | { "jano", 1 }, | |
451 | { "janpp", 1 }, | |
452 | { "janqqq", 1 }, | |
453 | { "janr", 1 }, | |
454 | { "januar", 1 }, | |
455 | { "january", 1 }, | |
456 | { "july", 7 }, | |
457 | { "jun", 6 }, | |
458 | { "jun.", 6 }, | |
459 | { "june", 6 } | |
460 | }; | |
461 | return buildTrie(data, UPRV_LENGTHOF(data), buildOption); | |
462 | } | |
463 | ||
464 | void UCharsTrieTest::TestHasUniqueValue() { | |
465 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
466 | if(trie.isNull()) { | |
467 | return; // buildTrie() reported an error | |
468 | } | |
469 | int32_t uniqueValue; | |
470 | if(trie->hasUniqueValue(uniqueValue)) { | |
471 | errln("unique value at root"); | |
472 | } | |
473 | trie->next(u_j); | |
474 | trie->next(u_a); | |
475 | trie->next(u_n); | |
476 | // hasUniqueValue() directly after next() | |
477 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { | |
478 | errln("not unique value 1 after \"jan\""); | |
479 | } | |
480 | trie->first(u_j); | |
481 | trie->next(u_u); | |
482 | if(trie->hasUniqueValue(uniqueValue)) { | |
483 | errln("unique value after \"ju\""); | |
484 | } | |
485 | if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { | |
486 | errln("not normal value 6 after \"jun\""); | |
487 | } | |
488 | // hasUniqueValue() after getValue() | |
489 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { | |
490 | errln("not unique value 6 after \"jun\""); | |
491 | } | |
492 | // hasUniqueValue() from within a linear-match node | |
493 | trie->first(u_a); | |
494 | trie->next(u_u); | |
495 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { | |
496 | errln("not unique value 8 after \"au\""); | |
497 | } | |
498 | } | |
499 | ||
500 | void UCharsTrieTest::TestGetNextUChars() { | |
501 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
502 | if(trie.isNull()) { | |
503 | return; // buildTrie() reported an error | |
504 | } | |
505 | UnicodeString buffer; | |
506 | UnicodeStringAppendable app(buffer); | |
507 | int32_t count=trie->getNextUChars(app); | |
508 | if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { | |
509 | errln("months getNextUChars()!=[aj] at root"); | |
510 | } | |
511 | trie->next(u_j); | |
512 | trie->next(u_a); | |
513 | trie->next(u_n); | |
514 | // getNextUChars() directly after next() | |
515 | buffer.remove(); | |
516 | count=trie->getNextUChars(app); | |
517 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
518 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); | |
519 | } | |
520 | // getNextUChars() after getValue() | |
521 | trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. | |
522 | buffer.remove(); | |
523 | count=trie->getNextUChars(app); | |
524 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
525 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); | |
526 | } | |
527 | // getNextUChars() from a linear-match node | |
528 | trie->next(u_u); | |
529 | buffer.remove(); | |
530 | count=trie->getNextUChars(app); | |
531 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { | |
532 | errln("months getNextUChars()!=[a] after \"janu\""); | |
533 | } | |
534 | trie->next(u_a); | |
535 | buffer.remove(); | |
536 | count=trie->getNextUChars(app); | |
537 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { | |
538 | errln("months getNextUChars()!=[r] after \"janua\""); | |
539 | } | |
540 | trie->next(u_r); | |
541 | trie->next(u_y); | |
542 | // getNextUChars() after a final match | |
543 | buffer.remove(); | |
544 | count=trie->getNextUChars(app); | |
545 | if(count!=0 || buffer.length()!=0) { | |
546 | errln("months getNextUChars()!=[] after \"january\""); | |
547 | } | |
548 | } | |
549 | ||
550 | void UCharsTrieTest::TestIteratorFromBranch() { | |
551 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
552 | if(trie.isNull()) { | |
553 | return; // buildTrie() reported an error | |
554 | } | |
555 | // Go to a branch node. | |
556 | trie->next(u_j); | |
557 | trie->next(u_a); | |
558 | trie->next(u_n); | |
559 | IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); | |
560 | UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
561 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
562 | return; | |
563 | } | |
564 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
565 | // following "jan". | |
566 | static const StringAndValue data[]={ | |
567 | { "", 1 }, | |
568 | { ".", 1 }, | |
569 | { "a", 1 }, | |
570 | { "bb", 1 }, | |
571 | { "c", 1 }, | |
572 | { "ddd", 1 }, | |
573 | { "ee", 1 }, | |
574 | { "ef", 1 }, | |
575 | { "f", 1 }, | |
576 | { "gg", 1 }, | |
577 | { "h", 1 }, | |
578 | { "iiii", 1 }, | |
579 | { "j", 1 }, | |
580 | { "kk", 1 }, | |
581 | { "kl", 1 }, | |
582 | { "kmm", 1 }, | |
583 | { "l", 1 }, | |
584 | { "m", 1 }, | |
585 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
586 | { "o", 1 }, | |
587 | { "pp", 1 }, | |
588 | { "qqq", 1 }, | |
589 | { "r", 1 }, | |
590 | { "uar", 1 }, | |
591 | { "uary", 1 } | |
592 | }; | |
593 | checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
594 | // Reset, and we should get the same result. | |
595 | logln("after iter.reset()"); | |
596 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
597 | } | |
598 | ||
599 | void UCharsTrieTest::TestIteratorFromLinearMatch() { | |
600 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
601 | if(trie.isNull()) { | |
602 | return; // buildTrie() reported an error | |
603 | } | |
604 | // Go into a linear-match node. | |
605 | trie->next(u_j); | |
606 | trie->next(u_a); | |
607 | trie->next(u_n); | |
608 | trie->next(u_u); | |
609 | trie->next(u_a); | |
610 | IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); | |
611 | UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
612 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
613 | return; | |
614 | } | |
615 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
616 | // following "janua". | |
617 | static const StringAndValue data[]={ | |
618 | { "r", 1 }, | |
619 | { "ry", 1 } | |
620 | }; | |
621 | checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
622 | // Reset, and we should get the same result. | |
623 | logln("after iter.reset()"); | |
624 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
625 | } | |
626 | ||
627 | void UCharsTrieTest::TestTruncatingIteratorFromRoot() { | |
628 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
629 | if(trie.isNull()) { | |
630 | return; // buildTrie() reported an error | |
631 | } | |
632 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); | |
633 | UCharsTrie::Iterator iter(*trie, 4, errorCode); | |
634 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
635 | return; | |
636 | } | |
637 | // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters | |
638 | // of each string, and no string duplicates from the truncation. | |
639 | static const StringAndValue data[]={ | |
640 | { "augu", -1 }, | |
641 | { "jan", 1 }, | |
642 | { "jan.", 1 }, | |
643 | { "jana", 1 }, | |
644 | { "janb", -1 }, | |
645 | { "janc", 1 }, | |
646 | { "jand", -1 }, | |
647 | { "jane", -1 }, | |
648 | { "janf", 1 }, | |
649 | { "jang", -1 }, | |
650 | { "janh", 1 }, | |
651 | { "jani", -1 }, | |
652 | { "janj", 1 }, | |
653 | { "jank", -1 }, | |
654 | { "janl", 1 }, | |
655 | { "janm", 1 }, | |
656 | { "jann", -1 }, | |
657 | { "jano", 1 }, | |
658 | { "janp", -1 }, | |
659 | { "janq", -1 }, | |
660 | { "janr", 1 }, | |
661 | { "janu", -1 }, | |
662 | { "july", 7 }, | |
663 | { "jun", 6 }, | |
664 | { "jun.", 6 }, | |
665 | { "june", 6 } | |
666 | }; | |
667 | checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
668 | // Reset, and we should get the same result. | |
669 | logln("after iter.reset()"); | |
670 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
671 | } | |
672 | ||
673 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { | |
674 | static const StringAndValue data[]={ | |
675 | { "abcdef", 10 }, | |
676 | { "abcdepq", 200 }, | |
677 | { "abcdeyz", 3000 } | |
678 | }; | |
679 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); | |
680 | if(trie.isNull()) { | |
681 | return; // buildTrie() reported an error | |
682 | } | |
683 | // Go into a linear-match node. | |
684 | trie->next(u_a); | |
685 | trie->next(u_b); | |
686 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); | |
687 | // Truncate within the linear-match node. | |
688 | UCharsTrie::Iterator iter(*trie, 2, errorCode); | |
689 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
690 | return; | |
691 | } | |
692 | static const StringAndValue expected[]={ | |
693 | { "cd", -1 } | |
694 | }; | |
695 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); | |
696 | // Reset, and we should get the same result. | |
697 | logln("after iter.reset()"); | |
698 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); | |
699 | } | |
700 | ||
701 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { | |
702 | static const StringAndValue data[]={ | |
703 | { "abcdef", 10 }, | |
704 | { "abcdepq", 200 }, | |
705 | { "abcdeyz", 3000 } | |
706 | }; | |
707 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); | |
708 | if(trie.isNull()) { | |
709 | return; // buildTrie() reported an error | |
710 | } | |
711 | // Go into a linear-match node. | |
712 | trie->next(u_a); | |
713 | trie->next(u_b); | |
714 | trie->next(u_c); | |
715 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); | |
716 | // Truncate after the linear-match node. | |
717 | UCharsTrie::Iterator iter(*trie, 3, errorCode); | |
718 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
719 | return; | |
720 | } | |
721 | static const StringAndValue expected[]={ | |
722 | { "def", 10 }, | |
723 | { "dep", -1 }, | |
724 | { "dey", -1 } | |
725 | }; | |
726 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); | |
727 | // Reset, and we should get the same result. | |
728 | logln("after iter.reset()"); | |
729 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); | |
730 | } | |
731 | ||
732 | void UCharsTrieTest::TestIteratorFromUChars() { | |
733 | static const StringAndValue data[]={ | |
734 | { "mm", 3 }, | |
735 | { "mmm", 33 }, | |
736 | { "mmnop", 333 } | |
737 | }; | |
738 | builder_->clear(); | |
739 | IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); | |
740 | for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { | |
741 | builder_->add(data[i].s, data[i].value, errorCode); | |
742 | } | |
743 | UnicodeString trieUChars; | |
744 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
745 | UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); | |
746 | checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
747 | } | |
748 | ||
749 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { | |
750 | logln("checkData(dataLength=%d, fast)", (int)dataLength); | |
751 | checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); | |
752 | logln("checkData(dataLength=%d, small)", (int)dataLength); | |
753 | checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); | |
754 | } | |
755 | ||
756 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { | |
757 | LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); | |
758 | if(trie.isNull()) { | |
759 | return; // buildTrie() reported an error | |
760 | } | |
761 | checkFirst(*trie, data, dataLength); | |
762 | checkNext(*trie, data, dataLength); | |
763 | checkNextWithState(*trie, data, dataLength); | |
764 | checkNextString(*trie, data, dataLength); | |
765 | checkIterator(*trie, data, dataLength); | |
766 | } | |
767 | ||
768 | UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, | |
769 | UStringTrieBuildOption buildOption) { | |
770 | IcuTestErrorCode errorCode(*this, "buildTrie()"); | |
771 | // Add the items to the trie builder in an interesting (not trivial, not random) order. | |
772 | int32_t index, step; | |
773 | if(dataLength&1) { | |
774 | // Odd number of items. | |
775 | index=dataLength/2; | |
776 | step=2; | |
777 | } else if((dataLength%3)!=0) { | |
778 | // Not a multiple of 3. | |
779 | index=dataLength/5; | |
780 | step=3; | |
781 | } else { | |
782 | index=dataLength-1; | |
783 | step=-1; | |
784 | } | |
785 | builder_->clear(); | |
786 | for(int32_t i=0; i<dataLength; ++i) { | |
787 | builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), | |
788 | data[index].value, errorCode); | |
789 | index=(index+step)%dataLength; | |
790 | } | |
791 | UnicodeString trieUChars; | |
792 | builder_->buildUnicodeString(buildOption, trieUChars, errorCode); | |
793 | LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); | |
794 | if(!errorCode.logIfFailureAndReset("add()/build()")) { | |
795 | builder_->add("zzz", 999, errorCode); | |
796 | if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { | |
797 | errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); | |
798 | } | |
799 | } | |
800 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
801 | UnicodeString trieUChars2; | |
802 | builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); | |
803 | if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { | |
804 | errln("builder.buildUnicodeString() before & after build() returned same array"); | |
805 | } | |
806 | if(errorCode.isFailure()) { | |
807 | return NULL; | |
808 | } | |
809 | // Tries from either build() method should be identical but | |
810 | // UCharsTrie does not implement equals(). | |
811 | // We just return either one. | |
812 | if((dataLength&1)!=0) { | |
813 | return trie.orphan(); | |
814 | } else { | |
815 | return new UCharsTrie(trieUChars2.getBuffer()); | |
816 | } | |
817 | } | |
818 | ||
819 | void UCharsTrieTest::checkFirst(UCharsTrie &trie, | |
820 | const StringAndValue data[], int32_t dataLength) { | |
821 | for(int32_t i=0; i<dataLength; ++i) { | |
822 | if(*data[i].s==0) { | |
823 | continue; // skip empty string | |
824 | } | |
825 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
826 | UChar32 c=expectedString[0]; | |
827 | UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; | |
828 | UStringTrieResult firstResult=trie.first(c); | |
829 | int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; | |
830 | UStringTrieResult nextResult=trie.next(nextCp); | |
831 | if(firstResult!=trie.reset().next(c) || | |
832 | firstResult!=trie.current() || | |
833 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || | |
834 | nextResult!=trie.next(nextCp) | |
835 | ) { | |
836 | errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", | |
837 | c, data[i].s); | |
838 | } | |
839 | c=expectedString.char32At(0); | |
840 | int32_t cLength=U16_LENGTH(c); | |
841 | nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; | |
842 | firstResult=trie.firstForCodePoint(c); | |
843 | firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; | |
844 | nextResult=trie.nextForCodePoint(nextCp); | |
845 | if(firstResult!=trie.reset().nextForCodePoint(c) || | |
846 | firstResult!=trie.current() || | |
847 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || | |
848 | nextResult!=trie.nextForCodePoint(nextCp) | |
849 | ) { | |
850 | errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", | |
851 | c, data[i].s); | |
852 | } | |
853 | } | |
854 | trie.reset(); | |
855 | } | |
856 | ||
857 | void UCharsTrieTest::checkNext(UCharsTrie &trie, | |
858 | const StringAndValue data[], int32_t dataLength) { | |
859 | UCharsTrie::State state; | |
860 | for(int32_t i=0; i<dataLength; ++i) { | |
861 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
862 | int32_t stringLength= (i&1) ? -1 : expectedString.length(); | |
863 | UStringTrieResult result; | |
864 | if( !USTRINGTRIE_HAS_VALUE( | |
865 | result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || | |
866 | result!=trie.current() | |
867 | ) { | |
868 | errln("trie does not seem to contain %s", data[i].s); | |
869 | } else if(trie.getValue()!=data[i].value) { | |
870 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", | |
871 | data[i].s, | |
872 | (long)trie.getValue(), (long)trie.getValue(), | |
873 | (long)data[i].value, (long)data[i].value); | |
874 | } else if(result!=trie.current() || trie.getValue()!=data[i].value) { | |
875 | errln("trie value for %s changes when repeating current()/getValue()", data[i].s); | |
876 | } | |
877 | trie.reset(); | |
878 | stringLength=expectedString.length(); | |
879 | result=trie.current(); | |
880 | for(int32_t j=0; j<stringLength; ++j) { | |
881 | if(!USTRINGTRIE_HAS_NEXT(result)) { | |
882 | errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); | |
883 | break; | |
884 | } | |
885 | if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { | |
886 | trie.getValue(); | |
887 | if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { | |
888 | errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); | |
889 | break; | |
890 | } | |
891 | } | |
892 | result=trie.next(expectedString[j]); | |
893 | if(!USTRINGTRIE_MATCHES(result)) { | |
894 | errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); | |
895 | break; | |
896 | } | |
897 | if(result!=trie.current()) { | |
898 | errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); | |
899 | break; | |
900 | } | |
901 | } | |
902 | if(!USTRINGTRIE_HAS_VALUE(result)) { | |
903 | errln("trie.next()!=hasValue at the end of %s", data[i].s); | |
904 | continue; | |
905 | } | |
906 | trie.getValue(); | |
907 | if(result!=trie.current()) { | |
908 | errln("trie.current() != current()+getValue()+current() after end of %s", | |
909 | data[i].s); | |
910 | } | |
911 | // Compare the final current() with whether next() can actually continue. | |
912 | trie.saveState(state); | |
913 | UBool nextContinues=FALSE; | |
914 | for(int32_t c=0x20; c<0xe000; ++c) { | |
915 | if(c==0x80) { | |
916 | c=0xd800; // Check for ASCII and surrogates but not all of the BMP. | |
917 | } | |
918 | if(trie.resetToState(state).next(c)) { | |
919 | nextContinues=TRUE; | |
920 | break; | |
921 | } | |
922 | } | |
923 | if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { | |
924 | errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " | |
925 | "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); | |
926 | } | |
927 | trie.reset(); | |
928 | } | |
929 | } | |
930 | ||
931 | void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, | |
932 | const StringAndValue data[], int32_t dataLength) { | |
933 | UCharsTrie::State noState, state; | |
934 | for(int32_t i=0; i<dataLength; ++i) { | |
935 | if((i&1)==0) { | |
936 | // This should have no effect. | |
937 | trie.resetToState(noState); | |
938 | } | |
939 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
940 | int32_t stringLength=expectedString.length(); | |
941 | int32_t partialLength=stringLength/3; | |
942 | for(int32_t j=0; j<partialLength; ++j) { | |
943 | if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { | |
944 | errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); | |
945 | return; | |
946 | } | |
947 | } | |
948 | trie.saveState(state); | |
949 | UStringTrieResult resultAtState=trie.current(); | |
950 | UStringTrieResult result; | |
951 | int32_t valueAtState=-99; | |
952 | if(USTRINGTRIE_HAS_VALUE(resultAtState)) { | |
953 | valueAtState=trie.getValue(); | |
954 | } | |
955 | result=trie.next(0); // mismatch | |
956 | if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { | |
957 | errln("trie.next(0) matched after part of %s", data[i].s); | |
958 | } | |
959 | if( resultAtState!=trie.resetToState(state).current() || | |
960 | (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) | |
961 | ) { | |
962 | errln("trie.next(part of %s) changes current()/getValue() after " | |
963 | "saveState/next(0)/resetToState", | |
964 | data[i].s); | |
965 | } else if(!USTRINGTRIE_HAS_VALUE( | |
966 | result=trie.next(expectedString.getTerminatedBuffer()+partialLength, | |
967 | stringLength-partialLength)) || | |
968 | result!=trie.current()) { | |
969 | errln("trie.next(rest of %s) does not seem to contain %s after " | |
970 | "saveState/next(0)/resetToState", | |
971 | data[i].s, data[i].s); | |
972 | } else if(!USTRINGTRIE_HAS_VALUE( | |
973 | result=trie.resetToState(state). | |
974 | next(expectedString.getTerminatedBuffer()+partialLength, | |
975 | stringLength-partialLength)) || | |
976 | result!=trie.current()) { | |
977 | errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", | |
978 | data[i].s); | |
979 | } else if(trie.getValue()!=data[i].value) { | |
980 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", | |
981 | data[i].s, | |
982 | (long)trie.getValue(), (long)trie.getValue(), | |
983 | (long)data[i].value, (long)data[i].value); | |
984 | } | |
985 | trie.reset(); | |
986 | } | |
987 | } | |
988 | ||
989 | // next(string) is also tested in other functions, | |
990 | // but here we try to go partway through the string, and then beyond it. | |
991 | void UCharsTrieTest::checkNextString(UCharsTrie &trie, | |
992 | const StringAndValue data[], int32_t dataLength) { | |
993 | for(int32_t i=0; i<dataLength; ++i) { | |
994 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
995 | int32_t stringLength=expectedString.length(); | |
996 | if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { | |
997 | errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); | |
998 | continue; | |
999 | } | |
1000 | // Test that we stop properly at the end of the string. | |
1001 | if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, | |
1002 | stringLength+1-stringLength/2)) { | |
1003 | errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); | |
1004 | } | |
1005 | trie.reset(); | |
1006 | } | |
1007 | } | |
1008 | ||
1009 | void UCharsTrieTest::checkIterator(UCharsTrie &trie, | |
1010 | const StringAndValue data[], int32_t dataLength) { | |
1011 | IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
1012 | UCharsTrie::Iterator iter(trie, 0, errorCode); | |
1013 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { | |
1014 | return; | |
1015 | } | |
1016 | checkIterator(iter, data, dataLength); | |
1017 | } | |
1018 | ||
1019 | void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, | |
1020 | const StringAndValue data[], int32_t dataLength) { | |
1021 | IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
1022 | for(int32_t i=0; i<dataLength; ++i) { | |
1023 | if(!iter.hasNext()) { | |
1024 | errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); | |
1025 | break; | |
1026 | } | |
1027 | UBool hasNext=iter.next(errorCode); | |
1028 | if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { | |
1029 | break; | |
1030 | } | |
1031 | if(!hasNext) { | |
1032 | errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); | |
1033 | break; | |
1034 | } | |
1035 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
1036 | if(iter.getString()!=expectedString) { | |
1037 | char buffer[1000]; | |
1038 | UnicodeString invString(prettify(iter.getString())); | |
1039 | invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); | |
1040 | errln("trie iterator next().getString()=%s but expected %s for item %d", | |
1041 | buffer, data[i].s, (int)i); | |
1042 | } | |
1043 | if(iter.getValue()!=data[i].value) { | |
1044 | errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", | |
1045 | (long)iter.getValue(), (long)iter.getValue(), | |
1046 | (long)data[i].value, (long)data[i].value, | |
1047 | (int)i, data[i].s); | |
1048 | } | |
1049 | } | |
1050 | if(iter.hasNext()) { | |
1051 | errln("trie iterator hasNext()=TRUE after all items"); | |
1052 | } | |
1053 | UBool hasNext=iter.next(errorCode); | |
1054 | errorCode.logIfFailureAndReset("trie iterator next() after all items"); | |
1055 | if(hasNext) { | |
1056 | errln("trie iterator next()=TRUE after all items"); | |
1057 | } | |
1058 | } |