]>
Commit | Line | Data |
---|---|---|
4388f060 A |
1 | /* |
2 | ******************************************************************************* | |
b331163b | 3 | * Copyright (C) 2010-2014, International Business Machines |
4388f060 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: ucharstrietest.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2010nov16 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #include <string.h> | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | #include "unicode/appendable.h" | |
19 | #include "unicode/localpointer.h" | |
20 | #include "unicode/ucharstrie.h" | |
21 | #include "unicode/ucharstriebuilder.h" | |
22 | #include "unicode/uniset.h" | |
23 | #include "unicode/unistr.h" | |
24 | #include "intltest.h" | |
b331163b | 25 | #include "cmemory.h" |
4388f060 A |
26 | |
/**
 * One test item: a key string and the value that the trie should map it to.
 * buildTrie() in this file converts s with US_INV and unescape() before adding it,
 * so s may carry backslash-u/backslash-U escape sequences.
 * Kept as a plain aggregate so that tables can be brace-initialized.
 */
struct StringAndValue {
    const char *s;    // NUL-terminated key
    int32_t value;    // value expected for the key
};
31 | ||
32 | class UCharsTrieTest : public IntlTest { | |
33 | public: | |
34 | UCharsTrieTest(); | |
35 | virtual ~UCharsTrieTest(); | |
36 | ||
37 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
38 | void TestBuilder(); | |
39 | void TestEmpty(); | |
40 | void Test_a(); | |
41 | void Test_a_ab(); | |
42 | void TestShortestBranch(); | |
43 | void TestBranches(); | |
44 | void TestLongSequence(); | |
45 | void TestLongBranch(); | |
46 | void TestValuesForState(); | |
47 | void TestCompact(); | |
48 | void TestFirstForCodePoint(); | |
49 | void TestNextForCodePoint(); | |
50 | ||
51 | UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); | |
52 | void TestLargeTrie(); | |
53 | ||
54 | UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); | |
55 | void TestHasUniqueValue(); | |
56 | void TestGetNextUChars(); | |
57 | void TestIteratorFromBranch(); | |
58 | void TestIteratorFromLinearMatch(); | |
59 | void TestTruncatingIteratorFromRoot(); | |
60 | void TestTruncatingIteratorFromLinearMatchShort(); | |
61 | void TestTruncatingIteratorFromLinearMatchLong(); | |
62 | void TestIteratorFromUChars(); | |
63 | ||
64 | void checkData(const StringAndValue data[], int32_t dataLength); | |
65 | void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); | |
66 | UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, | |
67 | UStringTrieBuildOption buildOption); | |
68 | void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
69 | void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
70 | void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
71 | void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
72 | void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); | |
73 | void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); | |
74 | ||
75 | private: | |
76 | UCharsTrieBuilder *builder_; | |
77 | }; | |
78 | ||
79 | extern IntlTest *createUCharsTrieTest() { | |
80 | return new UCharsTrieTest(); | |
81 | } | |
82 | ||
83 | UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { | |
84 | IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); | |
85 | builder_=new UCharsTrieBuilder(errorCode); | |
86 | } | |
87 | ||
88 | UCharsTrieTest::~UCharsTrieTest() { | |
89 | delete builder_; | |
90 | } | |
91 | ||
92 | void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
93 | if(exec) { | |
94 | logln("TestSuite UCharsTrieTest: "); | |
95 | } | |
96 | TESTCASE_AUTO_BEGIN; | |
97 | TESTCASE_AUTO(TestBuilder); | |
98 | TESTCASE_AUTO(TestEmpty); | |
99 | TESTCASE_AUTO(Test_a); | |
100 | TESTCASE_AUTO(Test_a_ab); | |
101 | TESTCASE_AUTO(TestShortestBranch); | |
102 | TESTCASE_AUTO(TestBranches); | |
103 | TESTCASE_AUTO(TestLongSequence); | |
104 | TESTCASE_AUTO(TestLongBranch); | |
105 | TESTCASE_AUTO(TestValuesForState); | |
106 | TESTCASE_AUTO(TestCompact); | |
107 | TESTCASE_AUTO(TestFirstForCodePoint); | |
108 | TESTCASE_AUTO(TestNextForCodePoint); | |
109 | TESTCASE_AUTO(TestLargeTrie); | |
110 | TESTCASE_AUTO(TestHasUniqueValue); | |
111 | TESTCASE_AUTO(TestGetNextUChars); | |
112 | TESTCASE_AUTO(TestIteratorFromBranch); | |
113 | TESTCASE_AUTO(TestIteratorFromLinearMatch); | |
114 | TESTCASE_AUTO(TestTruncatingIteratorFromRoot); | |
115 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); | |
116 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); | |
117 | TESTCASE_AUTO(TestIteratorFromUChars); | |
118 | TESTCASE_AUTO_END; | |
119 | } | |
120 | ||
121 | void UCharsTrieTest::TestBuilder() { | |
122 | IcuTestErrorCode errorCode(*this, "TestBuilder()"); | |
123 | delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); | |
124 | if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { | |
125 | errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); | |
126 | return; | |
127 | } | |
128 | // TODO: remove .build(...) once add() checks for duplicates. | |
129 | builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); | |
130 | if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { | |
131 | errln("UCharsTrieBuilder.add() did not detect duplicates"); | |
132 | return; | |
133 | } | |
134 | } | |
135 | ||
136 | void UCharsTrieTest::TestEmpty() { | |
137 | static const StringAndValue data[]={ | |
138 | { "", 0 } | |
139 | }; | |
b331163b | 140 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
141 | } |
142 | ||
143 | void UCharsTrieTest::Test_a() { | |
144 | static const StringAndValue data[]={ | |
145 | { "a", 1 } | |
146 | }; | |
b331163b | 147 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
148 | } |
149 | ||
150 | void UCharsTrieTest::Test_a_ab() { | |
151 | static const StringAndValue data[]={ | |
152 | { "a", 1 }, | |
153 | { "ab", 100 } | |
154 | }; | |
b331163b | 155 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
156 | } |
157 | ||
158 | void UCharsTrieTest::TestShortestBranch() { | |
159 | static const StringAndValue data[]={ | |
160 | { "a", 1000 }, | |
161 | { "b", 2000 } | |
162 | }; | |
b331163b | 163 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
164 | } |
165 | ||
166 | void UCharsTrieTest::TestBranches() { | |
167 | static const StringAndValue data[]={ | |
168 | { "a", 0x10 }, | |
169 | { "cc", 0x40 }, | |
170 | { "e", 0x100 }, | |
171 | { "ggg", 0x400 }, | |
172 | { "i", 0x1000 }, | |
173 | { "kkkk", 0x4000 }, | |
174 | { "n", 0x10000 }, | |
175 | { "ppppp", 0x40000 }, | |
176 | { "r", 0x100000 }, | |
177 | { "sss", 0x200000 }, | |
178 | { "t", 0x400000 }, | |
179 | { "uu", 0x800000 }, | |
180 | { "vv", 0x7fffffff }, | |
51004dcb | 181 | { "zz", (int32_t)0x80000000 } |
4388f060 | 182 | }; |
b331163b | 183 | for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { |
57a6839d | 184 | logln("TestBranches length=%d", (int)length); |
4388f060 A |
185 | checkData(data, length); |
186 | } | |
187 | } | |
188 | ||
189 | void UCharsTrieTest::TestLongSequence() { | |
190 | static const StringAndValue data[]={ | |
191 | { "a", -1 }, | |
192 | // sequence of linear-match nodes | |
193 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, | |
194 | // more than 256 units | |
195 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
196 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
197 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
198 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
199 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
200 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } | |
201 | }; | |
b331163b | 202 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
203 | } |
204 | ||
205 | void UCharsTrieTest::TestLongBranch() { | |
206 | // Split-branch and interesting compact-integer values. | |
207 | static const StringAndValue data[]={ | |
208 | { "a", -2 }, | |
209 | { "b", -1 }, | |
210 | { "c", 0 }, | |
211 | { "d2", 1 }, | |
212 | { "f", 0x3f }, | |
213 | { "g", 0x40 }, | |
214 | { "h", 0x41 }, | |
215 | { "j23", 0x1900 }, | |
216 | { "j24", 0x19ff }, | |
217 | { "j25", 0x1a00 }, | |
218 | { "k2", 0x1a80 }, | |
219 | { "k3", 0x1aff }, | |
220 | { "l234567890", 0x1b00 }, | |
221 | { "l234567890123", 0x1b01 }, | |
222 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, | |
223 | { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, | |
224 | { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, | |
225 | { "r", 0x333333 }, | |
226 | { "s2345", 0x4444444 }, | |
227 | { "t234567890", 0x77777777 }, | |
51004dcb | 228 | { "z", (int32_t)0x80000001 } |
4388f060 | 229 | }; |
b331163b | 230 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
231 | } |
232 | ||
233 | void UCharsTrieTest::TestValuesForState() { | |
234 | // Check that saveState() and resetToState() interact properly | |
235 | // with next() and current(). | |
236 | static const StringAndValue data[]={ | |
237 | { "a", -1 }, | |
238 | { "ab", -2 }, | |
239 | { "abc", -3 }, | |
240 | { "abcd", -4 }, | |
241 | { "abcde", -5 }, | |
242 | { "abcdef", -6 } | |
243 | }; | |
b331163b | 244 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
245 | } |
246 | ||
247 | void UCharsTrieTest::TestCompact() { | |
248 | // Duplicate trailing strings and values provide opportunities for compacting. | |
249 | static const StringAndValue data[]={ | |
250 | { "+", 0 }, | |
251 | { "+august", 8 }, | |
252 | { "+december", 12 }, | |
253 | { "+july", 7 }, | |
254 | { "+june", 6 }, | |
255 | { "+november", 11 }, | |
256 | { "+october", 10 }, | |
257 | { "+september", 9 }, | |
258 | { "-", 0 }, | |
259 | { "-august", 8 }, | |
260 | { "-december", 12 }, | |
261 | { "-july", 7 }, | |
262 | { "-june", 6 }, | |
263 | { "-november", 11 }, | |
264 | { "-october", 10 }, | |
265 | { "-september", 9 }, | |
266 | // The l+n branch (with its sub-nodes) is a duplicate but will be written | |
267 | // both times because each time it follows a different linear-match node. | |
268 | { "xjuly", 7 }, | |
269 | { "xjune", 6 } | |
270 | }; | |
b331163b | 271 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
272 | } |
273 | ||
274 | void UCharsTrieTest::TestFirstForCodePoint() { | |
275 | static const StringAndValue data[]={ | |
276 | { "a", 1 }, | |
277 | { "a\\ud800", 2 }, | |
278 | { "a\\U00010000", 3 }, | |
279 | { "\\ud840", 4 }, | |
280 | { "\\U00020000\\udbff", 5 }, | |
281 | { "\\U00020000\\U0010ffff", 6 }, | |
282 | { "\\U00020000\\U0010ffffz", 7 }, | |
283 | { "\\U00050000xy", 8 }, | |
284 | { "\\U00050000xyz", 9 } | |
285 | }; | |
b331163b | 286 | checkData(data, UPRV_LENGTHOF(data)); |
4388f060 A |
287 | } |
288 | ||
289 | void UCharsTrieTest::TestNextForCodePoint() { | |
290 | static const StringAndValue data[]={ | |
291 | { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, | |
292 | { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, | |
293 | { "\\u4dff\\U000103ff", 99999 } | |
294 | }; | |
b331163b | 295 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
4388f060 A |
296 | if(trie.isNull()) { |
297 | return; // buildTrie() reported an error | |
298 | } | |
299 | UStringTrieResult result; | |
300 | if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
301 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
302 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
303 | (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
304 | (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
305 | (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
306 | trie->getValue()!=2000000000 | |
307 | ) { | |
308 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); | |
309 | } | |
310 | if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
311 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
312 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
313 | (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
314 | trie->getValue()!=44444 | |
315 | ) { | |
316 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); | |
317 | } | |
318 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
319 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
320 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
321 | (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate | |
322 | ) { | |
323 | errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); | |
324 | } | |
325 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || | |
326 | (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || | |
327 | trie->getValue()!=99999 | |
328 | ) { | |
329 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); | |
330 | } | |
331 | } | |
332 | ||
333 | // Definitions in the anonymous namespace are invisible outside this file. | |
334 | namespace { | |
335 | ||
336 | // Generate (string, value) pairs. | |
337 | // The first string (before next()) will be empty. | |
338 | class Generator { | |
339 | public: | |
340 | Generator() : value(4711), num(0) {} | |
341 | void next() { | |
342 | UChar c; | |
343 | s.truncate(0); | |
344 | s.append(c=(UChar)(value>>16)); | |
345 | s.append((UChar)(value>>4)); | |
346 | if(value&1) { | |
347 | s.append((UChar)value); | |
348 | } | |
349 | set.add(c); | |
350 | value+=((value>>5)&0x7ff)*3+1; | |
351 | ++num; | |
352 | } | |
353 | const UnicodeString &getString() const { return s; } | |
354 | int32_t getValue() const { return value; } | |
355 | int32_t countUniqueFirstChars() const { return set.size(); } | |
356 | int32_t getIndex() const { return num; } | |
357 | ||
358 | private: | |
359 | UnicodeString s; | |
360 | UnicodeSet set; | |
361 | int32_t value; | |
362 | int32_t num; | |
363 | }; | |
364 | ||
365 | } // end namespace | |
366 | ||
367 | UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { | |
368 | IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); | |
369 | Generator gen; | |
370 | builder_->clear(); | |
371 | while(gen.countUniqueFirstChars()<numUniqueFirst) { | |
372 | builder_->add(gen.getString(), gen.getValue(), errorCode); | |
373 | gen.next(); | |
374 | } | |
57a6839d | 375 | logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); |
4388f060 A |
376 | UnicodeString trieUChars; |
377 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
378 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
379 | return new UCharsTrie(trieUChars.getBuffer()); | |
380 | } | |
381 | ||
382 | // Exercise a large branch node. | |
383 | void UCharsTrieTest::TestLargeTrie() { | |
384 | LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); | |
385 | if(trie.isNull()) { | |
386 | return; // buildTrie() reported an error | |
387 | } | |
388 | Generator gen; | |
389 | while(gen.countUniqueFirstChars()<1111) { | |
390 | UnicodeString x(gen.getString()); | |
391 | int32_t value=gen.getValue(); | |
392 | if(!x.isEmpty()) { | |
393 | if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { | |
394 | errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", | |
395 | x[0], (long)gen.getIndex()); | |
396 | break; | |
397 | } | |
398 | x.remove(0, 1); | |
399 | } | |
400 | UStringTrieResult result=trie->next(x.getBuffer(), x.length()); | |
401 | if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { | |
402 | errln("next(%d chars U+%04X U+%04X)!=hasValue or " | |
403 | "next()!=current() or getValue() wrong " | |
404 | "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); | |
405 | break; | |
406 | } | |
407 | gen.next(); | |
408 | } | |
409 | } | |
410 | ||
// Code unit constants for the lowercase letters that the tests below feed to the trie.
// They are spelled as numbers, not as char literals.
enum {
    u_a=0x61,  // a
    u_b=0x62,  // b
    u_c=0x63,  // c
    u_j=0x6a,  // j
    u_n=0x6e,  // n
    u_r=0x72,  // r
    u_u=0x75,  // u
    u_y=0x79   // y
};
421 | ||
422 | UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { | |
423 | // All types of nodes leading to the same value, | |
424 | // for code coverage of recursive functions. | |
425 | // In particular, we need a lot of branches on some single level | |
426 | // to exercise a split-branch node. | |
427 | static const StringAndValue data[]={ | |
428 | { "august", 8 }, | |
429 | { "jan", 1 }, | |
430 | { "jan.", 1 }, | |
431 | { "jana", 1 }, | |
432 | { "janbb", 1 }, | |
433 | { "janc", 1 }, | |
434 | { "janddd", 1 }, | |
435 | { "janee", 1 }, | |
436 | { "janef", 1 }, | |
437 | { "janf", 1 }, | |
438 | { "jangg", 1 }, | |
439 | { "janh", 1 }, | |
440 | { "janiiii", 1 }, | |
441 | { "janj", 1 }, | |
442 | { "jankk", 1 }, | |
443 | { "jankl", 1 }, | |
444 | { "jankmm", 1 }, | |
445 | { "janl", 1 }, | |
446 | { "janm", 1 }, | |
447 | { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
448 | { "jano", 1 }, | |
449 | { "janpp", 1 }, | |
450 | { "janqqq", 1 }, | |
451 | { "janr", 1 }, | |
452 | { "januar", 1 }, | |
453 | { "january", 1 }, | |
454 | { "july", 7 }, | |
455 | { "jun", 6 }, | |
456 | { "jun.", 6 }, | |
457 | { "june", 6 } | |
458 | }; | |
b331163b | 459 | return buildTrie(data, UPRV_LENGTHOF(data), buildOption); |
4388f060 A |
460 | } |
461 | ||
462 | void UCharsTrieTest::TestHasUniqueValue() { | |
463 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
464 | if(trie.isNull()) { | |
465 | return; // buildTrie() reported an error | |
466 | } | |
467 | int32_t uniqueValue; | |
468 | if(trie->hasUniqueValue(uniqueValue)) { | |
469 | errln("unique value at root"); | |
470 | } | |
471 | trie->next(u_j); | |
472 | trie->next(u_a); | |
473 | trie->next(u_n); | |
474 | // hasUniqueValue() directly after next() | |
475 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { | |
476 | errln("not unique value 1 after \"jan\""); | |
477 | } | |
478 | trie->first(u_j); | |
479 | trie->next(u_u); | |
480 | if(trie->hasUniqueValue(uniqueValue)) { | |
481 | errln("unique value after \"ju\""); | |
482 | } | |
483 | if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { | |
484 | errln("not normal value 6 after \"jun\""); | |
485 | } | |
486 | // hasUniqueValue() after getValue() | |
487 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { | |
488 | errln("not unique value 6 after \"jun\""); | |
489 | } | |
490 | // hasUniqueValue() from within a linear-match node | |
491 | trie->first(u_a); | |
492 | trie->next(u_u); | |
493 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { | |
494 | errln("not unique value 8 after \"au\""); | |
495 | } | |
496 | } | |
497 | ||
498 | void UCharsTrieTest::TestGetNextUChars() { | |
499 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
500 | if(trie.isNull()) { | |
501 | return; // buildTrie() reported an error | |
502 | } | |
503 | UnicodeString buffer; | |
504 | UnicodeStringAppendable app(buffer); | |
505 | int32_t count=trie->getNextUChars(app); | |
506 | if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { | |
507 | errln("months getNextUChars()!=[aj] at root"); | |
508 | } | |
509 | trie->next(u_j); | |
510 | trie->next(u_a); | |
511 | trie->next(u_n); | |
512 | // getNextUChars() directly after next() | |
513 | buffer.remove(); | |
514 | count=trie->getNextUChars(app); | |
515 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
516 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); | |
517 | } | |
518 | // getNextUChars() after getValue() | |
519 | trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. | |
520 | buffer.remove(); | |
521 | count=trie->getNextUChars(app); | |
522 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
523 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); | |
524 | } | |
525 | // getNextUChars() from a linear-match node | |
526 | trie->next(u_u); | |
527 | buffer.remove(); | |
528 | count=trie->getNextUChars(app); | |
529 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { | |
530 | errln("months getNextUChars()!=[a] after \"janu\""); | |
531 | } | |
532 | trie->next(u_a); | |
533 | buffer.remove(); | |
534 | count=trie->getNextUChars(app); | |
535 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { | |
536 | errln("months getNextUChars()!=[r] after \"janua\""); | |
537 | } | |
538 | trie->next(u_r); | |
539 | trie->next(u_y); | |
540 | // getNextUChars() after a final match | |
541 | buffer.remove(); | |
542 | count=trie->getNextUChars(app); | |
543 | if(count!=0 || buffer.length()!=0) { | |
544 | errln("months getNextUChars()!=[] after \"january\""); | |
545 | } | |
546 | } | |
547 | ||
548 | void UCharsTrieTest::TestIteratorFromBranch() { | |
549 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
550 | if(trie.isNull()) { | |
551 | return; // buildTrie() reported an error | |
552 | } | |
553 | // Go to a branch node. | |
554 | trie->next(u_j); | |
555 | trie->next(u_a); | |
556 | trie->next(u_n); | |
557 | IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); | |
558 | UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
559 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
560 | return; | |
561 | } | |
562 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
563 | // following "jan". | |
564 | static const StringAndValue data[]={ | |
565 | { "", 1 }, | |
566 | { ".", 1 }, | |
567 | { "a", 1 }, | |
568 | { "bb", 1 }, | |
569 | { "c", 1 }, | |
570 | { "ddd", 1 }, | |
571 | { "ee", 1 }, | |
572 | { "ef", 1 }, | |
573 | { "f", 1 }, | |
574 | { "gg", 1 }, | |
575 | { "h", 1 }, | |
576 | { "iiii", 1 }, | |
577 | { "j", 1 }, | |
578 | { "kk", 1 }, | |
579 | { "kl", 1 }, | |
580 | { "kmm", 1 }, | |
581 | { "l", 1 }, | |
582 | { "m", 1 }, | |
583 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
584 | { "o", 1 }, | |
585 | { "pp", 1 }, | |
586 | { "qqq", 1 }, | |
587 | { "r", 1 }, | |
588 | { "uar", 1 }, | |
589 | { "uary", 1 } | |
590 | }; | |
b331163b | 591 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
4388f060 A |
592 | // Reset, and we should get the same result. |
593 | logln("after iter.reset()"); | |
b331163b | 594 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
4388f060 A |
595 | } |
596 | ||
597 | void UCharsTrieTest::TestIteratorFromLinearMatch() { | |
598 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
599 | if(trie.isNull()) { | |
600 | return; // buildTrie() reported an error | |
601 | } | |
602 | // Go into a linear-match node. | |
603 | trie->next(u_j); | |
604 | trie->next(u_a); | |
605 | trie->next(u_n); | |
606 | trie->next(u_u); | |
607 | trie->next(u_a); | |
608 | IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); | |
609 | UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
610 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
611 | return; | |
612 | } | |
613 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
614 | // following "janua". | |
615 | static const StringAndValue data[]={ | |
616 | { "r", 1 }, | |
617 | { "ry", 1 } | |
618 | }; | |
b331163b | 619 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
4388f060 A |
620 | // Reset, and we should get the same result. |
621 | logln("after iter.reset()"); | |
b331163b | 622 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
4388f060 A |
623 | } |
624 | ||
625 | void UCharsTrieTest::TestTruncatingIteratorFromRoot() { | |
626 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
627 | if(trie.isNull()) { | |
628 | return; // buildTrie() reported an error | |
629 | } | |
630 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); | |
631 | UCharsTrie::Iterator iter(*trie, 4, errorCode); | |
632 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
633 | return; | |
634 | } | |
635 | // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters | |
636 | // of each string, and no string duplicates from the truncation. | |
637 | static const StringAndValue data[]={ | |
638 | { "augu", -1 }, | |
639 | { "jan", 1 }, | |
640 | { "jan.", 1 }, | |
641 | { "jana", 1 }, | |
642 | { "janb", -1 }, | |
643 | { "janc", 1 }, | |
644 | { "jand", -1 }, | |
645 | { "jane", -1 }, | |
646 | { "janf", 1 }, | |
647 | { "jang", -1 }, | |
648 | { "janh", 1 }, | |
649 | { "jani", -1 }, | |
650 | { "janj", 1 }, | |
651 | { "jank", -1 }, | |
652 | { "janl", 1 }, | |
653 | { "janm", 1 }, | |
654 | { "jann", -1 }, | |
655 | { "jano", 1 }, | |
656 | { "janp", -1 }, | |
657 | { "janq", -1 }, | |
658 | { "janr", 1 }, | |
659 | { "janu", -1 }, | |
660 | { "july", 7 }, | |
661 | { "jun", 6 }, | |
662 | { "jun.", 6 }, | |
663 | { "june", 6 } | |
664 | }; | |
b331163b | 665 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
4388f060 A |
666 | // Reset, and we should get the same result. |
667 | logln("after iter.reset()"); | |
b331163b | 668 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
4388f060 A |
669 | } |
670 | ||
671 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { | |
672 | static const StringAndValue data[]={ | |
673 | { "abcdef", 10 }, | |
674 | { "abcdepq", 200 }, | |
675 | { "abcdeyz", 3000 } | |
676 | }; | |
b331163b | 677 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
4388f060 A |
678 | if(trie.isNull()) { |
679 | return; // buildTrie() reported an error | |
680 | } | |
681 | // Go into a linear-match node. | |
682 | trie->next(u_a); | |
683 | trie->next(u_b); | |
684 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); | |
685 | // Truncate within the linear-match node. | |
686 | UCharsTrie::Iterator iter(*trie, 2, errorCode); | |
687 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
688 | return; | |
689 | } | |
690 | static const StringAndValue expected[]={ | |
691 | { "cd", -1 } | |
692 | }; | |
b331163b | 693 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); |
4388f060 A |
694 | // Reset, and we should get the same result. |
695 | logln("after iter.reset()"); | |
b331163b | 696 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); |
4388f060 A |
697 | } |
698 | ||
699 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { | |
700 | static const StringAndValue data[]={ | |
701 | { "abcdef", 10 }, | |
702 | { "abcdepq", 200 }, | |
703 | { "abcdeyz", 3000 } | |
704 | }; | |
b331163b | 705 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
4388f060 A |
706 | if(trie.isNull()) { |
707 | return; // buildTrie() reported an error | |
708 | } | |
709 | // Go into a linear-match node. | |
710 | trie->next(u_a); | |
711 | trie->next(u_b); | |
712 | trie->next(u_c); | |
713 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); | |
714 | // Truncate after the linear-match node. | |
715 | UCharsTrie::Iterator iter(*trie, 3, errorCode); | |
716 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { | |
717 | return; | |
718 | } | |
719 | static const StringAndValue expected[]={ | |
720 | { "def", 10 }, | |
721 | { "dep", -1 }, | |
722 | { "dey", -1 } | |
723 | }; | |
b331163b | 724 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); |
4388f060 A |
725 | // Reset, and we should get the same result. |
726 | logln("after iter.reset()"); | |
b331163b | 727 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); |
4388f060 A |
728 | } |
729 | ||
730 | void UCharsTrieTest::TestIteratorFromUChars() { | |
731 | static const StringAndValue data[]={ | |
732 | { "mm", 3 }, | |
733 | { "mmm", 33 }, | |
734 | { "mmnop", 333 } | |
735 | }; | |
736 | builder_->clear(); | |
737 | IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); | |
b331163b | 738 | for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { |
4388f060 A |
739 | builder_->add(data[i].s, data[i].value, errorCode); |
740 | } | |
741 | UnicodeString trieUChars; | |
742 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
743 | UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); | |
b331163b | 744 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
4388f060 A |
745 | } |
746 | ||
747 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { | |
748 | logln("checkData(dataLength=%d, fast)", (int)dataLength); | |
749 | checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); | |
750 | logln("checkData(dataLength=%d, small)", (int)dataLength); | |
751 | checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); | |
752 | } | |
753 | ||
754 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { | |
755 | LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); | |
756 | if(trie.isNull()) { | |
757 | return; // buildTrie() reported an error | |
758 | } | |
759 | checkFirst(*trie, data, dataLength); | |
760 | checkNext(*trie, data, dataLength); | |
761 | checkNextWithState(*trie, data, dataLength); | |
762 | checkNextString(*trie, data, dataLength); | |
763 | checkIterator(*trie, data, dataLength); | |
764 | } | |
765 | ||
766 | UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, | |
767 | UStringTrieBuildOption buildOption) { | |
768 | IcuTestErrorCode errorCode(*this, "buildTrie()"); | |
769 | // Add the items to the trie builder in an interesting (not trivial, not random) order. | |
770 | int32_t index, step; | |
771 | if(dataLength&1) { | |
772 | // Odd number of items. | |
773 | index=dataLength/2; | |
774 | step=2; | |
775 | } else if((dataLength%3)!=0) { | |
776 | // Not a multiple of 3. | |
777 | index=dataLength/5; | |
778 | step=3; | |
779 | } else { | |
780 | index=dataLength-1; | |
781 | step=-1; | |
782 | } | |
783 | builder_->clear(); | |
784 | for(int32_t i=0; i<dataLength; ++i) { | |
785 | builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), | |
786 | data[index].value, errorCode); | |
787 | index=(index+step)%dataLength; | |
788 | } | |
789 | UnicodeString trieUChars; | |
790 | builder_->buildUnicodeString(buildOption, trieUChars, errorCode); | |
791 | LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); | |
792 | if(!errorCode.logIfFailureAndReset("add()/build()")) { | |
793 | builder_->add("zzz", 999, errorCode); | |
794 | if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { | |
795 | errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); | |
796 | } | |
797 | } | |
798 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
799 | UnicodeString trieUChars2; | |
800 | builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); | |
801 | if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { | |
802 | errln("builder.buildUnicodeString() before & after build() returned same array"); | |
803 | } | |
804 | if(errorCode.isFailure()) { | |
805 | return NULL; | |
806 | } | |
807 | // Tries from either build() method should be identical but | |
808 | // UCharsTrie does not implement equals(). | |
809 | // We just return either one. | |
810 | if((dataLength&1)!=0) { | |
811 | return trie.orphan(); | |
812 | } else { | |
813 | return new UCharsTrie(trieUChars2.getBuffer()); | |
814 | } | |
815 | } | |
816 | ||
817 | void UCharsTrieTest::checkFirst(UCharsTrie &trie, | |
818 | const StringAndValue data[], int32_t dataLength) { | |
819 | for(int32_t i=0; i<dataLength; ++i) { | |
820 | if(*data[i].s==0) { | |
821 | continue; // skip empty string | |
822 | } | |
823 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
824 | UChar32 c=expectedString[0]; | |
825 | UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; | |
826 | UStringTrieResult firstResult=trie.first(c); | |
827 | int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; | |
828 | UStringTrieResult nextResult=trie.next(nextCp); | |
829 | if(firstResult!=trie.reset().next(c) || | |
830 | firstResult!=trie.current() || | |
831 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || | |
832 | nextResult!=trie.next(nextCp) | |
833 | ) { | |
834 | errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", | |
835 | c, data[i].s); | |
836 | } | |
837 | c=expectedString.char32At(0); | |
838 | int32_t cLength=U16_LENGTH(c); | |
839 | nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; | |
840 | firstResult=trie.firstForCodePoint(c); | |
841 | firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; | |
842 | nextResult=trie.nextForCodePoint(nextCp); | |
843 | if(firstResult!=trie.reset().nextForCodePoint(c) || | |
844 | firstResult!=trie.current() || | |
845 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || | |
846 | nextResult!=trie.nextForCodePoint(nextCp) | |
847 | ) { | |
848 | errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", | |
849 | c, data[i].s); | |
850 | } | |
851 | } | |
852 | trie.reset(); | |
853 | } | |
854 | ||
855 | void UCharsTrieTest::checkNext(UCharsTrie &trie, | |
856 | const StringAndValue data[], int32_t dataLength) { | |
857 | UCharsTrie::State state; | |
858 | for(int32_t i=0; i<dataLength; ++i) { | |
859 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
860 | int32_t stringLength= (i&1) ? -1 : expectedString.length(); | |
861 | UStringTrieResult result; | |
862 | if( !USTRINGTRIE_HAS_VALUE( | |
863 | result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || | |
864 | result!=trie.current() | |
865 | ) { | |
866 | errln("trie does not seem to contain %s", data[i].s); | |
867 | } else if(trie.getValue()!=data[i].value) { | |
868 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", | |
869 | data[i].s, | |
870 | (long)trie.getValue(), (long)trie.getValue(), | |
871 | (long)data[i].value, (long)data[i].value); | |
872 | } else if(result!=trie.current() || trie.getValue()!=data[i].value) { | |
873 | errln("trie value for %s changes when repeating current()/getValue()", data[i].s); | |
874 | } | |
875 | trie.reset(); | |
876 | stringLength=expectedString.length(); | |
877 | result=trie.current(); | |
878 | for(int32_t j=0; j<stringLength; ++j) { | |
879 | if(!USTRINGTRIE_HAS_NEXT(result)) { | |
880 | errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); | |
881 | break; | |
882 | } | |
883 | if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { | |
884 | trie.getValue(); | |
885 | if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { | |
886 | errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); | |
887 | break; | |
888 | } | |
889 | } | |
890 | result=trie.next(expectedString[j]); | |
891 | if(!USTRINGTRIE_MATCHES(result)) { | |
892 | errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); | |
893 | break; | |
894 | } | |
895 | if(result!=trie.current()) { | |
896 | errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); | |
897 | break; | |
898 | } | |
899 | } | |
900 | if(!USTRINGTRIE_HAS_VALUE(result)) { | |
901 | errln("trie.next()!=hasValue at the end of %s", data[i].s); | |
902 | continue; | |
903 | } | |
904 | trie.getValue(); | |
905 | if(result!=trie.current()) { | |
906 | errln("trie.current() != current()+getValue()+current() after end of %s", | |
907 | data[i].s); | |
908 | } | |
909 | // Compare the final current() with whether next() can actually continue. | |
910 | trie.saveState(state); | |
911 | UBool nextContinues=FALSE; | |
912 | for(int32_t c=0x20; c<0xe000; ++c) { | |
913 | if(c==0x80) { | |
914 | c=0xd800; // Check for ASCII and surrogates but not all of the BMP. | |
915 | } | |
916 | if(trie.resetToState(state).next(c)) { | |
917 | nextContinues=TRUE; | |
918 | break; | |
919 | } | |
920 | } | |
921 | if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { | |
922 | errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " | |
923 | "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); | |
924 | } | |
925 | trie.reset(); | |
926 | } | |
927 | } | |
928 | ||
929 | void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, | |
930 | const StringAndValue data[], int32_t dataLength) { | |
931 | UCharsTrie::State noState, state; | |
932 | for(int32_t i=0; i<dataLength; ++i) { | |
933 | if((i&1)==0) { | |
934 | // This should have no effect. | |
935 | trie.resetToState(noState); | |
936 | } | |
937 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
938 | int32_t stringLength=expectedString.length(); | |
939 | int32_t partialLength=stringLength/3; | |
940 | for(int32_t j=0; j<partialLength; ++j) { | |
941 | if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { | |
942 | errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); | |
943 | return; | |
944 | } | |
945 | } | |
946 | trie.saveState(state); | |
947 | UStringTrieResult resultAtState=trie.current(); | |
948 | UStringTrieResult result; | |
949 | int32_t valueAtState=-99; | |
950 | if(USTRINGTRIE_HAS_VALUE(resultAtState)) { | |
951 | valueAtState=trie.getValue(); | |
952 | } | |
953 | result=trie.next(0); // mismatch | |
954 | if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { | |
955 | errln("trie.next(0) matched after part of %s", data[i].s); | |
956 | } | |
957 | if( resultAtState!=trie.resetToState(state).current() || | |
958 | (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) | |
959 | ) { | |
960 | errln("trie.next(part of %s) changes current()/getValue() after " | |
961 | "saveState/next(0)/resetToState", | |
962 | data[i].s); | |
963 | } else if(!USTRINGTRIE_HAS_VALUE( | |
964 | result=trie.next(expectedString.getTerminatedBuffer()+partialLength, | |
965 | stringLength-partialLength)) || | |
966 | result!=trie.current()) { | |
967 | errln("trie.next(rest of %s) does not seem to contain %s after " | |
968 | "saveState/next(0)/resetToState", | |
969 | data[i].s, data[i].s); | |
970 | } else if(!USTRINGTRIE_HAS_VALUE( | |
971 | result=trie.resetToState(state). | |
972 | next(expectedString.getTerminatedBuffer()+partialLength, | |
973 | stringLength-partialLength)) || | |
974 | result!=trie.current()) { | |
975 | errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", | |
976 | data[i].s); | |
977 | } else if(trie.getValue()!=data[i].value) { | |
978 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", | |
979 | data[i].s, | |
980 | (long)trie.getValue(), (long)trie.getValue(), | |
981 | (long)data[i].value, (long)data[i].value); | |
982 | } | |
983 | trie.reset(); | |
984 | } | |
985 | } | |
986 | ||
987 | // next(string) is also tested in other functions, | |
988 | // but here we try to go partway through the string, and then beyond it. | |
989 | void UCharsTrieTest::checkNextString(UCharsTrie &trie, | |
990 | const StringAndValue data[], int32_t dataLength) { | |
991 | for(int32_t i=0; i<dataLength; ++i) { | |
992 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
993 | int32_t stringLength=expectedString.length(); | |
994 | if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { | |
995 | errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); | |
996 | continue; | |
997 | } | |
998 | // Test that we stop properly at the end of the string. | |
999 | if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, | |
1000 | stringLength+1-stringLength/2)) { | |
1001 | errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); | |
1002 | } | |
1003 | trie.reset(); | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | void UCharsTrieTest::checkIterator(UCharsTrie &trie, | |
1008 | const StringAndValue data[], int32_t dataLength) { | |
1009 | IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
1010 | UCharsTrie::Iterator iter(trie, 0, errorCode); | |
1011 | if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { | |
1012 | return; | |
1013 | } | |
1014 | checkIterator(iter, data, dataLength); | |
1015 | } | |
1016 | ||
1017 | void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, | |
1018 | const StringAndValue data[], int32_t dataLength) { | |
1019 | IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
1020 | for(int32_t i=0; i<dataLength; ++i) { | |
1021 | if(!iter.hasNext()) { | |
1022 | errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); | |
1023 | break; | |
1024 | } | |
1025 | UBool hasNext=iter.next(errorCode); | |
1026 | if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { | |
1027 | break; | |
1028 | } | |
1029 | if(!hasNext) { | |
1030 | errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); | |
1031 | break; | |
1032 | } | |
1033 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); | |
1034 | if(iter.getString()!=expectedString) { | |
1035 | char buffer[1000]; | |
1036 | UnicodeString invString(prettify(iter.getString())); | |
b331163b | 1037 | invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); |
4388f060 A |
1038 | errln("trie iterator next().getString()=%s but expected %s for item %d", |
1039 | buffer, data[i].s, (int)i); | |
1040 | } | |
1041 | if(iter.getValue()!=data[i].value) { | |
1042 | errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", | |
1043 | (long)iter.getValue(), (long)iter.getValue(), | |
1044 | (long)data[i].value, (long)data[i].value, | |
1045 | (int)i, data[i].s); | |
1046 | } | |
1047 | } | |
1048 | if(iter.hasNext()) { | |
1049 | errln("trie iterator hasNext()=TRUE after all items"); | |
1050 | } | |
1051 | UBool hasNext=iter.next(errorCode); | |
1052 | errorCode.logIfFailureAndReset("trie iterator next() after all items"); | |
1053 | if(hasNext) { | |
1054 | errln("trie iterator next()=TRUE after all items"); | |
1055 | } | |
1056 | } |