]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // ucptrietest.c (modified from trie2test.c) | |
5 | // created: 2017dec29 Markus W. Scherer | |
6 | ||
7 | #include <stdio.h> | |
8 | #include "unicode/utypes.h" | |
9 | #include "unicode/ucptrie.h" | |
10 | #include "unicode/umutablecptrie.h" | |
11 | #include "unicode/utf.h" | |
12 | #include "unicode/utf16.h" | |
13 | #include "unicode/utf8.h" | |
14 | #include "uassert.h" | |
15 | #include "ucptrie_impl.h" | |
16 | #include "utrie.h" | |
17 | #include "cstring.h" | |
18 | #include "cmemory.h" | |
19 | #include "udataswp.h" | |
20 | #include "cintltst.h" | |
21 | ||
22 | void addUCPTrieTest(TestNode** root); | |
23 | ||
24 | /* Values for setting possibly overlapping, out-of-order ranges of values */ | |
25 | typedef struct SetRange { | |
26 | UChar32 start, limit; | |
27 | uint32_t value; | |
28 | } SetRange; | |
29 | ||
30 | /* | |
31 | * Values for testing: | |
32 | * value is set from the previous boundary's limit to before | |
33 | * this boundary's limit | |
34 | * | |
35 | * There must be an entry with limit 0 and the intialValue. | |
36 | * It may be preceded by an entry with negative limit and the errorValue. | |
37 | */ | |
38 | typedef struct CheckRange { | |
39 | UChar32 limit; | |
40 | uint32_t value; | |
41 | } CheckRange; | |
42 | ||
43 | static int32_t | |
44 | skipSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges) { | |
45 | int32_t i; | |
46 | for(i=0; i<countCheckRanges && checkRanges[i].limit<=0; ++i) {} | |
47 | return i; | |
48 | } | |
49 | ||
50 | static int32_t | |
51 | getSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges, | |
52 | uint32_t *pInitialValue, uint32_t *pErrorValue) { | |
53 | int32_t i=0; | |
54 | if(i<countCheckRanges && checkRanges[i].limit<0) { | |
55 | *pErrorValue=checkRanges[i++].value; | |
56 | } else { | |
57 | *pErrorValue=0xad; | |
58 | } | |
59 | if(i<countCheckRanges && checkRanges[i].limit==0) { | |
60 | *pInitialValue=checkRanges[i++].value; | |
61 | } else { | |
62 | *pInitialValue=0; | |
63 | } | |
64 | return i; | |
65 | } | |
66 | ||
67 | /* ucptrie_enum() callback, modifies a value */ | |
68 | static uint32_t U_CALLCONV | |
69 | testFilter(const void *context, uint32_t value) { | |
340931cb | 70 | (void)context; // suppress compiler warnings about unused variable |
3d1f044b A |
71 | return value ^ 0x5555; |
72 | } | |
73 | ||
74 | static UBool | |
75 | doCheckRange(const char *name, const char *variant, | |
76 | UChar32 start, UChar32 end, uint32_t value, | |
77 | UChar32 expEnd, uint32_t expValue) { | |
78 | if (end < 0) { | |
79 | if (expEnd >= 0) { | |
80 | log_err("error: %s getRanges (%s) fails to deliver range [U+%04lx..U+%04lx].0x%lx\n", | |
81 | name, variant, (long)start, (long)expEnd, (long)expValue); | |
82 | } | |
83 | return FALSE; | |
84 | } | |
85 | if (expEnd < 0) { | |
86 | log_err("error: %s getRanges (%s) delivers unexpected range [U+%04lx..U+%04lx].0x%lx\n", | |
87 | name, variant, (long)start, (long)end, (long)value); | |
88 | return FALSE; | |
89 | } | |
90 | if (end != expEnd || value != expValue) { | |
91 | log_err("error: %s getRanges (%s) delivers wrong range [U+%04lx..U+%04lx].0x%lx " | |
92 | "instead of [U+%04lx..U+%04lx].0x%lx\n", | |
93 | name, variant, (long)start, (long)end, (long)value, | |
94 | (long)start, (long)expEnd, (long)expValue); | |
95 | return FALSE; | |
96 | } | |
97 | return TRUE; | |
98 | } | |
99 | ||
100 | // Test iteration starting from various UTF-8/16 and trie structure boundaries. | |
101 | // Also test starting partway through lead & trail surrogates for fixed-surrogate-value options, | |
102 | // and partway through supplementary code points. | |
103 | static UChar32 iterStarts[] = { | |
104 | 0, 0x7f, 0x80, 0x7ff, 0x800, 0xfff, 0x1000, | |
105 | 0xd7ff, 0xd800, 0xd888, 0xdddd, 0xdfff, 0xe000, | |
106 | 0xffff, 0x10000, 0x12345, 0x10ffff, 0x110000 | |
107 | }; | |
108 | ||
109 | static void | |
110 | testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie, | |
111 | UCPMapRangeOption option, uint32_t surrValue, | |
112 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
113 | const char *const typeName = trie == NULL ? "mutableTrie" : "trie"; | |
114 | const char *const optionName = option == UCPMAP_RANGE_NORMAL ? "normal" : | |
115 | option == UCPMAP_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr"; | |
116 | char name[80]; | |
117 | int32_t s; | |
118 | for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) { | |
119 | UChar32 start = iterStarts[s]; | |
120 | int32_t i, i0; | |
121 | UChar32 end, expEnd; | |
122 | uint32_t value, expValue; | |
123 | // No need to go from each iteration start to the very end. | |
124 | int32_t innerLoopCount; | |
125 | ||
126 | sprintf(name, "%s/%s(%s) min=U+%04lx", typeName, optionName, testName, (long)start); | |
127 | ||
128 | // Skip over special values and low ranges. | |
129 | for (i = 0; i < countCheckRanges && checkRanges[i].limit <= start; ++i) {} | |
130 | i0 = i; | |
131 | // without value handler | |
132 | for (innerLoopCount = 0;; ++i, start = end + 1) { | |
133 | if (i < countCheckRanges) { | |
134 | expEnd = checkRanges[i].limit - 1; | |
135 | expValue = checkRanges[i].value; | |
136 | } else { | |
137 | expEnd = -1; | |
138 | expValue = value = 0x5005; | |
139 | } | |
140 | end = trie != NULL ? | |
141 | ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, &value) : | |
142 | umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, &value); | |
143 | if (!doCheckRange(name, "without value handler", start, end, value, expEnd, expValue)) { | |
144 | break; | |
145 | } | |
146 | if (s != 0 && ++innerLoopCount == 5) { break; } | |
147 | } | |
148 | // with value handler | |
149 | for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) { | |
150 | if (i < countCheckRanges) { | |
151 | expEnd = checkRanges[i].limit - 1; | |
152 | expValue = checkRanges[i].value ^ 0x5555; | |
153 | } else { | |
154 | expEnd = -1; | |
155 | expValue = value = 0x5005; | |
156 | } | |
157 | end = trie != NULL ? | |
158 | ucptrie_getRange(trie, start, option, surrValue ^ 0x5555, testFilter, NULL, &value) : | |
159 | umutablecptrie_getRange(mutableTrie, start, option, surrValue ^ 0x5555, | |
160 | testFilter, NULL, &value); | |
161 | if (!doCheckRange(name, "with value handler", start, end, value, expEnd, expValue)) { | |
162 | break; | |
163 | } | |
164 | if (s != 0 && ++innerLoopCount == 5) { break; } | |
165 | } | |
166 | // without value | |
167 | for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) { | |
168 | if (i < countCheckRanges) { | |
169 | expEnd = checkRanges[i].limit - 1; | |
170 | } else { | |
171 | expEnd = -1; | |
172 | } | |
173 | end = trie != NULL ? | |
174 | ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, NULL) : | |
175 | umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, NULL); | |
176 | if (!doCheckRange(name, "without value", start, end, 0, expEnd, 0)) { | |
177 | break; | |
178 | } | |
179 | if (s != 0 && ++innerLoopCount == 5) { break; } | |
180 | } | |
181 | } | |
182 | } | |
183 | ||
184 | static void | |
185 | testTrieGetters(const char *testName, const UCPTrie *trie, | |
186 | UCPTrieType type, UCPTrieValueWidth valueWidth, | |
187 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
188 | uint32_t initialValue, errorValue; | |
189 | uint32_t value, value2; | |
190 | UChar32 start, limit; | |
191 | int32_t i, countSpecials; | |
192 | int32_t countErrors=0; | |
193 | ||
194 | const char *const typeName = "trie"; | |
195 | ||
196 | countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
197 | ||
198 | start=0; | |
199 | for(i=countSpecials; i<countCheckRanges; ++i) { | |
200 | limit=checkRanges[i].limit; | |
201 | value=checkRanges[i].value; | |
202 | ||
203 | while(start<limit) { | |
204 | if (start <= 0x7f) { | |
205 | if (valueWidth == UCPTRIE_VALUE_BITS_16) { | |
206 | value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_16, start); | |
207 | } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { | |
208 | value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_32, start); | |
209 | } else { | |
210 | value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_8, start); | |
211 | } | |
212 | if (value != value2) { | |
213 | log_err("error: %s(%s).fromASCII(U+%04lx)==0x%lx instead of 0x%lx\n", | |
214 | typeName, testName, (long)start, (long)value2, (long)value); | |
215 | ++countErrors; | |
216 | } | |
217 | } | |
218 | if (type == UCPTRIE_TYPE_FAST) { | |
219 | if(start<=0xffff) { | |
220 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
221 | value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_16, start); | |
222 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
223 | value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_32, start); | |
224 | } else { | |
225 | value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_8, start); | |
226 | } | |
227 | if(value!=value2) { | |
228 | log_err("error: %s(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n", | |
229 | typeName, testName, (long)start, (long)value2, (long)value); | |
230 | ++countErrors; | |
231 | } | |
232 | } else { | |
233 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
234 | value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_16, start); | |
235 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
236 | value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_32, start); | |
237 | } else { | |
238 | value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_8, start); | |
239 | } | |
240 | if(value!=value2) { | |
241 | log_err("error: %s(%s).fromSupp(U+%04lx)==0x%lx instead of 0x%lx\n", | |
242 | typeName, testName, (long)start, (long)value2, (long)value); | |
243 | ++countErrors; | |
244 | } | |
245 | } | |
246 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
247 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, start); | |
248 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
249 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, start); | |
250 | } else { | |
251 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, start); | |
252 | } | |
253 | } else { | |
254 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
255 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, start); | |
256 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
257 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, start); | |
258 | } else { | |
259 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, start); | |
260 | } | |
261 | } | |
262 | if(value!=value2) { | |
263 | log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n", | |
264 | typeName, testName, (long)start, (long)value2, (long)value); | |
265 | ++countErrors; | |
266 | } | |
267 | value2=ucptrie_get(trie, start); | |
268 | if(value!=value2) { | |
269 | log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n", | |
270 | typeName, testName, (long)start, (long)value2, (long)value); | |
271 | ++countErrors; | |
272 | } | |
273 | ++start; | |
274 | if(countErrors>10) { | |
275 | return; | |
276 | } | |
277 | } | |
278 | } | |
279 | ||
280 | /* test linear ASCII range from the data array pointer (access to "internal" field) */ | |
281 | start=0; | |
282 | for(i=countSpecials; i<countCheckRanges && start<=0x7f; ++i) { | |
283 | limit=checkRanges[i].limit; | |
284 | value=checkRanges[i].value; | |
285 | ||
286 | while(start<limit && start<=0x7f) { | |
287 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
288 | value2=trie->data.ptr16[start]; | |
289 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
290 | value2=trie->data.ptr32[start]; | |
291 | } else { | |
292 | value2=trie->data.ptr8[start]; | |
293 | } | |
294 | if(value!=value2) { | |
295 | log_err("error: %s(%s).asciiData[U+%04lx]==0x%lx instead of 0x%lx\n", | |
296 | typeName, testName, (long)start, (long)value2, (long)value); | |
297 | ++countErrors; | |
298 | } | |
299 | ++start; | |
300 | if(countErrors>10) { | |
301 | return; | |
302 | } | |
303 | } | |
304 | } | |
305 | ||
306 | /* test errorValue */ | |
307 | if (type == UCPTRIE_TYPE_FAST) { | |
308 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
309 | value = UCPTRIE_FAST_GET(trie, UCPTRIE_16, -1); | |
310 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, 0x110000); | |
311 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
312 | value = UCPTRIE_FAST_GET(trie, UCPTRIE_32, -1); | |
313 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, 0x110000); | |
314 | } else { | |
315 | value = UCPTRIE_FAST_GET(trie, UCPTRIE_8, -1); | |
316 | value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, 0x110000); | |
317 | } | |
318 | } else { | |
319 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
320 | value = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, -1); | |
321 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, 0x110000); | |
322 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
323 | value = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, -1); | |
324 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, 0x110000); | |
325 | } else { | |
326 | value = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, -1); | |
327 | value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, 0x110000); | |
328 | } | |
329 | } | |
330 | if(value!=errorValue || value2!=errorValue) { | |
331 | log_err("error: %s(%s).get(out of range) != errorValue\n", | |
332 | typeName, testName); | |
333 | } | |
334 | value=ucptrie_get(trie, -1); | |
335 | value2=ucptrie_get(trie, 0x110000); | |
336 | if(value!=errorValue || value2!=errorValue) { | |
337 | log_err("error: %s(%s).get(out of range) != errorValue\n", | |
338 | typeName, testName); | |
339 | } | |
340 | } | |
341 | ||
342 | static void | |
343 | testBuilderGetters(const char *testName, const UMutableCPTrie *mutableTrie, | |
344 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
345 | uint32_t initialValue, errorValue; | |
346 | uint32_t value, value2; | |
347 | UChar32 start, limit; | |
348 | int32_t i, countSpecials; | |
349 | int32_t countErrors=0; | |
350 | ||
351 | const char *const typeName = "mutableTrie"; | |
352 | ||
353 | countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
354 | ||
355 | start=0; | |
356 | for(i=countSpecials; i<countCheckRanges; ++i) { | |
357 | limit=checkRanges[i].limit; | |
358 | value=checkRanges[i].value; | |
359 | ||
360 | while(start<limit) { | |
361 | value2=umutablecptrie_get(mutableTrie, start); | |
362 | if(value!=value2) { | |
363 | log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n", | |
364 | typeName, testName, (long)start, (long)value2, (long)value); | |
365 | ++countErrors; | |
366 | } | |
367 | ++start; | |
368 | if(countErrors>10) { | |
369 | return; | |
370 | } | |
371 | } | |
372 | } | |
373 | ||
374 | /* test errorValue */ | |
375 | value=umutablecptrie_get(mutableTrie, -1); | |
376 | value2=umutablecptrie_get(mutableTrie, 0x110000); | |
377 | if(value!=errorValue || value2!=errorValue) { | |
378 | log_err("error: %s(%s).get(out of range) != errorValue\n", | |
379 | typeName, testName); | |
380 | } | |
381 | } | |
382 | ||
/*
 * True if appending the code point cp to the UTF-16 buffer s of the given length
 * would put a trail surrogate directly after a lead surrogate,
 * accidentally forming a surrogate pair.
 * The arguments are fully parenthesized so that expression arguments expand correctly.
 */
#define ACCIDENTAL_SURROGATE_PAIR(s, length, cp) \
    ((length) > 0 && U16_IS_LEAD((s)[(length) - 1]) && U_IS_TRAIL(cp))
384 | ||
385 | static void | |
386 | testTrieUTF16(const char *testName, | |
387 | const UCPTrie *trie, UCPTrieValueWidth valueWidth, | |
388 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
389 | UChar s[30000]; | |
390 | uint32_t values[16000]; | |
391 | ||
392 | const UChar *p, *limit; | |
393 | ||
394 | uint32_t errorValue = ucptrie_get(trie, -1); | |
395 | uint32_t value, expected; | |
396 | UChar32 prevCP, c, c2; | |
397 | int32_t i, length, sIndex, countValues; | |
398 | ||
399 | /* write a string */ | |
400 | prevCP=0; | |
401 | length=countValues=0; | |
402 | for(i=skipSpecialValues(checkRanges, countCheckRanges); i<countCheckRanges; ++i) { | |
403 | value=checkRanges[i].value; | |
404 | /* write three code points */ | |
405 | if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) { | |
406 | U16_APPEND_UNSAFE(s, length, prevCP); /* start of the range */ | |
407 | values[countValues++]=value; | |
408 | } | |
409 | U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values)); | |
410 | c=checkRanges[i].limit; | |
411 | prevCP=(prevCP+c)/2; /* middle of the range */ | |
412 | if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) { | |
413 | U16_APPEND_UNSAFE(s, length, prevCP); | |
414 | values[countValues++]=value; | |
415 | } | |
416 | prevCP=c; | |
417 | --c; /* end of the range */ | |
418 | if(!ACCIDENTAL_SURROGATE_PAIR(s, length, c)) { | |
419 | U16_APPEND_UNSAFE(s, length, c); | |
420 | values[countValues++]=value; | |
421 | } | |
422 | } | |
423 | limit=s+length; | |
424 | if(length>UPRV_LENGTHOF(s)) { | |
425 | log_err("UTF-16 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s)); | |
426 | return; | |
427 | } | |
428 | if(countValues>UPRV_LENGTHOF(values)) { | |
429 | log_err("UTF-16 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values)); | |
430 | return; | |
431 | } | |
432 | ||
433 | /* try forward */ | |
434 | p=s; | |
435 | i=0; | |
436 | while(p<limit) { | |
437 | sIndex=(int32_t)(p-s); | |
438 | U16_NEXT(s, sIndex, length, c2); | |
439 | c=0x33; | |
440 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
441 | UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_16, p, limit, c, value); | |
442 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
443 | UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_32, p, limit, c, value); | |
444 | } else { | |
445 | UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_8, p, limit, c, value); | |
446 | } | |
447 | expected = U_IS_SURROGATE(c) ? errorValue : values[i]; | |
448 | if(value!=expected) { | |
449 | log_err("error: wrong value from UCPTRIE_NEXT(%s)(U+%04lx): 0x%lx instead of 0x%lx\n", | |
450 | testName, (long)c, (long)value, (long)expected); | |
451 | } | |
452 | if(c!=c2) { | |
453 | log_err("error: wrong code point from UCPTRIE_NEXT(%s): U+%04lx != U+%04lx\n", | |
454 | testName, (long)c, (long)c2); | |
455 | continue; | |
456 | } | |
457 | ++i; | |
458 | } | |
459 | ||
460 | /* try backward */ | |
461 | p=limit; | |
462 | i=countValues; | |
463 | while(s<p) { | |
464 | --i; | |
465 | sIndex=(int32_t)(p-s); | |
466 | U16_PREV(s, 0, sIndex, c2); | |
467 | c=0x33; | |
468 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
469 | UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_16, s, p, c, value); | |
470 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
471 | UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_32, s, p, c, value); | |
472 | } else { | |
473 | UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_8, s, p, c, value); | |
474 | } | |
475 | expected = U_IS_SURROGATE(c) ? errorValue : values[i]; | |
476 | if(value!=expected) { | |
477 | log_err("error: wrong value from UCPTRIE_PREV(%s)(U+%04lx): 0x%lx instead of 0x%lx\n", | |
478 | testName, (long)c, (long)value, (long)expected); | |
479 | } | |
480 | if(c!=c2) { | |
481 | log_err("error: wrong code point from UCPTRIE_PREV(%s): U+%04lx != U+%04lx\n", | |
482 | testName, c, c2); | |
483 | } | |
484 | } | |
485 | } | |
486 | ||
487 | static void | |
488 | testTrieUTF8(const char *testName, | |
489 | const UCPTrie *trie, UCPTrieValueWidth valueWidth, | |
490 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
491 | // Note: The byte sequence comments refer to the original UTF-8 definition. | |
492 | // Starting with ICU 60, any sequence that is not a prefix of a valid one | |
493 | // is treated as multiple single-byte errors. | |
494 | // For testing, we only rely on U8_... and UCPTrie UTF-8 macros | |
495 | // iterating consistently. | |
496 | static const uint8_t illegal[]={ | |
497 | 0xc0, 0x80, /* non-shortest U+0000 */ | |
498 | 0xc1, 0xbf, /* non-shortest U+007f */ | |
499 | 0xc2, /* truncated */ | |
500 | 0xe0, 0x90, 0x80, /* non-shortest U+0400 */ | |
501 | 0xe0, 0xa0, /* truncated */ | |
502 | 0xed, 0xa0, 0x80, /* lead surrogate U+d800 */ | |
503 | 0xed, 0xbf, 0xbf, /* trail surrogate U+dfff */ | |
504 | 0xf0, 0x8f, 0xbf, 0xbf, /* non-shortest U+ffff */ | |
505 | 0xf0, 0x90, 0x80, /* truncated */ | |
506 | 0xf4, 0x90, 0x80, 0x80, /* beyond-Unicode U+110000 */ | |
507 | 0xf8, 0x80, 0x80, 0x80, /* truncated */ | |
508 | 0xf8, 0x80, 0x80, 0x80, 0x80, /* 5-byte UTF-8 */ | |
509 | 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, /* truncated */ | |
510 | 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, /* 6-byte UTF-8 */ | |
511 | 0xfe, | |
512 | 0xff | |
513 | }; | |
514 | uint8_t s[60000]; | |
515 | uint32_t values[16000]; | |
516 | ||
517 | const uint8_t *p, *limit; | |
518 | ||
519 | uint32_t initialValue, errorValue; | |
520 | uint32_t value, expectedBytes, actualBytes; | |
521 | UChar32 prevCP, c; | |
522 | int32_t i, countSpecials, length, countValues; | |
523 | int32_t prev8, i8; | |
524 | ||
525 | countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
526 | ||
527 | /* write a string */ | |
528 | prevCP=0; | |
529 | length=countValues=0; | |
530 | /* first a couple of trail bytes in lead position */ | |
531 | s[length++]=0x80; | |
532 | values[countValues++]=errorValue; | |
533 | s[length++]=0xbf; | |
534 | values[countValues++]=errorValue; | |
535 | prev8=i8=0; | |
536 | for(i=countSpecials; i<countCheckRanges; ++i) { | |
537 | value=checkRanges[i].value; | |
538 | /* write three legal (or surrogate) code points */ | |
539 | U8_APPEND_UNSAFE(s, length, prevCP); /* start of the range */ | |
540 | if(U_IS_SURROGATE(prevCP)) { | |
541 | // A surrogate byte sequence counts as 3 single-byte errors. | |
542 | values[countValues++]=errorValue; | |
543 | values[countValues++]=errorValue; | |
544 | values[countValues++]=errorValue; | |
545 | } else { | |
546 | values[countValues++]=value; | |
547 | } | |
548 | U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values)); | |
549 | c=checkRanges[i].limit; | |
550 | prevCP=(prevCP+c)/2; /* middle of the range */ | |
551 | U8_APPEND_UNSAFE(s, length, prevCP); | |
552 | if(U_IS_SURROGATE(prevCP)) { | |
553 | // A surrogate byte sequence counts as 3 single-byte errors. | |
554 | values[countValues++]=errorValue; | |
555 | values[countValues++]=errorValue; | |
556 | values[countValues++]=errorValue; | |
557 | } else { | |
558 | values[countValues++]=value; | |
559 | } | |
560 | prevCP=c; | |
561 | --c; /* end of the range */ | |
562 | U8_APPEND_UNSAFE(s, length, c); | |
563 | if(U_IS_SURROGATE(c)) { | |
564 | // A surrogate byte sequence counts as 3 single-byte errors. | |
565 | values[countValues++]=errorValue; | |
566 | values[countValues++]=errorValue; | |
567 | values[countValues++]=errorValue; | |
568 | } else { | |
569 | values[countValues++]=value; | |
570 | } | |
571 | /* write an illegal byte sequence */ | |
340931cb | 572 | if(i8<(int32_t)sizeof(illegal)) { |
3d1f044b A |
573 | U8_FWD_1(illegal, i8, sizeof(illegal)); |
574 | while(prev8<i8) { | |
575 | s[length++]=illegal[prev8++]; | |
576 | } | |
577 | values[countValues++]=errorValue; | |
578 | } | |
579 | } | |
580 | /* write the remaining illegal byte sequences */ | |
340931cb | 581 | while(i8<(int32_t)sizeof(illegal)) { |
3d1f044b A |
582 | U8_FWD_1(illegal, i8, sizeof(illegal)); |
583 | while(prev8<i8) { | |
584 | s[length++]=illegal[prev8++]; | |
585 | } | |
586 | values[countValues++]=errorValue; | |
587 | } | |
588 | limit=s+length; | |
589 | if(length>UPRV_LENGTHOF(s)) { | |
590 | log_err("UTF-8 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s)); | |
591 | return; | |
592 | } | |
593 | if(countValues>UPRV_LENGTHOF(values)) { | |
594 | log_err("UTF-8 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values)); | |
595 | return; | |
596 | } | |
597 | ||
598 | /* try forward */ | |
599 | p=s; | |
600 | i=0; | |
601 | while(p<limit) { | |
602 | prev8=i8=(int32_t)(p-s); | |
603 | U8_NEXT(s, i8, length, c); | |
604 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
605 | UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_16, p, limit, value); | |
606 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
607 | UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_32, p, limit, value); | |
608 | } else { | |
609 | UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_8, p, limit, value); | |
610 | } | |
611 | expectedBytes=0; | |
612 | if(value!=values[i] || i8!=(p-s)) { | |
613 | int32_t k=prev8; | |
614 | while(k<i8) { | |
615 | expectedBytes=(expectedBytes<<8)|s[k++]; | |
616 | } | |
617 | } | |
618 | if(i8==(p-s)) { | |
619 | actualBytes=expectedBytes; | |
620 | } else { | |
621 | actualBytes=0; | |
622 | int32_t k=prev8; | |
623 | while(k<(p-s)) { | |
624 | actualBytes=(actualBytes<<8)|s[k++]; | |
625 | } | |
626 | } | |
627 | if(value!=values[i]) { | |
628 | log_err("error: wrong value from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx) (read %d bytes): " | |
629 | "0x%lx instead of 0x%lx (from bytes %lx)\n", | |
630 | testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)((p-s)-prev8), | |
631 | (long)value, (long)values[i], (unsigned long)expectedBytes); | |
632 | } | |
633 | if(i8!=(p-s)) { | |
634 | log_err("error: wrong end index from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx): " | |
635 | "%ld != %ld (bytes %lx)\n", | |
636 | testName, (int)prev8, (unsigned long)actualBytes, (long)c, | |
637 | (long)(p-s), (long)i8, (unsigned long)expectedBytes); | |
638 | break; | |
639 | } | |
640 | ++i; | |
641 | } | |
642 | ||
643 | /* try backward */ | |
644 | p=limit; | |
645 | i=countValues; | |
646 | while(s<p) { | |
647 | --i; | |
648 | prev8=i8=(int32_t)(p-s); | |
649 | U8_PREV(s, 0, i8, c); | |
650 | if(valueWidth==UCPTRIE_VALUE_BITS_16) { | |
651 | UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_16, s, p, value); | |
652 | } else if(valueWidth==UCPTRIE_VALUE_BITS_32) { | |
653 | UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_32, s, p, value); | |
654 | } else { | |
655 | UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_8, s, p, value); | |
656 | } | |
657 | expectedBytes=0; | |
658 | if(value!=values[i] || i8!=(p-s)) { | |
659 | int32_t k=i8; | |
660 | while(k<prev8) { | |
661 | expectedBytes=(expectedBytes<<8)|s[k++]; | |
662 | } | |
663 | } | |
664 | if(i8==(p-s)) { | |
665 | actualBytes=expectedBytes; | |
666 | } else { | |
667 | actualBytes=0; | |
668 | int32_t k=(int32_t)(p-s); | |
669 | while(k<prev8) { | |
670 | actualBytes=(actualBytes<<8)|s[k++]; | |
671 | } | |
672 | } | |
673 | if(value!=values[i]) { | |
674 | log_err("error: wrong value from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx) (read %d bytes): " | |
675 | "0x%lx instead of 0x%lx (from bytes %lx)\n", | |
676 | testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)(prev8-(p-s)), | |
677 | (long)value, (long)values[i], (unsigned long)expectedBytes); | |
678 | } | |
679 | if(i8!=(p-s)) { | |
680 | log_err("error: wrong end index from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx): " | |
681 | "%ld != %ld (bytes %lx)\n", | |
682 | testName, (int)prev8, (unsigned long)actualBytes, (long)c, | |
683 | (long)(p-s), (long)i8, (unsigned long)expectedBytes); | |
684 | break; | |
685 | } | |
686 | } | |
687 | } | |
688 | ||
689 | static void | |
690 | testTrie(const char *testName, const UCPTrie *trie, | |
691 | UCPTrieType type, UCPTrieValueWidth valueWidth, | |
692 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
693 | testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges); | |
694 | testTrieGetRanges(testName, trie, NULL, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges); | |
695 | if (type == UCPTRIE_TYPE_FAST) { | |
696 | testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges); | |
697 | testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges); | |
698 | } | |
699 | } | |
700 | ||
701 | static void | |
702 | testBuilder(const char *testName, const UMutableCPTrie *mutableTrie, | |
703 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
704 | testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges); | |
705 | testTrieGetRanges(testName, NULL, mutableTrie, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges); | |
706 | } | |
707 | ||
/*
 * File-level buffers used by testTrieSerialize():
 * storage receives the serialized trie from ucptrie_toBinary(),
 * swapped receives the output of ucptrie_swap() to the opposite endianness.
 */
static uint32_t storage[120000];
static uint32_t swapped[120000];
710 | ||
711 | static void | |
712 | testTrieSerialize(const char *testName, UMutableCPTrie *mutableTrie, | |
713 | UCPTrieType type, UCPTrieValueWidth valueWidth, UBool withSwap, | |
714 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
715 | UCPTrie *trie; | |
716 | int32_t length1, length2, length3; | |
717 | UErrorCode errorCode; | |
718 | ||
719 | /* clone the trie so that the caller can reuse the original */ | |
720 | errorCode=U_ZERO_ERROR; | |
721 | mutableTrie = umutablecptrie_clone(mutableTrie, &errorCode); | |
722 | if(U_FAILURE(errorCode)) { | |
723 | log_err("error: umutablecptrie_clone(%s) failed - %s\n", | |
724 | testName, u_errorName(errorCode)); | |
725 | return; | |
726 | } | |
727 | ||
728 | /* | |
729 | * This is not a loop, but simply a block that we can exit with "break" | |
730 | * when something goes wrong. | |
731 | */ | |
732 | do { | |
733 | errorCode=U_ZERO_ERROR; | |
734 | trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode); | |
735 | if (U_FAILURE(errorCode)) { | |
736 | log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", | |
737 | testName, u_errorName(errorCode)); | |
738 | break; | |
739 | } | |
740 | errorCode=U_ZERO_ERROR; | |
741 | length1=ucptrie_toBinary(trie, NULL, 0, &errorCode); | |
742 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { | |
743 | log_err("error: ucptrie_toBinary(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n", | |
744 | testName, u_errorName(errorCode)); | |
745 | break; | |
746 | } | |
747 | errorCode=U_ZERO_ERROR; | |
748 | length2=ucptrie_toBinary(trie, storage, sizeof(storage), &errorCode); | |
749 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
750 | log_err("error: ucptrie_toBinary(%s) needs more memory\n", testName); | |
751 | break; | |
752 | } | |
753 | if(U_FAILURE(errorCode)) { | |
754 | log_err("error: ucptrie_toBinary(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
755 | break; | |
756 | } | |
757 | if(length1!=length2) { | |
758 | log_err("error: trie serialization (%s) lengths different: " | |
759 | "preflight vs. serialize\n", testName); | |
760 | break; | |
761 | } | |
762 | ||
763 | testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges); | |
764 | ucptrie_close(trie); | |
765 | trie=NULL; | |
766 | ||
767 | if(withSwap) { | |
768 | int32_t swappedLength; | |
769 | ||
770 | UDataSwapper *ds; | |
771 | ||
772 | /* swap to opposite-endian */ | |
773 | uprv_memset(swapped, 0x55, length2); | |
774 | ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, | |
775 | !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); | |
776 | swappedLength=ucptrie_swap(ds, storage, -1, NULL, &errorCode); | |
777 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
778 | log_err("error: ucptrie_swap(%s to OE preflighting) failed (%s) " | |
779 | "or before/after lengths different\n", | |
780 | testName, u_errorName(errorCode)); | |
781 | udata_closeSwapper(ds); | |
782 | break; | |
783 | } | |
784 | swappedLength=ucptrie_swap(ds, storage, length2, swapped, &errorCode); | |
785 | udata_closeSwapper(ds); | |
786 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
787 | log_err("error: ucptrie_swap(%s to OE) failed (%s) or before/after lengths different\n", | |
788 | testName, u_errorName(errorCode)); | |
789 | break; | |
790 | } | |
791 | ||
792 | /* swap back to platform-endian */ | |
793 | uprv_memset(storage, 0xaa, length2); | |
794 | ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, | |
795 | U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); | |
796 | swappedLength=ucptrie_swap(ds, swapped, -1, NULL, &errorCode); | |
797 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
798 | log_err("error: ucptrie_swap(%s to PE preflighting) failed (%s) " | |
799 | "or before/after lengths different\n", | |
800 | testName, u_errorName(errorCode)); | |
801 | udata_closeSwapper(ds); | |
802 | break; | |
803 | } | |
804 | swappedLength=ucptrie_swap(ds, swapped, length2, storage, &errorCode); | |
805 | udata_closeSwapper(ds); | |
806 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
807 | log_err("error: ucptrie_swap(%s to PE) failed (%s) or before/after lengths different\n", | |
808 | testName, u_errorName(errorCode)); | |
809 | break; | |
810 | } | |
811 | } | |
812 | ||
813 | trie = ucptrie_openFromBinary(type, valueWidth, storage, length2, &length3, &errorCode); | |
814 | if(U_FAILURE(errorCode)) { | |
815 | log_err("error: ucptrie_openFromBinary(%s) failed, %s\n", testName, u_errorName(errorCode)); | |
816 | break; | |
817 | } | |
818 | if(type != ucptrie_getType(trie)) { | |
819 | log_err("error: trie serialization (%s) did not preserve trie type\n", testName); | |
820 | break; | |
821 | } | |
822 | if(valueWidth != ucptrie_getValueWidth(trie)) { | |
823 | log_err("error: trie serialization (%s) did not preserve data value width\n", testName); | |
824 | break; | |
825 | } | |
826 | if(length2!=length3) { | |
827 | log_err("error: trie serialization (%s) lengths different: " | |
828 | "serialize vs. unserialize\n", testName); | |
829 | break; | |
830 | } | |
831 | /* overwrite the storage that is not supposed to be needed */ | |
832 | uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3)); | |
833 | ||
834 | { | |
835 | errorCode=U_ZERO_ERROR; | |
836 | UCPTrie *any = ucptrie_openFromBinary(UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, | |
837 | storage, length3, NULL, &errorCode); | |
838 | if (U_SUCCESS(errorCode)) { | |
839 | if (type != ucptrie_getType(any)) { | |
840 | log_err("error: ucptrie_openFromBinary(" | |
841 | "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n"); | |
842 | } | |
843 | if (valueWidth != ucptrie_getValueWidth(any)) { | |
844 | log_err("error: ucptrie_openFromBinary(" | |
845 | "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n"); | |
846 | } | |
847 | ucptrie_close(any); | |
848 | } else { | |
849 | log_err("error: ucptrie_openFromBinary(" | |
850 | "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY) failed - %s\n", | |
851 | u_errorName(errorCode)); | |
852 | } | |
853 | } | |
854 | ||
855 | errorCode=U_ZERO_ERROR; | |
856 | testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges); | |
857 | { | |
858 | /* make a mutable trie from an immutable one */ | |
859 | uint32_t value, value2; | |
860 | UMutableCPTrie *mutable2 = umutablecptrie_fromUCPTrie(trie, &errorCode); | |
861 | if(U_FAILURE(errorCode)) { | |
862 | log_err("error: umutablecptrie_fromUCPTrie(unserialized %s) failed - %s\n", | |
863 | testName, u_errorName(errorCode)); | |
864 | break; | |
865 | } | |
866 | ||
867 | value=umutablecptrie_get(mutable2, 0xa1); | |
868 | umutablecptrie_set(mutable2, 0xa1, 789, &errorCode); | |
869 | value2=umutablecptrie_get(mutable2, 0xa1); | |
870 | umutablecptrie_set(mutable2, 0xa1, value, &errorCode); | |
871 | if(U_FAILURE(errorCode) || value2!=789) { | |
872 | log_err("error: modifying a mutableTrie-from-UCPTrie (%s) failed - %s\n", | |
873 | testName, u_errorName(errorCode)); | |
874 | } | |
875 | testBuilder(testName, mutable2, checkRanges, countCheckRanges); | |
876 | umutablecptrie_close(mutable2); | |
877 | } | |
878 | } while(0); | |
879 | ||
880 | umutablecptrie_close(mutableTrie); | |
881 | ucptrie_close(trie); | |
882 | } | |
883 | ||
884 | static UMutableCPTrie * | |
885 | testTrieSerializeAllValueWidth(const char *testName, | |
886 | UMutableCPTrie *mutableTrie, UBool withClone, | |
887 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
888 | char name[40]; | |
889 | uint32_t oredValues = 0; | |
890 | int32_t i; | |
891 | for (i = 0; i < countCheckRanges; ++i) { | |
892 | oredValues |= checkRanges[i].value; | |
893 | } | |
894 | ||
895 | testBuilder(testName, mutableTrie, checkRanges, countCheckRanges); | |
896 | ||
897 | if (oredValues <= 0xffff) { | |
898 | uprv_strcpy(name, testName); | |
899 | uprv_strcat(name, ".16"); | |
900 | testTrieSerialize(name, mutableTrie, | |
901 | UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, withClone, | |
902 | checkRanges, countCheckRanges); | |
903 | } | |
904 | ||
905 | uprv_strcpy(name, testName); | |
906 | uprv_strcat(name, ".32"); | |
907 | testTrieSerialize(name, mutableTrie, | |
908 | UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_32, withClone, | |
909 | checkRanges, countCheckRanges); | |
910 | ||
911 | if (oredValues <= 0xff) { | |
912 | uprv_strcpy(name, testName); | |
913 | uprv_strcat(name, ".8"); | |
914 | testTrieSerialize(name, mutableTrie, | |
915 | UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, withClone, | |
916 | checkRanges, countCheckRanges); | |
917 | } | |
918 | ||
919 | if (oredValues <= 0xffff) { | |
920 | uprv_strcpy(name, testName); | |
921 | uprv_strcat(name, ".small16"); | |
922 | testTrieSerialize(name, mutableTrie, | |
923 | UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_16, withClone, | |
924 | checkRanges, countCheckRanges); | |
925 | } | |
926 | ||
927 | return mutableTrie; | |
928 | } | |
929 | ||
930 | static UMutableCPTrie * | |
931 | makeTrieWithRanges(const char *testName, UBool withClone, | |
932 | const SetRange setRanges[], int32_t countSetRanges, | |
933 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
934 | UMutableCPTrie *mutableTrie; | |
935 | uint32_t initialValue, errorValue; | |
936 | uint32_t value; | |
937 | UChar32 start, limit; | |
938 | int32_t i; | |
939 | UErrorCode errorCode; | |
940 | ||
941 | log_verbose("\ntesting Trie '%s'\n", testName); | |
942 | errorCode=U_ZERO_ERROR; | |
943 | getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
944 | mutableTrie = umutablecptrie_open(initialValue, errorValue, &errorCode); | |
945 | if(U_FAILURE(errorCode)) { | |
946 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
947 | return NULL; | |
948 | } | |
949 | ||
950 | /* set values from setRanges[] */ | |
951 | for(i=0; i<countSetRanges; ++i) { | |
952 | if(withClone && i==countSetRanges/2) { | |
953 | /* switch to a clone in the middle of setting values */ | |
954 | UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode); | |
955 | if(U_FAILURE(errorCode)) { | |
956 | log_err("error: umutablecptrie_clone(%s) failed - %s\n", | |
957 | testName, u_errorName(errorCode)); | |
958 | errorCode=U_ZERO_ERROR; /* continue with the original */ | |
959 | } else { | |
960 | umutablecptrie_close(mutableTrie); | |
961 | mutableTrie = clone; | |
962 | } | |
963 | } | |
964 | start=setRanges[i].start; | |
965 | limit=setRanges[i].limit; | |
966 | value=setRanges[i].value; | |
967 | if ((limit - start) == 1) { | |
968 | umutablecptrie_set(mutableTrie, start, value, &errorCode); | |
969 | } else { | |
970 | umutablecptrie_setRange(mutableTrie, start, limit-1, value, &errorCode); | |
971 | } | |
972 | } | |
973 | ||
974 | if(U_SUCCESS(errorCode)) { | |
975 | return mutableTrie; | |
976 | } else { | |
977 | log_err("error: setting values into a mutable trie (%s) failed - %s\n", | |
978 | testName, u_errorName(errorCode)); | |
979 | umutablecptrie_close(mutableTrie); | |
980 | return NULL; | |
981 | } | |
982 | } | |
983 | ||
984 | static void | |
985 | testTrieRanges(const char *testName, UBool withClone, | |
986 | const SetRange setRanges[], int32_t countSetRanges, | |
987 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
988 | UMutableCPTrie *mutableTrie = makeTrieWithRanges( | |
989 | testName, withClone, setRanges, countSetRanges, checkRanges, countCheckRanges); | |
990 | if (mutableTrie != NULL) { | |
991 | mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, withClone, | |
992 | checkRanges, countCheckRanges); | |
993 | umutablecptrie_close(mutableTrie); | |
994 | } | |
995 | } | |
996 | ||
997 | /* test data ----------------------------------------------------------------*/ | |
998 | ||
999 | /* set consecutive ranges, even with value 0 */ | |
1000 | static const SetRange | |
1001 | setRanges1[]={ | |
1002 | { 0, 0x40, 0 }, | |
1003 | { 0x40, 0xe7, 0x34 }, | |
1004 | { 0xe7, 0x3400, 0 }, | |
1005 | { 0x3400, 0x9fa6, 0x61 }, | |
1006 | { 0x9fa6, 0xda9e, 0x31 }, | |
1007 | { 0xdada, 0xeeee, 0xff }, | |
1008 | { 0xeeee, 0x11111, 1 }, | |
1009 | { 0x11111, 0x44444, 0x61 }, | |
1010 | { 0x44444, 0x60003, 0 }, | |
1011 | { 0xf0003, 0xf0004, 0xf }, | |
1012 | { 0xf0004, 0xf0006, 0x10 }, | |
1013 | { 0xf0006, 0xf0007, 0x11 }, | |
1014 | { 0xf0007, 0xf0040, 0x12 }, | |
1015 | { 0xf0040, 0x110000, 0 } | |
1016 | }; | |
1017 | ||
1018 | static const CheckRange | |
1019 | checkRanges1[]={ | |
1020 | { 0, 0 }, | |
1021 | { 0x40, 0 }, | |
1022 | { 0xe7, 0x34 }, | |
1023 | { 0x3400, 0 }, | |
1024 | { 0x9fa6, 0x61 }, | |
1025 | { 0xda9e, 0x31 }, | |
1026 | { 0xdada, 0 }, | |
1027 | { 0xeeee, 0xff }, | |
1028 | { 0x11111, 1 }, | |
1029 | { 0x44444, 0x61 }, | |
1030 | { 0xf0003, 0 }, | |
1031 | { 0xf0004, 0xf }, | |
1032 | { 0xf0006, 0x10 }, | |
1033 | { 0xf0007, 0x11 }, | |
1034 | { 0xf0040, 0x12 }, | |
1035 | { 0x110000, 0 } | |
1036 | }; | |
1037 | ||
1038 | /* set some interesting overlapping ranges */ | |
1039 | static const SetRange | |
1040 | setRanges2[]={ | |
1041 | { 0x21, 0x7f, 0x5555 }, | |
1042 | { 0x2f800, 0x2fedc, 0x7a }, | |
1043 | { 0x72, 0xdd, 3 }, | |
1044 | { 0xdd, 0xde, 4 }, | |
1045 | { 0x201, 0x240, 6 }, /* 3 consecutive blocks with the same pattern but */ | |
1046 | { 0x241, 0x280, 6 }, /* discontiguous value ranges, testing iteration */ | |
1047 | { 0x281, 0x2c0, 6 }, | |
1048 | { 0x2f987, 0x2fa98, 5 }, | |
1049 | { 0x2f777, 0x2f883, 0 }, | |
1050 | { 0x2fedc, 0x2ffaa, 1 }, | |
1051 | { 0x2ffaa, 0x2ffab, 2 }, | |
1052 | { 0x2ffbb, 0x2ffc0, 7 } | |
1053 | }; | |
1054 | ||
1055 | static const CheckRange | |
1056 | checkRanges2[]={ | |
1057 | { 0, 0 }, | |
1058 | { 0x21, 0 }, | |
1059 | { 0x72, 0x5555 }, | |
1060 | { 0xdd, 3 }, | |
1061 | { 0xde, 4 }, | |
1062 | { 0x201, 0 }, | |
1063 | { 0x240, 6 }, | |
1064 | { 0x241, 0 }, | |
1065 | { 0x280, 6 }, | |
1066 | { 0x281, 0 }, | |
1067 | { 0x2c0, 6 }, | |
1068 | { 0x2f883, 0 }, | |
1069 | { 0x2f987, 0x7a }, | |
1070 | { 0x2fa98, 5 }, | |
1071 | { 0x2fedc, 0x7a }, | |
1072 | { 0x2ffaa, 1 }, | |
1073 | { 0x2ffab, 2 }, | |
1074 | { 0x2ffbb, 0 }, | |
1075 | { 0x2ffc0, 7 }, | |
1076 | { 0x110000, 0 } | |
1077 | }; | |
1078 | ||
1079 | /* use a non-zero initial value */ | |
1080 | static const SetRange | |
1081 | setRanges3[]={ | |
1082 | { 0x31, 0xa4, 1 }, | |
1083 | { 0x3400, 0x6789, 2 }, | |
1084 | { 0x8000, 0x89ab, 9 }, | |
1085 | { 0x9000, 0xa000, 4 }, | |
1086 | { 0xabcd, 0xbcde, 3 }, | |
1087 | { 0x55555, 0x110000, 6 }, /* highStart<U+ffff with non-initialValue */ | |
1088 | { 0xcccc, 0x55555, 6 } | |
1089 | }; | |
1090 | ||
1091 | static const CheckRange | |
1092 | checkRanges3[]={ | |
1093 | { 0, 9 }, /* non-zero initialValue */ | |
1094 | { 0x31, 9 }, | |
1095 | { 0xa4, 1 }, | |
1096 | { 0x3400, 9 }, | |
1097 | { 0x6789, 2 }, | |
1098 | { 0x9000, 9 }, | |
1099 | { 0xa000, 4 }, | |
1100 | { 0xabcd, 9 }, | |
1101 | { 0xbcde, 3 }, | |
1102 | { 0xcccc, 9 }, | |
1103 | { 0x110000, 6 } | |
1104 | }; | |
1105 | ||
1106 | /* empty or single-value tries, testing highStart==0 */ | |
1107 | static const SetRange | |
1108 | setRangesEmpty[]={ | |
1109 | { 0, 0, 0 }, /* need some values for it to compile */ | |
1110 | }; | |
1111 | ||
1112 | static const CheckRange | |
1113 | checkRangesEmpty[]={ | |
1114 | { 0, 3 }, | |
1115 | { 0x110000, 3 } | |
1116 | }; | |
1117 | ||
1118 | static const SetRange | |
1119 | setRangesSingleValue[]={ | |
1120 | { 0, 0x110000, 5 }, | |
1121 | }; | |
1122 | ||
1123 | static const CheckRange | |
1124 | checkRangesSingleValue[]={ | |
1125 | { 0, 3 }, | |
1126 | { 0x110000, 5 } | |
1127 | }; | |
1128 | ||
1129 | static void | |
1130 | TrieTestSet1(void) { | |
1131 | testTrieRanges("set1", FALSE, | |
1132 | setRanges1, UPRV_LENGTHOF(setRanges1), | |
1133 | checkRanges1, UPRV_LENGTHOF(checkRanges1)); | |
1134 | } | |
1135 | ||
1136 | static void | |
1137 | TrieTestSet2Overlap(void) { | |
1138 | testTrieRanges("set2-overlap", FALSE, | |
1139 | setRanges2, UPRV_LENGTHOF(setRanges2), | |
1140 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
1141 | } | |
1142 | ||
1143 | static void | |
1144 | TrieTestSet3Initial9(void) { | |
1145 | testTrieRanges("set3-initial-9", FALSE, | |
1146 | setRanges3, UPRV_LENGTHOF(setRanges3), | |
1147 | checkRanges3, UPRV_LENGTHOF(checkRanges3)); | |
1148 | } | |
1149 | ||
1150 | static void | |
1151 | TrieTestSetEmpty(void) { | |
1152 | testTrieRanges("set-empty", FALSE, | |
1153 | setRangesEmpty, 0, | |
1154 | checkRangesEmpty, UPRV_LENGTHOF(checkRangesEmpty)); | |
1155 | } | |
1156 | ||
1157 | static void | |
1158 | TrieTestSetSingleValue(void) { | |
1159 | testTrieRanges("set-single-value", FALSE, | |
1160 | setRangesSingleValue, UPRV_LENGTHOF(setRangesSingleValue), | |
1161 | checkRangesSingleValue, UPRV_LENGTHOF(checkRangesSingleValue)); | |
1162 | } | |
1163 | ||
1164 | static void | |
1165 | TrieTestSet2OverlapWithClone(void) { | |
1166 | testTrieRanges("set2-overlap.withClone", TRUE, | |
1167 | setRanges2, UPRV_LENGTHOF(setRanges2), | |
1168 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
1169 | } | |
1170 | ||
1171 | /* test mutable-trie memory management -------------------------------------- */ | |
1172 | ||
1173 | static void | |
1174 | FreeBlocksTest(void) { | |
1175 | static const CheckRange | |
1176 | checkRanges[]={ | |
1177 | { 0, 1 }, | |
1178 | { 0x740, 1 }, | |
1179 | { 0x780, 2 }, | |
1180 | { 0x880, 3 }, | |
1181 | { 0x110000, 1 } | |
1182 | }; | |
1183 | static const char *const testName="free-blocks"; | |
1184 | ||
1185 | UMutableCPTrie *mutableTrie; | |
1186 | int32_t i; | |
1187 | UErrorCode errorCode; | |
1188 | ||
1189 | errorCode=U_ZERO_ERROR; | |
1190 | mutableTrie=umutablecptrie_open(1, 0xad, &errorCode); | |
1191 | if(U_FAILURE(errorCode)) { | |
1192 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1193 | return; | |
1194 | } | |
1195 | ||
1196 | /* | |
1197 | * Repeatedly set overlapping same-value ranges to stress the free-data-block management. | |
1198 | * If it fails, it will overflow the data array. | |
1199 | */ | |
1200 | for(i=0; i<(0x120000>>4)/2; ++i) { // 4=UCPTRIE_SHIFT_3 | |
1201 | umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 1, &errorCode); | |
1202 | umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 1, &errorCode); | |
1203 | umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 2, &errorCode); | |
1204 | umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 3, &errorCode); | |
1205 | } | |
1206 | /* make blocks that will be free during compaction */ | |
1207 | umutablecptrie_setRange(mutableTrie, 0x1000, 0x3000-1, 2, &errorCode); | |
1208 | umutablecptrie_setRange(mutableTrie, 0x2000, 0x4000-1, 3, &errorCode); | |
1209 | umutablecptrie_setRange(mutableTrie, 0x1000, 0x4000-1, 1, &errorCode); | |
1210 | if(U_FAILURE(errorCode)) { | |
1211 | log_err("error: setting lots of ranges into a mutable trie (%s) failed - %s\n", | |
1212 | testName, u_errorName(errorCode)); | |
1213 | umutablecptrie_close(mutableTrie); | |
1214 | return; | |
1215 | } | |
1216 | ||
1217 | mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, | |
1218 | checkRanges, UPRV_LENGTHOF(checkRanges)); | |
1219 | umutablecptrie_close(mutableTrie); | |
1220 | } | |
1221 | ||
1222 | static void | |
1223 | GrowDataArrayTest(void) { | |
1224 | static const CheckRange | |
1225 | checkRanges[]={ | |
1226 | { 0, 1 }, | |
1227 | { 0x720, 2 }, | |
1228 | { 0x7a0, 3 }, | |
1229 | { 0x8a0, 4 }, | |
1230 | { 0x110000, 5 } | |
1231 | }; | |
1232 | static const char *const testName="grow-data"; | |
1233 | ||
1234 | UMutableCPTrie *mutableTrie; | |
1235 | int32_t i; | |
1236 | UErrorCode errorCode; | |
1237 | ||
1238 | errorCode=U_ZERO_ERROR; | |
1239 | mutableTrie=umutablecptrie_open(1, 0xad, &errorCode); | |
1240 | if(U_FAILURE(errorCode)) { | |
1241 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1242 | return; | |
1243 | } | |
1244 | ||
1245 | /* | |
1246 | * Use umutablecptrie_set() not umutablecptrie_setRange() to write non-initialValue-data. | |
1247 | * Should grow/reallocate the data array to a sufficient length. | |
1248 | */ | |
1249 | for(i=0; i<0x1000; ++i) { | |
1250 | umutablecptrie_set(mutableTrie, i, 2, &errorCode); | |
1251 | } | |
1252 | for(i=0x720; i<0x1100; ++i) { /* some overlap */ | |
1253 | umutablecptrie_set(mutableTrie, i, 3, &errorCode); | |
1254 | } | |
1255 | for(i=0x7a0; i<0x900; ++i) { | |
1256 | umutablecptrie_set(mutableTrie, i, 4, &errorCode); | |
1257 | } | |
1258 | for(i=0x8a0; i<0x110000; ++i) { | |
1259 | umutablecptrie_set(mutableTrie, i, 5, &errorCode); | |
1260 | } | |
1261 | if(U_FAILURE(errorCode)) { | |
1262 | log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n", | |
1263 | testName, u_errorName(errorCode)); | |
1264 | umutablecptrie_close(mutableTrie); | |
1265 | return; | |
1266 | } | |
1267 | ||
1268 | mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, | |
1269 | checkRanges, UPRV_LENGTHOF(checkRanges)); | |
1270 | umutablecptrie_close(mutableTrie); | |
1271 | } | |
1272 | ||
1273 | static void | |
1274 | ManyAllSameBlocksTest(void) { | |
1275 | static const char *const testName="many-all-same"; | |
1276 | ||
1277 | UMutableCPTrie *mutableTrie; | |
1278 | int32_t i; | |
1279 | UErrorCode errorCode; | |
1280 | CheckRange checkRanges[(0x110000 >> 12) + 1]; | |
1281 | ||
1282 | errorCode = U_ZERO_ERROR; | |
1283 | mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode); | |
1284 | if (U_FAILURE(errorCode)) { | |
1285 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1286 | return; | |
1287 | } | |
1288 | checkRanges[0].limit = 0; | |
1289 | checkRanges[0].value = 0xff33; // initialValue | |
1290 | ||
1291 | // Many all-same-value blocks. | |
1292 | for (i = 0; i < 0x110000; i += 0x1000) { | |
1293 | uint32_t value = i >> 12; | |
1294 | umutablecptrie_setRange(mutableTrie, i, i + 0xfff, value, &errorCode); | |
1295 | checkRanges[value + 1].limit = i + 0x1000; | |
1296 | checkRanges[value + 1].value = value; | |
1297 | } | |
1298 | for (i = 0; i < 0x110000; i += 0x1000) { | |
1299 | uint32_t expected = i >> 12; | |
1300 | uint32_t v0 = umutablecptrie_get(mutableTrie, i); | |
1301 | uint32_t vfff = umutablecptrie_get(mutableTrie, i + 0xfff); | |
1302 | if (v0 != expected || vfff != expected) { | |
1303 | log_err("error: UMutableCPTrie U+%04lx unexpected value\n", (long)i); | |
1304 | } | |
1305 | } | |
1306 | ||
1307 | mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, | |
1308 | checkRanges, UPRV_LENGTHOF(checkRanges)); | |
1309 | umutablecptrie_close(mutableTrie); | |
1310 | } | |
1311 | ||
1312 | static void | |
1313 | MuchDataTest(void) { | |
1314 | static const char *const testName="much-data"; | |
1315 | ||
1316 | UMutableCPTrie *mutableTrie; | |
1317 | int32_t r, c; | |
1318 | UErrorCode errorCode = U_ZERO_ERROR; | |
1319 | CheckRange checkRanges[(0x10000 >> 6) + (0x10240 >> 4) + 10]; | |
1320 | ||
1321 | mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode); | |
1322 | if (U_FAILURE(errorCode)) { | |
1323 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1324 | return; | |
1325 | } | |
1326 | checkRanges[0].limit = 0; | |
1327 | checkRanges[0].value = 0xff33; // initialValue | |
1328 | r = 1; | |
1329 | ||
1330 | // Add much data that does not compact well, | |
1331 | // to get more than 128k data values after compaction. | |
1332 | for (c = 0; c < 0x10000; c += 0x40) { | |
1333 | uint32_t value = c >> 4; | |
1334 | umutablecptrie_setRange(mutableTrie, c, c + 0x3f, value, &errorCode); | |
1335 | checkRanges[r].limit = c + 0x40; | |
1336 | checkRanges[r++].value = value; | |
1337 | } | |
1338 | checkRanges[r].limit = 0x20000; | |
1339 | checkRanges[r++].value = 0xff33; | |
1340 | for (c = 0x20000; c < 0x30230; c += 0x10) { | |
1341 | uint32_t value = c >> 4; | |
1342 | umutablecptrie_setRange(mutableTrie, c, c + 0xf, value, &errorCode); | |
1343 | checkRanges[r].limit = c + 0x10; | |
1344 | checkRanges[r++].value = value; | |
1345 | } | |
1346 | umutablecptrie_setRange(mutableTrie, 0x30230, 0x30233, 0x3023, &errorCode); | |
1347 | checkRanges[r].limit = 0x30234; | |
1348 | checkRanges[r++].value = 0x3023; | |
1349 | umutablecptrie_setRange(mutableTrie, 0x30234, 0xdffff, 0x5005, &errorCode); | |
1350 | checkRanges[r].limit = 0xe0000; | |
1351 | checkRanges[r++].value = 0x5005; | |
1352 | umutablecptrie_setRange(mutableTrie, 0xe0000, 0x10ffff, 0x9009, &errorCode); | |
1353 | checkRanges[r].limit = 0x110000; | |
1354 | checkRanges[r++].value = 0x9009; | |
1355 | if (U_FAILURE(errorCode)) { | |
1356 | log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n", | |
1357 | testName, u_errorName(errorCode)); | |
1358 | umutablecptrie_close(mutableTrie); | |
1359 | return; | |
1360 | } | |
1361 | U_ASSERT(r <= UPRV_LENGTHOF(checkRanges)); | |
1362 | ||
1363 | testBuilder(testName, mutableTrie, checkRanges, r); | |
1364 | testTrieSerialize("much-data.16", mutableTrie, | |
1365 | UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, FALSE, checkRanges, r); | |
1366 | umutablecptrie_close(mutableTrie); | |
1367 | } | |
1368 | ||
1369 | static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie, | |
1370 | UCPMapRangeOption option, | |
1371 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
1372 | testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges); | |
1373 | UErrorCode errorCode = U_ZERO_ERROR; | |
1374 | UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode); | |
1375 | UCPTrie *trie; | |
1376 | if (U_FAILURE(errorCode)) { | |
1377 | log_err("error: umutablecptrie_clone(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1378 | return; | |
1379 | } | |
1380 | trie = umutablecptrie_buildImmutable(clone, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, &errorCode); | |
1381 | umutablecptrie_close(clone); | |
1382 | if (U_FAILURE(errorCode)) { | |
1383 | log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1384 | return; | |
1385 | } | |
1386 | testTrieGetRanges(testName, trie, NULL, option, 5, checkRanges, countCheckRanges); | |
1387 | ucptrie_close(trie); | |
1388 | } | |
1389 | ||
1390 | static void | |
1391 | TrieTestGetRangesFixedSurr(void) { | |
1392 | static const SetRange | |
1393 | setRangesFixedSurr[]={ | |
1394 | { 0xd000, 0xd7ff, 5 }, | |
1395 | { 0xd7ff, 0xe001, 3 }, | |
1396 | { 0xe001, 0xf900, 5 }, | |
1397 | }; | |
1398 | ||
1399 | static const CheckRange | |
1400 | checkRangesFixedLeadSurr1[]={ | |
1401 | { 0, 0 }, | |
1402 | { 0xd000, 0 }, | |
1403 | { 0xd7ff, 5 }, | |
1404 | { 0xd800, 3 }, | |
1405 | { 0xdc00, 5 }, | |
1406 | { 0xe001, 3 }, | |
1407 | { 0xf900, 5 }, | |
1408 | { 0x110000, 0 } | |
1409 | }; | |
1410 | ||
1411 | static const CheckRange | |
1412 | checkRangesFixedAllSurr1[]={ | |
1413 | { 0, 0 }, | |
1414 | { 0xd000, 0 }, | |
1415 | { 0xd7ff, 5 }, | |
1416 | { 0xd800, 3 }, | |
1417 | { 0xe000, 5 }, | |
1418 | { 0xe001, 3 }, | |
1419 | { 0xf900, 5 }, | |
1420 | { 0x110000, 0 } | |
1421 | }; | |
1422 | ||
1423 | static const CheckRange | |
1424 | checkRangesFixedLeadSurr3[]={ | |
1425 | { 0, 0 }, | |
1426 | { 0xd000, 0 }, | |
1427 | { 0xdc00, 5 }, | |
1428 | { 0xe001, 3 }, | |
1429 | { 0xf900, 5 }, | |
1430 | { 0x110000, 0 } | |
1431 | }; | |
1432 | ||
1433 | static const CheckRange | |
1434 | checkRangesFixedAllSurr3[]={ | |
1435 | { 0, 0 }, | |
1436 | { 0xd000, 0 }, | |
1437 | { 0xe000, 5 }, | |
1438 | { 0xe001, 3 }, | |
1439 | { 0xf900, 5 }, | |
1440 | { 0x110000, 0 } | |
1441 | }; | |
1442 | ||
1443 | static const CheckRange | |
1444 | checkRangesFixedSurr4[]={ | |
1445 | { 0, 0 }, | |
1446 | { 0xd000, 0 }, | |
1447 | { 0xf900, 5 }, | |
1448 | { 0x110000, 0 } | |
1449 | }; | |
1450 | ||
1451 | UMutableCPTrie *mutableTrie = makeTrieWithRanges( | |
1452 | "fixedSurr", FALSE, setRangesFixedSurr, UPRV_LENGTHOF(setRangesFixedSurr), | |
1453 | checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); | |
1454 | UErrorCode errorCode = U_ZERO_ERROR; | |
1455 | if (mutableTrie == NULL) { | |
1456 | return; | |
1457 | } | |
1458 | testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, | |
1459 | checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); | |
1460 | testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES, | |
1461 | checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1)); | |
1462 | // Setting a range in the middle of lead surrogates makes no difference. | |
1463 | umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode); | |
1464 | if (U_FAILURE(errorCode)) { | |
1465 | log_err("error: umutablecptrie_setRange(fixedSurr2) failed: %s\n", u_errorName(errorCode)); | |
1466 | umutablecptrie_close(mutableTrie); | |
1467 | return; | |
1468 | } | |
1469 | testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, | |
1470 | checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1)); | |
1471 | // Bridge the gap before the lead surrogates. | |
1472 | umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode); | |
1473 | if (U_FAILURE(errorCode)) { | |
1474 | log_err("error: umutablecptrie_set(fixedSurr3) failed: %s\n", u_errorName(errorCode)); | |
1475 | umutablecptrie_close(mutableTrie); | |
1476 | return; | |
1477 | } | |
1478 | testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, | |
1479 | checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3)); | |
1480 | testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES, | |
1481 | checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3)); | |
1482 | // Bridge the gap after the trail surrogates. | |
1483 | umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode); | |
1484 | if (U_FAILURE(errorCode)) { | |
1485 | log_err("error: umutablecptrie_set(fixedSurr4) failed: %s\n", u_errorName(errorCode)); | |
1486 | umutablecptrie_close(mutableTrie); | |
1487 | return; | |
1488 | } | |
1489 | testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES, | |
1490 | checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4)); | |
1491 | umutablecptrie_close(mutableTrie); | |
1492 | } | |
1493 | ||
1494 | static void TestSmallNullBlockMatchesFast(void) { | |
1495 | // The initial builder+getRange code had a bug: | |
1496 | // When there is no null data block in the fast-index range, | |
1497 | // but a fast-range data block starts with enough values to match a small data block, | |
1498 | // then getRange() got confused. | |
1499 | // The builder must prevent this. | |
1500 | static const SetRange setRanges[] = { | |
1501 | { 0, 0x880, 1 }, | |
1502 | // U+0880..U+088F map to initial value 0, potential match for small null data block. | |
1503 | { 0x890, 0x1040, 2 }, | |
1504 | // U+1040..U+1050 map to 0. | |
1505 | // First small null data block in a small-type trie. | |
1506 | // In a fast-type trie, it is ok to match a small null data block at U+1041 | |
1507 | // but not at U+1040. | |
1508 | { 0x1051, 0x10000, 3 }, | |
1509 | // No fast data block (block length 64) filled with 0 regardless of trie type. | |
1510 | // Need more blocks filled with 0 than the largest range above, | |
1511 | // and need a highStart above that so that it actually counts. | |
1512 | { 0x20000, 0x110000, 9 } | |
1513 | }; | |
1514 | ||
1515 | static const CheckRange checkRanges[] = { | |
1516 | { 0x0880, 1 }, | |
1517 | { 0x0890, 0 }, | |
1518 | { 0x1040, 2 }, | |
1519 | { 0x1051, 0 }, | |
1520 | { 0x10000, 3 }, | |
1521 | { 0x20000, 0 }, | |
1522 | { 0x110000, 9 } | |
1523 | }; | |
1524 | ||
1525 | testTrieRanges("small0-in-fast", FALSE, | |
1526 | setRanges, UPRV_LENGTHOF(setRanges), | |
1527 | checkRanges, UPRV_LENGTHOF(checkRanges)); | |
1528 | } | |
1529 | ||
1530 | static void ShortAllSameBlocksTest(void) { | |
1531 | static const char *const testName = "short-all-same"; | |
1532 | // Many all-same-value blocks but only of the small block length used in the mutable trie. | |
1533 | // The builder code needs to turn a group of short ALL_SAME blocks below fastLimit | |
1534 | // into a MIXED block, and reserve data array capacity for that. | |
1535 | UErrorCode errorCode = U_ZERO_ERROR; | |
1536 | UMutableCPTrie *mutableTrie = umutablecptrie_open(0, 0xad, &errorCode); | |
1537 | CheckRange checkRanges[0x101]; | |
1538 | int32_t i; | |
1539 | if (U_FAILURE(errorCode)) { | |
1540 | log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1541 | return; | |
1542 | } | |
1543 | for (i = 0; i < 0x1000; i += 0x10) { | |
1544 | uint32_t value = i >> 4; | |
1545 | umutablecptrie_setRange(mutableTrie, i, i + 0xf, value, &errorCode); | |
1546 | checkRanges[value].limit = i + 0x10; | |
1547 | checkRanges[value].value = value; | |
1548 | } | |
1549 | checkRanges[0x100].limit = 0x110000; | |
1550 | checkRanges[0x100].value = 0; | |
1551 | if (U_FAILURE(errorCode)) { | |
1552 | log_err("error: setting values into a mutable trie (%s) failed - %s\n", | |
1553 | testName, u_errorName(errorCode)); | |
1554 | umutablecptrie_close(mutableTrie); | |
1555 | return; | |
1556 | } | |
1557 | ||
1558 | mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE, | |
1559 | checkRanges, UPRV_LENGTHOF(checkRanges)); | |
1560 | umutablecptrie_close(mutableTrie); | |
1561 | } | |
1562 | ||
1563 | void | |
1564 | addUCPTrieTest(TestNode** root) { | |
1565 | addTest(root, &TrieTestSet1, "tsutil/ucptrietest/TrieTestSet1"); | |
1566 | addTest(root, &TrieTestSet2Overlap, "tsutil/ucptrietest/TrieTestSet2Overlap"); | |
1567 | addTest(root, &TrieTestSet3Initial9, "tsutil/ucptrietest/TrieTestSet3Initial9"); | |
1568 | addTest(root, &TrieTestSetEmpty, "tsutil/ucptrietest/TrieTestSetEmpty"); | |
1569 | addTest(root, &TrieTestSetSingleValue, "tsutil/ucptrietest/TrieTestSetSingleValue"); | |
1570 | addTest(root, &TrieTestSet2OverlapWithClone, "tsutil/ucptrietest/TrieTestSet2OverlapWithClone"); | |
1571 | addTest(root, &FreeBlocksTest, "tsutil/ucptrietest/FreeBlocksTest"); | |
1572 | addTest(root, &GrowDataArrayTest, "tsutil/ucptrietest/GrowDataArrayTest"); | |
1573 | addTest(root, &ManyAllSameBlocksTest, "tsutil/ucptrietest/ManyAllSameBlocksTest"); | |
1574 | addTest(root, &MuchDataTest, "tsutil/ucptrietest/MuchDataTest"); | |
1575 | addTest(root, &TrieTestGetRangesFixedSurr, "tsutil/ucptrietest/TrieTestGetRangesFixedSurr"); | |
1576 | addTest(root, &TestSmallNullBlockMatchesFast, "tsutil/ucptrietest/TestSmallNullBlockMatchesFast"); | |
1577 | addTest(root, &ShortAllSameBlocksTest, "tsutil/ucptrietest/ShortAllSameBlocksTest"); | |
1578 | } |