]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ****************************************************************************** | |
5 | * | |
b331163b | 6 | * Copyright (C) 2001-2014, International Business Machines |
729e4ab9 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ****************************************************************************** | |
10 | * file name: trietest.c | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
729e4ab9 A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2008sep01 (starting from a copy of trietest.c) | |
16 | * created by: Markus W. Scherer | |
17 | */ | |
18 | ||
19 | #include <stdio.h> | |
20 | #include "unicode/utypes.h" | |
0f5d89e8 | 21 | #include "unicode/utf8.h" |
729e4ab9 A |
22 | #include "utrie2.h" |
23 | #include "utrie.h" | |
24 | #include "cstring.h" | |
25 | #include "cmemory.h" | |
26 | #include "udataswp.h" | |
27 | #include "cintltst.h" | |
28 | ||
729e4ab9 A |
29 | void addTrie2Test(TestNode** root); |
30 | ||
31 | /* Values for setting possibly overlapping, out-of-order ranges of values */ | |
32 | typedef struct SetRange { | |
33 | UChar32 start, limit; | |
34 | uint32_t value; | |
35 | UBool overwrite; | |
36 | } SetRange; | |
37 | ||
38 | /* | |
39 | * Values for testing: | |
40 | * value is set from the previous boundary's limit to before | |
41 | * this boundary's limit | |
42 | * | |
43 | * There must be an entry with limit 0 and the initialValue. | |
44 | * It may be preceded by an entry with negative limit and the errorValue. | |
45 | */ | |
46 | typedef struct CheckRange { | |
47 | UChar32 limit; | |
48 | uint32_t value; | |
49 | } CheckRange; | |
50 | ||
51 | static int32_t | |
52 | skipSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges) { | |
53 | int32_t i; | |
54 | for(i=0; i<countCheckRanges && checkRanges[i].limit<=0; ++i) {} | |
55 | return i; | |
56 | } | |
57 | ||
58 | static int32_t | |
59 | getSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges, | |
60 | uint32_t *pInitialValue, uint32_t *pErrorValue) { | |
61 | int32_t i=0; | |
62 | if(i<countCheckRanges && checkRanges[i].limit<0) { | |
63 | *pErrorValue=checkRanges[i++].value; | |
64 | } else { | |
65 | *pErrorValue=0xbad; | |
66 | } | |
67 | if(i<countCheckRanges && checkRanges[i].limit==0) { | |
68 | *pInitialValue=checkRanges[i++].value; | |
69 | } else { | |
70 | *pInitialValue=0; | |
71 | } | |
72 | return i; | |
73 | } | |
74 | ||
75 | /* utrie2_enum() callback, modifies a value */ | |
76 | static uint32_t U_CALLCONV | |
77 | testEnumValue(const void *context, uint32_t value) { | |
340931cb | 78 | (void)context; // suppress compiler warnings about unused variable |
729e4ab9 A |
79 | return value^0x5555; |
80 | } | |
81 | ||
82 | /* utrie2_enum() callback, verifies a range */ | |
83 | static UBool U_CALLCONV | |
84 | testEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { | |
85 | const CheckRange **pb=(const CheckRange **)context; | |
86 | const CheckRange *b=(*pb)++; | |
87 | UChar32 limit=end+1; | |
88 | ||
89 | value^=0x5555; | |
90 | if(start!=(b-1)->limit || limit!=b->limit || value!=b->value) { | |
91 | log_err("error: utrie2_enum() delivers wrong range [U+%04lx..U+%04lx].0x%lx instead of [U+%04lx..U+%04lx].0x%lx\n", | |
92 | (long)start, (long)end, (long)value, | |
93 | (long)(b-1)->limit, (long)b->limit-1, (long)b->value); | |
94 | } | |
95 | return TRUE; | |
96 | } | |
97 | ||
98 | static void | |
99 | testTrieEnum(const char *testName, | |
100 | const UTrie2 *trie, | |
101 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
340931cb | 102 | (void)testName; // suppress compiler warnings about unused variable |
729e4ab9 A |
103 | /* skip over special values */ |
104 | while(countCheckRanges>0 && checkRanges[0].limit<=0) { | |
105 | ++checkRanges; | |
106 | --countCheckRanges; | |
107 | } | |
108 | utrie2_enum(trie, testEnumValue, testEnumRange, &checkRanges); | |
109 | } | |
110 | ||
111 | /* verify all expected values via UTRIE2_GETxx() */ | |
112 | static void | |
113 | testTrieGetters(const char *testName, | |
114 | const UTrie2 *trie, UTrie2ValueBits valueBits, | |
115 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
116 | uint32_t initialValue, errorValue; | |
117 | uint32_t value, value2; | |
118 | UChar32 start, limit; | |
119 | int32_t i, countSpecials; | |
120 | ||
121 | UBool isFrozen=utrie2_isFrozen(trie); | |
122 | const char *const typeName= isFrozen ? "frozen trie" : "newTrie"; | |
123 | ||
124 | countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
125 | ||
126 | start=0; | |
127 | for(i=countSpecials; i<countCheckRanges; ++i) { | |
128 | limit=checkRanges[i].limit; | |
129 | value=checkRanges[i].value; | |
130 | ||
131 | while(start<limit) { | |
132 | if(isFrozen) { | |
133 | if(start<=0xffff) { | |
134 | if(!U_IS_LEAD(start)) { | |
135 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
136 | value2=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, start); | |
137 | } else { | |
138 | value2=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, start); | |
139 | } | |
140 | if(value!=value2) { | |
141 | log_err("error: %s(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n", | |
142 | typeName, testName, (long)start, (long)value2, (long)value); | |
143 | } | |
144 | } | |
145 | } else { | |
146 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
147 | value2=UTRIE2_GET16_FROM_SUPP(trie, start); | |
148 | } else { | |
149 | value2=UTRIE2_GET32_FROM_SUPP(trie, start); | |
150 | } | |
151 | if(value!=value2) { | |
152 | log_err("error: %s(%s).fromSupp(U+%04lx)==0x%lx instead of 0x%lx\n", | |
153 | typeName, testName, (long)start, (long)value2, (long)value); | |
154 | } | |
155 | } | |
156 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
157 | value2=UTRIE2_GET16(trie, start); | |
158 | } else { | |
159 | value2=UTRIE2_GET32(trie, start); | |
160 | } | |
161 | if(value!=value2) { | |
162 | log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n", | |
163 | typeName, testName, (long)start, (long)value2, (long)value); | |
164 | } | |
165 | } | |
166 | value2=utrie2_get32(trie, start); | |
167 | if(value!=value2) { | |
168 | log_err("error: %s(%s).get32(U+%04lx)==0x%lx instead of 0x%lx\n", | |
169 | typeName, testName, (long)start, (long)value2, (long)value); | |
170 | } | |
171 | ++start; | |
172 | } | |
173 | } | |
174 | ||
175 | if(isFrozen) { | |
176 | /* test linear ASCII range from the data array pointer (access to "internal" field) */ | |
177 | start=0; | |
178 | for(i=countSpecials; i<countCheckRanges && start<=0x7f; ++i) { | |
179 | limit=checkRanges[i].limit; | |
180 | value=checkRanges[i].value; | |
181 | ||
182 | while(start<limit && start<=0x7f) { | |
183 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
184 | value2=trie->data16[start]; | |
185 | } else { | |
186 | value2=trie->data32[start]; | |
187 | } | |
188 | if(value!=value2) { | |
189 | log_err("error: %s(%s).asciiData[U+%04lx]==0x%lx instead of 0x%lx\n", | |
190 | typeName, testName, (long)start, (long)value2, (long)value); | |
191 | } | |
192 | ++start; | |
193 | } | |
194 | } | |
195 | while(start<=0xbf) { | |
196 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
197 | value2=trie->data16[start]; | |
198 | } else { | |
199 | value2=trie->data32[start]; | |
200 | } | |
201 | if(errorValue!=value2) { | |
202 | log_err("error: %s(%s).badData[U+%04lx]==0x%lx instead of 0x%lx\n", | |
203 | typeName, testName, (long)start, (long)value2, (long)errorValue); | |
204 | } | |
205 | ++start; | |
206 | } | |
207 | } | |
208 | ||
209 | if(0!=strncmp(testName, "dummy", 5) && 0!=strncmp(testName, "trie1", 5)) { | |
210 | /* test values for lead surrogate code units */ | |
211 | for(start=0xd7ff; start<0xdc01; ++start) { | |
212 | switch(start) { | |
213 | case 0xd7ff: | |
214 | case 0xdc00: | |
215 | value=errorValue; | |
216 | break; | |
217 | case 0xd800: | |
218 | value=90; | |
219 | break; | |
220 | case 0xd999: | |
221 | value=94; | |
222 | break; | |
223 | case 0xdbff: | |
224 | value=99; | |
225 | break; | |
226 | default: | |
227 | value=initialValue; | |
228 | break; | |
229 | } | |
230 | if(isFrozen && U_IS_LEAD(start)) { | |
231 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
232 | value2=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, start); | |
233 | } else { | |
234 | value2=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, start); | |
235 | } | |
236 | if(value2!=value) { | |
237 | log_err("error: %s(%s).LSCU(U+%04lx)==0x%lx instead of 0x%lx\n", | |
238 | typeName, testName, (long)start, (long)value2, (long)value); | |
239 | } | |
240 | } | |
241 | value2=utrie2_get32FromLeadSurrogateCodeUnit(trie, start); | |
242 | if(value2!=value) { | |
243 | log_err("error: %s(%s).lscu(U+%04lx)==0x%lx instead of 0x%lx\n", | |
244 | typeName, testName, (long)start, (long)value2, (long)value); | |
245 | } | |
246 | } | |
247 | } | |
248 | ||
249 | /* test errorValue */ | |
250 | if(isFrozen) { | |
251 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
252 | value=UTRIE2_GET16(trie, -1); | |
253 | value2=UTRIE2_GET16(trie, 0x110000); | |
254 | } else { | |
255 | value=UTRIE2_GET32(trie, -1); | |
256 | value2=UTRIE2_GET32(trie, 0x110000); | |
257 | } | |
258 | if(value!=errorValue || value2!=errorValue) { | |
259 | log_err("error: %s(%s).get(out of range) != errorValue\n", | |
260 | typeName, testName); | |
261 | } | |
262 | } | |
263 | value=utrie2_get32(trie, -1); | |
264 | value2=utrie2_get32(trie, 0x110000); | |
265 | if(value!=errorValue || value2!=errorValue) { | |
266 | log_err("error: %s(%s).get32(out of range) != errorValue\n", | |
267 | typeName, testName); | |
268 | } | |
269 | } | |
270 | ||
271 | static void | |
272 | testTrieUTF16(const char *testName, | |
273 | const UTrie2 *trie, UTrie2ValueBits valueBits, | |
274 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
275 | UChar s[200]; | |
276 | uint32_t values[100]; | |
277 | ||
278 | const UChar *p, *limit; | |
279 | ||
280 | uint32_t value; | |
281 | UChar32 prevCP, c, c2; | |
282 | int32_t i, length, sIndex, countValues; | |
283 | ||
284 | /* write a string */ | |
285 | prevCP=0; | |
286 | length=countValues=0; | |
287 | for(i=skipSpecialValues(checkRanges, countCheckRanges); i<countCheckRanges; ++i) { | |
288 | value=checkRanges[i].value; | |
289 | /* write three code points */ | |
290 | U16_APPEND_UNSAFE(s, length, prevCP); /* start of the range */ | |
291 | values[countValues++]=value; | |
292 | c=checkRanges[i].limit; | |
293 | prevCP=(prevCP+c)/2; /* middle of the range */ | |
294 | U16_APPEND_UNSAFE(s, length, prevCP); | |
295 | values[countValues++]=value; | |
296 | prevCP=c; | |
297 | --c; /* end of the range */ | |
298 | U16_APPEND_UNSAFE(s, length, c); | |
299 | values[countValues++]=value; | |
300 | } | |
301 | limit=s+length; | |
302 | ||
303 | /* try forward */ | |
304 | p=s; | |
305 | i=0; | |
306 | while(p<limit) { | |
307 | sIndex=(int32_t)(p-s); | |
308 | U16_NEXT(s, sIndex, length, c2); | |
309 | c=0x33; | |
310 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
311 | UTRIE2_U16_NEXT16(trie, p, limit, c, value); | |
312 | } else { | |
313 | UTRIE2_U16_NEXT32(trie, p, limit, c, value); | |
314 | } | |
315 | if(value!=values[i]) { | |
316 | log_err("error: wrong value from UTRIE2_NEXT(%s)(U+%04lx): 0x%lx instead of 0x%lx\n", | |
317 | testName, (long)c, (long)value, (long)values[i]); | |
318 | } | |
319 | if(c!=c2) { | |
320 | log_err("error: wrong code point from UTRIE2_NEXT(%s): U+%04lx != U+%04lx\n", | |
321 | testName, (long)c, (long)c2); | |
322 | continue; | |
323 | } | |
324 | ++i; | |
325 | } | |
326 | ||
327 | /* try backward */ | |
328 | p=limit; | |
329 | i=countValues; | |
330 | while(s<p) { | |
331 | --i; | |
332 | sIndex=(int32_t)(p-s); | |
333 | U16_PREV(s, 0, sIndex, c2); | |
334 | c=0x33; | |
335 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
336 | UTRIE2_U16_PREV16(trie, s, p, c, value); | |
337 | } else { | |
338 | UTRIE2_U16_PREV32(trie, s, p, c, value); | |
339 | } | |
340 | if(value!=values[i]) { | |
341 | log_err("error: wrong value from UTRIE2_PREV(%s)(U+%04lx): 0x%lx instead of 0x%lx\n", | |
342 | testName, (long)c, (long)value, (long)values[i]); | |
343 | } | |
344 | if(c!=c2) { | |
345 | log_err("error: wrong code point from UTRIE2_PREV(%s): U+%04lx != U+%04lx\n", | |
346 | testName, c, c2); | |
347 | } | |
348 | } | |
349 | } | |
350 | ||
351 | static void | |
352 | testTrieUTF8(const char *testName, | |
353 | const UTrie2 *trie, UTrie2ValueBits valueBits, | |
354 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
0f5d89e8 A |
355 | // Note: The byte sequence comments refer to the original UTF-8 definition. |
356 | // Starting with ICU 60, any sequence that is not a prefix of a valid one | |
357 | // is treated as multiple single-byte errors. | |
358 | // For testing, we only rely on U8_... and UTrie2 UTF-8 macros | |
359 | // iterating consistently. | |
729e4ab9 A |
360 | static const uint8_t illegal[]={ |
361 | 0xc0, 0x80, /* non-shortest U+0000 */ | |
362 | 0xc1, 0xbf, /* non-shortest U+007f */ | |
363 | 0xc2, /* truncated */ | |
364 | 0xe0, 0x90, 0x80, /* non-shortest U+0400 */ | |
365 | 0xe0, 0xa0, /* truncated */ | |
366 | 0xed, 0xa0, 0x80, /* lead surrogate U+d800 */ | |
367 | 0xed, 0xbf, 0xbf, /* trail surrogate U+dfff */ | |
368 | 0xf0, 0x8f, 0xbf, 0xbf, /* non-shortest U+ffff */ | |
369 | 0xf0, 0x90, 0x80, /* truncated */ | |
370 | 0xf4, 0x90, 0x80, 0x80, /* beyond-Unicode U+110000 */ | |
371 | 0xf8, 0x80, 0x80, 0x80, /* truncated */ | |
372 | 0xf8, 0x80, 0x80, 0x80, 0x80, /* 5-byte UTF-8 */ | |
373 | 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, /* truncated */ | |
374 | 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, /* 6-byte UTF-8 */ | |
375 | 0xfe, | |
376 | 0xff | |
377 | }; | |
378 | uint8_t s[600]; | |
379 | uint32_t values[200]; | |
380 | ||
381 | const uint8_t *p, *limit; | |
382 | ||
383 | uint32_t initialValue, errorValue; | |
384 | uint32_t value, bytes; | |
385 | UChar32 prevCP, c; | |
386 | int32_t i, countSpecials, length, countValues; | |
387 | int32_t prev8, i8; | |
388 | ||
389 | countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
390 | ||
391 | /* write a string */ | |
392 | prevCP=0; | |
393 | length=countValues=0; | |
394 | /* first a couple of trail bytes in lead position */ | |
395 | s[length++]=0x80; | |
396 | values[countValues++]=errorValue; | |
397 | s[length++]=0xbf; | |
398 | values[countValues++]=errorValue; | |
399 | prev8=i8=0; | |
400 | for(i=countSpecials; i<countCheckRanges; ++i) { | |
401 | value=checkRanges[i].value; | |
402 | /* write three legal (or surrogate) code points */ | |
403 | U8_APPEND_UNSAFE(s, length, prevCP); /* start of the range */ | |
0f5d89e8 A |
404 | if(U_IS_SURROGATE(prevCP)) { |
405 | // A surrogate byte sequence counts as 3 single-byte errors. | |
406 | values[countValues++]=errorValue; | |
407 | values[countValues++]=errorValue; | |
408 | values[countValues++]=errorValue; | |
409 | } else { | |
410 | values[countValues++]=value; | |
411 | } | |
729e4ab9 A |
412 | c=checkRanges[i].limit; |
413 | prevCP=(prevCP+c)/2; /* middle of the range */ | |
414 | U8_APPEND_UNSAFE(s, length, prevCP); | |
0f5d89e8 A |
415 | if(U_IS_SURROGATE(prevCP)) { |
416 | // A surrogate byte sequence counts as 3 single-byte errors. | |
417 | values[countValues++]=errorValue; | |
418 | values[countValues++]=errorValue; | |
419 | values[countValues++]=errorValue; | |
420 | } else { | |
421 | values[countValues++]=value; | |
422 | } | |
729e4ab9 A |
423 | prevCP=c; |
424 | --c; /* end of the range */ | |
425 | U8_APPEND_UNSAFE(s, length, c); | |
3d1f044b | 426 | if(U_IS_SURROGATE(c)) { |
0f5d89e8 A |
427 | // A surrogate byte sequence counts as 3 single-byte errors. |
428 | values[countValues++]=errorValue; | |
429 | values[countValues++]=errorValue; | |
430 | values[countValues++]=errorValue; | |
431 | } else { | |
432 | values[countValues++]=value; | |
433 | } | |
729e4ab9 | 434 | /* write an illegal byte sequence */ |
340931cb | 435 | if(i8<(int32_t)sizeof(illegal)) { |
729e4ab9 A |
436 | U8_FWD_1(illegal, i8, sizeof(illegal)); |
437 | while(prev8<i8) { | |
438 | s[length++]=illegal[prev8++]; | |
439 | } | |
440 | values[countValues++]=errorValue; | |
441 | } | |
442 | } | |
443 | /* write the remaining illegal byte sequences */ | |
340931cb | 444 | while(i8<(int32_t)sizeof(illegal)) { |
729e4ab9 A |
445 | U8_FWD_1(illegal, i8, sizeof(illegal)); |
446 | while(prev8<i8) { | |
447 | s[length++]=illegal[prev8++]; | |
448 | } | |
449 | values[countValues++]=errorValue; | |
450 | } | |
451 | limit=s+length; | |
452 | ||
453 | /* try forward */ | |
454 | p=s; | |
455 | i=0; | |
456 | while(p<limit) { | |
457 | prev8=i8=(int32_t)(p-s); | |
458 | U8_NEXT(s, i8, length, c); | |
459 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
460 | UTRIE2_U8_NEXT16(trie, p, limit, value); | |
461 | } else { | |
462 | UTRIE2_U8_NEXT32(trie, p, limit, value); | |
463 | } | |
464 | bytes=0; | |
465 | if(value!=values[i] || i8!=(p-s)) { | |
0f5d89e8 A |
466 | int32_t k=prev8; |
467 | while(k<i8) { | |
468 | bytes=(bytes<<8)|s[k++]; | |
729e4ab9 A |
469 | } |
470 | } | |
471 | if(value!=values[i]) { | |
0f5d89e8 A |
472 | log_err("error: wrong value from UTRIE2_U8_NEXT(%s)(from %d %lx->U+%04lx) (read %d bytes): " |
473 | "0x%lx instead of 0x%lx\n", | |
474 | testName, (int)prev8, (unsigned long)bytes, (long)c, (int)((p-s)-prev8), | |
475 | (long)value, (long)values[i]); | |
729e4ab9 A |
476 | } |
477 | if(i8!=(p-s)) { | |
0f5d89e8 A |
478 | log_err("error: wrong end index from UTRIE2_U8_NEXT(%s)(from %d %lx->U+%04lx): %ld != %ld\n", |
479 | testName, (int)prev8, (unsigned long)bytes, (long)c, (long)(p-s), (long)i8); | |
729e4ab9 A |
480 | continue; |
481 | } | |
482 | ++i; | |
483 | } | |
484 | ||
485 | /* try backward */ | |
486 | p=limit; | |
487 | i=countValues; | |
488 | while(s<p) { | |
489 | --i; | |
490 | prev8=i8=(int32_t)(p-s); | |
491 | U8_PREV(s, 0, i8, c); | |
492 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
493 | UTRIE2_U8_PREV16(trie, s, p, value); | |
494 | } else { | |
495 | UTRIE2_U8_PREV32(trie, s, p, value); | |
496 | } | |
497 | bytes=0; | |
498 | if(value!=values[i] || i8!=(p-s)) { | |
499 | int32_t k=i8; | |
500 | while(k<prev8) { | |
501 | bytes=(bytes<<8)|s[k++]; | |
502 | } | |
503 | } | |
504 | if(value!=values[i]) { | |
0f5d89e8 A |
505 | log_err("error: wrong value from UTRIE2_U8_PREV(%s)(from %d %lx->U+%04lx) (read %d bytes): " |
506 | ": 0x%lx instead of 0x%lx\n", | |
507 | testName, (int)prev8, (unsigned long)bytes, (long)c, (int)(prev8-(p-s)), | |
508 | (long)value, (long)values[i]); | |
729e4ab9 A |
509 | } |
510 | if(i8!=(p-s)) { | |
0f5d89e8 A |
511 | log_err("error: wrong end index from UTRIE2_U8_PREV(%s)(from %d %lx->U+%04lx): %ld != %ld\n", |
512 | testName, (int)prev8, (unsigned long)bytes, (long)c, (long)(p-s), (long)i8); | |
729e4ab9 A |
513 | continue; |
514 | } | |
515 | } | |
516 | } | |
517 | ||
518 | static void | |
519 | testFrozenTrie(const char *testName, | |
520 | UTrie2 *trie, UTrie2ValueBits valueBits, | |
521 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
522 | UErrorCode errorCode; | |
523 | uint32_t value, value2; | |
524 | ||
525 | if(!utrie2_isFrozen(trie)) { | |
526 | log_err("error: utrie2_isFrozen(frozen %s) returned FALSE (not frozen)\n", | |
527 | testName); | |
528 | return; | |
529 | } | |
530 | ||
531 | testTrieGetters(testName, trie, valueBits, checkRanges, countCheckRanges); | |
532 | testTrieEnum(testName, trie, checkRanges, countCheckRanges); | |
533 | testTrieUTF16(testName, trie, valueBits, checkRanges, countCheckRanges); | |
534 | testTrieUTF8(testName, trie, valueBits, checkRanges, countCheckRanges); | |
535 | ||
536 | errorCode=U_ZERO_ERROR; | |
537 | value=utrie2_get32(trie, 1); | |
538 | utrie2_set32(trie, 1, 234, &errorCode); | |
539 | value2=utrie2_get32(trie, 1); | |
540 | if(errorCode!=U_NO_WRITE_PERMISSION || value2!=value) { | |
541 | log_err("error: utrie2_set32(frozen %s) failed: it set %s != U_NO_WRITE_PERMISSION\n", | |
542 | testName, u_errorName(errorCode)); | |
543 | return; | |
544 | } | |
545 | ||
546 | errorCode=U_ZERO_ERROR; | |
547 | utrie2_setRange32(trie, 1, 5, 234, TRUE, &errorCode); | |
548 | value2=utrie2_get32(trie, 1); | |
549 | if(errorCode!=U_NO_WRITE_PERMISSION || value2!=value) { | |
550 | log_err("error: utrie2_setRange32(frozen %s) failed: it set %s != U_NO_WRITE_PERMISSION\n", | |
551 | testName, u_errorName(errorCode)); | |
552 | return; | |
553 | } | |
554 | ||
555 | errorCode=U_ZERO_ERROR; | |
556 | value=utrie2_get32FromLeadSurrogateCodeUnit(trie, 0xd801); | |
557 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd801, 234, &errorCode); | |
558 | value2=utrie2_get32FromLeadSurrogateCodeUnit(trie, 0xd801); | |
559 | if(errorCode!=U_NO_WRITE_PERMISSION || value2!=value) { | |
560 | log_err("error: utrie2_set32ForLeadSurrogateCodeUnit(frozen %s) failed: " | |
561 | "it set %s != U_NO_WRITE_PERMISSION\n", | |
562 | testName, u_errorName(errorCode)); | |
563 | return; | |
564 | } | |
565 | } | |
566 | ||
567 | static void | |
568 | testNewTrie(const char *testName, const UTrie2 *trie, | |
569 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
570 | /* The valueBits are ignored for an unfrozen trie. */ | |
571 | testTrieGetters(testName, trie, UTRIE2_COUNT_VALUE_BITS, checkRanges, countCheckRanges); | |
572 | testTrieEnum(testName, trie, checkRanges, countCheckRanges); | |
573 | } | |
574 | ||
575 | static void | |
576 | testTrieSerialize(const char *testName, | |
577 | UTrie2 *trie, UTrie2ValueBits valueBits, | |
578 | UBool withSwap, | |
579 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
580 | uint32_t storage[10000]; | |
581 | int32_t length1, length2, length3; | |
582 | UTrie2ValueBits otherValueBits; | |
583 | UErrorCode errorCode; | |
584 | ||
585 | /* clone the trie so that the caller can reuse the original */ | |
586 | errorCode=U_ZERO_ERROR; | |
587 | trie=utrie2_clone(trie, &errorCode); | |
588 | if(U_FAILURE(errorCode)) { | |
589 | log_err("error: utrie2_clone(unfrozen %s) failed - %s\n", | |
590 | testName, u_errorName(errorCode)); | |
591 | return; | |
592 | } | |
593 | ||
594 | /* | |
595 | * This is not a loop, but simply a block that we can exit with "break" | |
596 | * when something goes wrong. | |
597 | */ | |
598 | do { | |
599 | errorCode=U_ZERO_ERROR; | |
600 | utrie2_serialize(trie, storage, sizeof(storage), &errorCode); | |
601 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
602 | log_err("error: utrie2_serialize(unfrozen %s) set %s != U_ILLEGAL_ARGUMENT_ERROR\n", | |
603 | testName, u_errorName(errorCode)); | |
604 | break; | |
605 | } | |
606 | errorCode=U_ZERO_ERROR; | |
607 | utrie2_freeze(trie, valueBits, &errorCode); | |
608 | if(U_FAILURE(errorCode) || !utrie2_isFrozen(trie)) { | |
609 | log_err("error: utrie2_freeze(%s) failed: %s isFrozen: %d\n", | |
610 | testName, u_errorName(errorCode), utrie2_isFrozen(trie)); | |
611 | break; | |
612 | } | |
613 | otherValueBits= valueBits==UTRIE2_16_VALUE_BITS ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS; | |
614 | utrie2_freeze(trie, otherValueBits, &errorCode); | |
615 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
616 | log_err("error: utrie2_freeze(already-frozen with other valueBits %s) " | |
617 | "set %s != U_ILLEGAL_ARGUMENT_ERROR\n", | |
618 | testName, u_errorName(errorCode)); | |
619 | break; | |
620 | } | |
621 | errorCode=U_ZERO_ERROR; | |
622 | if(withSwap) { | |
623 | /* clone a frozen trie */ | |
624 | UTrie2 *clone=utrie2_clone(trie, &errorCode); | |
625 | if(U_FAILURE(errorCode)) { | |
626 | log_err("error: cloning a frozen UTrie2 failed (%s) - %s\n", | |
627 | testName, u_errorName(errorCode)); | |
628 | errorCode=U_ZERO_ERROR; /* continue with the original */ | |
629 | } else { | |
630 | utrie2_close(trie); | |
631 | trie=clone; | |
632 | } | |
633 | } | |
634 | length1=utrie2_serialize(trie, NULL, 0, &errorCode); | |
635 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { | |
636 | log_err("error: utrie2_serialize(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n", | |
637 | testName, u_errorName(errorCode)); | |
638 | break; | |
639 | } | |
640 | errorCode=U_ZERO_ERROR; | |
641 | length2=utrie2_serialize(trie, storage, sizeof(storage), &errorCode); | |
642 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
643 | log_err("error: utrie2_serialize(%s) needs more memory\n", testName); | |
644 | break; | |
645 | } | |
646 | if(U_FAILURE(errorCode)) { | |
647 | log_err("error: utrie2_serialize(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
648 | break; | |
649 | } | |
650 | if(length1!=length2) { | |
651 | log_err("error: trie serialization (%s) lengths different: " | |
652 | "preflight vs. serialize\n", testName); | |
653 | break; | |
654 | } | |
655 | ||
656 | testFrozenTrie(testName, trie, valueBits, checkRanges, countCheckRanges); | |
657 | utrie2_close(trie); | |
658 | trie=NULL; | |
659 | ||
660 | if(withSwap) { | |
661 | uint32_t swapped[10000]; | |
662 | int32_t swappedLength; | |
663 | ||
664 | UDataSwapper *ds; | |
665 | ||
666 | /* swap to opposite-endian */ | |
667 | uprv_memset(swapped, 0x55, length2); | |
668 | ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, | |
669 | !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); | |
670 | swappedLength=utrie2_swap(ds, storage, -1, NULL, &errorCode); | |
671 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
672 | log_err("error: utrie2_swap(%s to OE preflighting) failed (%s) " | |
673 | "or before/after lengths different\n", | |
674 | testName, u_errorName(errorCode)); | |
675 | udata_closeSwapper(ds); | |
676 | break; | |
677 | } | |
678 | swappedLength=utrie2_swap(ds, storage, length2, swapped, &errorCode); | |
679 | udata_closeSwapper(ds); | |
680 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
681 | log_err("error: utrie2_swap(%s to OE) failed (%s) or before/after lengths different\n", | |
682 | testName, u_errorName(errorCode)); | |
683 | break; | |
684 | } | |
685 | ||
686 | /* swap back to platform-endian */ | |
687 | uprv_memset(storage, 0xaa, length2); | |
688 | ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, | |
689 | U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); | |
690 | swappedLength=utrie2_swap(ds, swapped, -1, NULL, &errorCode); | |
691 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
692 | log_err("error: utrie2_swap(%s to PE preflighting) failed (%s) " | |
693 | "or before/after lengths different\n", | |
694 | testName, u_errorName(errorCode)); | |
695 | udata_closeSwapper(ds); | |
696 | break; | |
697 | } | |
698 | swappedLength=utrie2_swap(ds, swapped, length2, storage, &errorCode); | |
699 | udata_closeSwapper(ds); | |
700 | if(U_FAILURE(errorCode) || swappedLength!=length2) { | |
701 | log_err("error: utrie2_swap(%s to PE) failed (%s) or before/after lengths different\n", | |
702 | testName, u_errorName(errorCode)); | |
703 | break; | |
704 | } | |
705 | } | |
706 | ||
707 | trie=utrie2_openFromSerialized(valueBits, storage, length2, &length3, &errorCode); | |
708 | if(U_FAILURE(errorCode)) { | |
709 | log_err("error: utrie2_openFromSerialized(%s) failed, %s\n", testName, u_errorName(errorCode)); | |
710 | break; | |
711 | } | |
712 | if((valueBits==UTRIE2_16_VALUE_BITS)!=(trie->data32==NULL)) { | |
713 | log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName); | |
714 | break; | |
715 | } | |
716 | if(length2!=length3) { | |
717 | log_err("error: trie serialization (%s) lengths different: " | |
718 | "serialize vs. unserialize\n", testName); | |
719 | break; | |
720 | } | |
721 | /* overwrite the storage that is not supposed to be needed */ | |
722 | uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3)); | |
723 | ||
724 | utrie2_freeze(trie, valueBits, &errorCode); | |
725 | if(U_FAILURE(errorCode) || !utrie2_isFrozen(trie)) { | |
726 | log_err("error: utrie2_freeze(unserialized %s) failed: %s isFrozen: %d\n", | |
727 | testName, u_errorName(errorCode), utrie2_isFrozen(trie)); | |
728 | break; | |
729 | } | |
730 | utrie2_freeze(trie, otherValueBits, &errorCode); | |
731 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
732 | log_err("error: utrie2_freeze(unserialized with other valueBits %s) " | |
733 | "set %s != U_ILLEGAL_ARGUMENT_ERROR\n", | |
734 | testName, u_errorName(errorCode)); | |
735 | break; | |
736 | } | |
737 | errorCode=U_ZERO_ERROR; | |
738 | if(withSwap) { | |
739 | /* clone an unserialized trie */ | |
740 | UTrie2 *clone=utrie2_clone(trie, &errorCode); | |
741 | if(U_FAILURE(errorCode)) { | |
742 | log_err("error: utrie2_clone(unserialized %s) failed - %s\n", | |
743 | testName, u_errorName(errorCode)); | |
744 | errorCode=U_ZERO_ERROR; | |
745 | /* no need to break: just test the original trie */ | |
746 | } else { | |
747 | utrie2_close(trie); | |
748 | trie=clone; | |
749 | uprv_memset(storage, 0, sizeof(storage)); | |
750 | } | |
751 | } | |
752 | testFrozenTrie(testName, trie, valueBits, checkRanges, countCheckRanges); | |
753 | { | |
754 | /* clone-as-thawed an unserialized trie */ | |
755 | UTrie2 *clone=utrie2_cloneAsThawed(trie, &errorCode); | |
756 | if(U_FAILURE(errorCode) || utrie2_isFrozen(clone)) { | |
757 | log_err("error: utrie2_cloneAsThawed(unserialized %s) failed - " | |
758 | "%s (isFrozen: %d)\n", | |
759 | testName, u_errorName(errorCode), clone!=NULL && utrie2_isFrozen(trie)); | |
760 | break; | |
761 | } else { | |
762 | utrie2_close(trie); | |
763 | trie=clone; | |
764 | } | |
765 | } | |
766 | { | |
767 | uint32_t value, value2; | |
768 | ||
769 | value=utrie2_get32(trie, 0xa1); | |
770 | utrie2_set32(trie, 0xa1, 789, &errorCode); | |
771 | value2=utrie2_get32(trie, 0xa1); | |
772 | utrie2_set32(trie, 0xa1, value, &errorCode); | |
773 | if(U_FAILURE(errorCode) || value2!=789) { | |
774 | log_err("error: modifying a cloneAsThawed UTrie2 (%s) failed - %s\n", | |
775 | testName, u_errorName(errorCode)); | |
776 | } | |
777 | } | |
778 | testNewTrie(testName, trie, checkRanges, countCheckRanges); | |
779 | } while(0); | |
780 | ||
781 | utrie2_close(trie); | |
782 | } | |
783 | ||
784 | static UTrie2 * | |
785 | testTrieSerializeAllValueBits(const char *testName, | |
786 | UTrie2 *trie, UBool withClone, | |
787 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
788 | char name[40]; | |
789 | ||
790 | /* verify that all the expected values are in the unfrozen trie */ | |
791 | testNewTrie(testName, trie, checkRanges, countCheckRanges); | |
792 | ||
793 | /* | |
794 | * Test with both valueBits serializations, | |
795 | * and that utrie2_serialize() can be called multiple times. | |
796 | */ | |
797 | uprv_strcpy(name, testName); | |
798 | uprv_strcat(name, ".16"); | |
799 | testTrieSerialize(name, trie, | |
800 | UTRIE2_16_VALUE_BITS, withClone, | |
801 | checkRanges, countCheckRanges); | |
802 | ||
803 | if(withClone) { | |
804 | /* | |
805 | * try cloning after the first serialization; | |
806 | * clone-as-thawed just to sometimes try it on an unfrozen trie | |
807 | */ | |
808 | UErrorCode errorCode=U_ZERO_ERROR; | |
809 | UTrie2 *clone=utrie2_cloneAsThawed(trie, &errorCode); | |
810 | if(U_FAILURE(errorCode)) { | |
811 | log_err("error: utrie2_cloneAsThawed(%s) after serialization failed - %s\n", | |
812 | testName, u_errorName(errorCode)); | |
813 | } else { | |
814 | utrie2_close(trie); | |
815 | trie=clone; | |
816 | ||
817 | testNewTrie(testName, trie, checkRanges, countCheckRanges); | |
818 | } | |
819 | } | |
820 | ||
821 | uprv_strcpy(name, testName); | |
822 | uprv_strcat(name, ".32"); | |
823 | testTrieSerialize(name, trie, | |
824 | UTRIE2_32_VALUE_BITS, withClone, | |
825 | checkRanges, countCheckRanges); | |
826 | ||
827 | return trie; /* could be the clone */ | |
828 | } | |
829 | ||
830 | static UTrie2 * | |
831 | makeTrieWithRanges(const char *testName, UBool withClone, | |
832 | const SetRange setRanges[], int32_t countSetRanges, | |
833 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
834 | UTrie2 *trie; | |
835 | uint32_t initialValue, errorValue; | |
836 | uint32_t value; | |
837 | UChar32 start, limit; | |
838 | int32_t i; | |
839 | UErrorCode errorCode; | |
840 | UBool overwrite; | |
841 | ||
842 | log_verbose("\ntesting Trie '%s'\n", testName); | |
843 | errorCode=U_ZERO_ERROR; | |
844 | getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
845 | trie=utrie2_open(initialValue, errorValue, &errorCode); | |
846 | if(U_FAILURE(errorCode)) { | |
847 | log_err("error: utrie2_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
848 | return NULL; | |
849 | } | |
850 | ||
851 | /* set values from setRanges[] */ | |
852 | for(i=0; i<countSetRanges; ++i) { | |
853 | if(withClone && i==countSetRanges/2) { | |
854 | /* switch to a clone in the middle of setting values */ | |
855 | UTrie2 *clone=utrie2_clone(trie, &errorCode); | |
856 | if(U_FAILURE(errorCode)) { | |
857 | log_err("error: utrie2_clone(%s) failed - %s\n", | |
858 | testName, u_errorName(errorCode)); | |
859 | errorCode=U_ZERO_ERROR; /* continue with the original */ | |
860 | } else { | |
861 | utrie2_close(trie); | |
862 | trie=clone; | |
863 | } | |
864 | } | |
865 | start=setRanges[i].start; | |
866 | limit=setRanges[i].limit; | |
867 | value=setRanges[i].value; | |
868 | overwrite=setRanges[i].overwrite; | |
869 | if((limit-start)==1 && overwrite) { | |
870 | utrie2_set32(trie, start, value, &errorCode); | |
871 | } else { | |
872 | utrie2_setRange32(trie, start, limit-1, value, overwrite, &errorCode); | |
873 | } | |
874 | } | |
875 | ||
876 | /* set some values for lead surrogate code units */ | |
877 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd800, 90, &errorCode); | |
878 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd999, 94, &errorCode); | |
879 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xdbff, 99, &errorCode); | |
880 | if(U_SUCCESS(errorCode)) { | |
881 | return trie; | |
882 | } else { | |
883 | log_err("error: setting values into a trie (%s) failed - %s\n", | |
884 | testName, u_errorName(errorCode)); | |
885 | utrie2_close(trie); | |
886 | return NULL; | |
887 | } | |
888 | } | |
889 | ||
890 | static void | |
891 | testTrieRanges(const char *testName, UBool withClone, | |
892 | const SetRange setRanges[], int32_t countSetRanges, | |
893 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
894 | UTrie2 *trie=makeTrieWithRanges(testName, withClone, | |
895 | setRanges, countSetRanges, | |
896 | checkRanges, countCheckRanges); | |
897 | if(trie!=NULL) { | |
898 | trie=testTrieSerializeAllValueBits(testName, trie, withClone, | |
899 | checkRanges, countCheckRanges); | |
900 | utrie2_close(trie); | |
901 | } | |
902 | } | |
903 | ||
904 | /* test data ----------------------------------------------------------------*/ | |
905 | ||
906 | /* set consecutive ranges, even with value 0 */ | |
907 | static const SetRange | |
908 | setRanges1[]={ | |
909 | { 0, 0x40, 0, FALSE }, | |
910 | { 0x40, 0xe7, 0x1234, FALSE }, | |
911 | { 0xe7, 0x3400, 0, FALSE }, | |
912 | { 0x3400, 0x9fa6, 0x6162, FALSE }, | |
913 | { 0x9fa6, 0xda9e, 0x3132, FALSE }, | |
914 | { 0xdada, 0xeeee, 0x87ff, FALSE }, | |
915 | { 0xeeee, 0x11111, 1, FALSE }, | |
916 | { 0x11111, 0x44444, 0x6162, FALSE }, | |
917 | { 0x44444, 0x60003, 0, FALSE }, | |
918 | { 0xf0003, 0xf0004, 0xf, FALSE }, | |
919 | { 0xf0004, 0xf0006, 0x10, FALSE }, | |
920 | { 0xf0006, 0xf0007, 0x11, FALSE }, | |
921 | { 0xf0007, 0xf0040, 0x12, FALSE }, | |
922 | { 0xf0040, 0x110000, 0, FALSE } | |
923 | }; | |
924 | ||
925 | static const CheckRange | |
926 | checkRanges1[]={ | |
927 | { 0, 0 }, | |
928 | { 0x40, 0 }, | |
929 | { 0xe7, 0x1234 }, | |
930 | { 0x3400, 0 }, | |
931 | { 0x9fa6, 0x6162 }, | |
932 | { 0xda9e, 0x3132 }, | |
933 | { 0xdada, 0 }, | |
934 | { 0xeeee, 0x87ff }, | |
935 | { 0x11111, 1 }, | |
936 | { 0x44444, 0x6162 }, | |
937 | { 0xf0003, 0 }, | |
938 | { 0xf0004, 0xf }, | |
939 | { 0xf0006, 0x10 }, | |
940 | { 0xf0007, 0x11 }, | |
941 | { 0xf0040, 0x12 }, | |
942 | { 0x110000, 0 } | |
943 | }; | |
944 | ||
945 | /* set some interesting overlapping ranges */ | |
946 | static const SetRange | |
947 | setRanges2[]={ | |
948 | { 0x21, 0x7f, 0x5555, TRUE }, | |
949 | { 0x2f800, 0x2fedc, 0x7a, TRUE }, | |
950 | { 0x72, 0xdd, 3, TRUE }, | |
951 | { 0xdd, 0xde, 4, FALSE }, | |
952 | { 0x201, 0x240, 6, TRUE }, /* 3 consecutive blocks with the same pattern but */ | |
953 | { 0x241, 0x280, 6, TRUE }, /* discontiguous value ranges, testing utrie2_enum() */ | |
954 | { 0x281, 0x2c0, 6, TRUE }, | |
955 | { 0x2f987, 0x2fa98, 5, TRUE }, | |
956 | { 0x2f777, 0x2f883, 0, TRUE }, | |
957 | { 0x2f900, 0x2ffaa, 1, FALSE }, | |
958 | { 0x2ffaa, 0x2ffab, 2, TRUE }, | |
959 | { 0x2ffbb, 0x2ffc0, 7, TRUE } | |
960 | }; | |
961 | ||
962 | static const CheckRange | |
963 | checkRanges2[]={ | |
964 | { 0, 0 }, | |
965 | { 0x21, 0 }, | |
966 | { 0x72, 0x5555 }, | |
967 | { 0xdd, 3 }, | |
968 | { 0xde, 4 }, | |
969 | { 0x201, 0 }, | |
970 | { 0x240, 6 }, | |
971 | { 0x241, 0 }, | |
972 | { 0x280, 6 }, | |
973 | { 0x281, 0 }, | |
974 | { 0x2c0, 6 }, | |
975 | { 0x2f883, 0 }, | |
976 | { 0x2f987, 0x7a }, | |
977 | { 0x2fa98, 5 }, | |
978 | { 0x2fedc, 0x7a }, | |
979 | { 0x2ffaa, 1 }, | |
980 | { 0x2ffab, 2 }, | |
981 | { 0x2ffbb, 0 }, | |
982 | { 0x2ffc0, 7 }, | |
983 | { 0x110000, 0 } | |
984 | }; | |
985 | ||
986 | static const CheckRange | |
987 | checkRanges2_d800[]={ | |
988 | { 0x10000, 0 }, | |
989 | { 0x10400, 0 } | |
990 | }; | |
991 | ||
992 | static const CheckRange | |
993 | checkRanges2_d87e[]={ | |
994 | { 0x2f800, 6 }, | |
995 | { 0x2f883, 0 }, | |
996 | { 0x2f987, 0x7a }, | |
997 | { 0x2fa98, 5 }, | |
998 | { 0x2fc00, 0x7a } | |
999 | }; | |
1000 | ||
1001 | static const CheckRange | |
1002 | checkRanges2_d87f[]={ | |
1003 | { 0x2fc00, 0 }, | |
1004 | { 0x2fedc, 0x7a }, | |
1005 | { 0x2ffaa, 1 }, | |
1006 | { 0x2ffab, 2 }, | |
1007 | { 0x2ffbb, 0 }, | |
1008 | { 0x2ffc0, 7 }, | |
1009 | { 0x30000, 0 } | |
1010 | }; | |
1011 | ||
1012 | static const CheckRange | |
1013 | checkRanges2_dbff[]={ | |
1014 | { 0x10fc00, 0 }, | |
1015 | { 0x110000, 0 } | |
1016 | }; | |
1017 | ||
1018 | /* use a non-zero initial value */ | |
1019 | static const SetRange | |
1020 | setRanges3[]={ | |
1021 | { 0x31, 0xa4, 1, FALSE }, | |
1022 | { 0x3400, 0x6789, 2, FALSE }, | |
1023 | { 0x8000, 0x89ab, 9, TRUE }, | |
1024 | { 0x9000, 0xa000, 4, TRUE }, | |
1025 | { 0xabcd, 0xbcde, 3, TRUE }, | |
1026 | { 0x55555, 0x110000, 6, TRUE }, /* highStart<U+ffff with non-initialValue */ | |
1027 | { 0xcccc, 0x55555, 6, TRUE } | |
1028 | }; | |
1029 | ||
1030 | static const CheckRange | |
1031 | checkRanges3[]={ | |
1032 | { 0, 9 }, /* non-zero initialValue */ | |
1033 | { 0x31, 9 }, | |
1034 | { 0xa4, 1 }, | |
1035 | { 0x3400, 9 }, | |
1036 | { 0x6789, 2 }, | |
1037 | { 0x9000, 9 }, | |
1038 | { 0xa000, 4 }, | |
1039 | { 0xabcd, 9 }, | |
1040 | { 0xbcde, 3 }, | |
1041 | { 0xcccc, 9 }, | |
1042 | { 0x110000, 6 } | |
1043 | }; | |
1044 | ||
1045 | /* empty or single-value tries, testing highStart==0 */ | |
1046 | static const SetRange | |
1047 | setRangesEmpty[]={ | |
1048 | { 0, 0, 0, FALSE }, /* need some values for it to compile */ | |
1049 | }; | |
1050 | ||
1051 | static const CheckRange | |
1052 | checkRangesEmpty[]={ | |
1053 | { 0, 3 }, | |
1054 | { 0x110000, 3 } | |
1055 | }; | |
1056 | ||
1057 | static const SetRange | |
1058 | setRangesSingleValue[]={ | |
1059 | { 0, 0x110000, 5, TRUE }, | |
1060 | }; | |
1061 | ||
1062 | static const CheckRange | |
1063 | checkRangesSingleValue[]={ | |
1064 | { 0, 3 }, | |
1065 | { 0x110000, 5 } | |
1066 | }; | |
1067 | ||
1068 | static void | |
1069 | TrieTest(void) { | |
1070 | testTrieRanges("set1", FALSE, | |
b331163b A |
1071 | setRanges1, UPRV_LENGTHOF(setRanges1), |
1072 | checkRanges1, UPRV_LENGTHOF(checkRanges1)); | |
729e4ab9 | 1073 | testTrieRanges("set2-overlap", FALSE, |
b331163b A |
1074 | setRanges2, UPRV_LENGTHOF(setRanges2), |
1075 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
729e4ab9 | 1076 | testTrieRanges("set3-initial-9", FALSE, |
b331163b A |
1077 | setRanges3, UPRV_LENGTHOF(setRanges3), |
1078 | checkRanges3, UPRV_LENGTHOF(checkRanges3)); | |
729e4ab9 A |
1079 | testTrieRanges("set-empty", FALSE, |
1080 | setRangesEmpty, 0, | |
b331163b | 1081 | checkRangesEmpty, UPRV_LENGTHOF(checkRangesEmpty)); |
729e4ab9 | 1082 | testTrieRanges("set-single-value", FALSE, |
b331163b A |
1083 | setRangesSingleValue, UPRV_LENGTHOF(setRangesSingleValue), |
1084 | checkRangesSingleValue, UPRV_LENGTHOF(checkRangesSingleValue)); | |
729e4ab9 A |
1085 | |
1086 | testTrieRanges("set2-overlap.withClone", TRUE, | |
b331163b A |
1087 | setRanges2, UPRV_LENGTHOF(setRanges2), |
1088 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
729e4ab9 A |
1089 | } |
1090 | ||
1091 | static void | |
1092 | EnumNewTrieForLeadSurrogateTest(void) { | |
1093 | static const char *const testName="enum-for-lead"; | |
1094 | UTrie2 *trie=makeTrieWithRanges(testName, FALSE, | |
b331163b A |
1095 | setRanges2, UPRV_LENGTHOF(setRanges2), |
1096 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
729e4ab9 A |
1097 | while(trie!=NULL) { |
1098 | const CheckRange *checkRanges; | |
1099 | ||
1100 | checkRanges=checkRanges2_d800+1; | |
1101 | utrie2_enumForLeadSurrogate(trie, 0xd800, | |
1102 | testEnumValue, testEnumRange, | |
1103 | &checkRanges); | |
1104 | checkRanges=checkRanges2_d87e+1; | |
1105 | utrie2_enumForLeadSurrogate(trie, 0xd87e, | |
1106 | testEnumValue, testEnumRange, | |
1107 | &checkRanges); | |
1108 | checkRanges=checkRanges2_d87f+1; | |
1109 | utrie2_enumForLeadSurrogate(trie, 0xd87f, | |
1110 | testEnumValue, testEnumRange, | |
1111 | &checkRanges); | |
1112 | checkRanges=checkRanges2_dbff+1; | |
1113 | utrie2_enumForLeadSurrogate(trie, 0xdbff, | |
1114 | testEnumValue, testEnumRange, | |
1115 | &checkRanges); | |
1116 | if(!utrie2_isFrozen(trie)) { | |
1117 | UErrorCode errorCode=U_ZERO_ERROR; | |
1118 | utrie2_freeze(trie, UTRIE2_16_VALUE_BITS, &errorCode); | |
1119 | if(U_FAILURE(errorCode)) { | |
1120 | log_err("error: utrie2_freeze(%s) failed\n", testName); | |
1121 | utrie2_close(trie); | |
1122 | return; | |
1123 | } | |
1124 | } else { | |
1125 | utrie2_close(trie); | |
1126 | break; | |
1127 | } | |
1128 | } | |
1129 | } | |
1130 | ||
1131 | /* test utrie2_openDummy() -------------------------------------------------- */ | |
1132 | ||
1133 | static void | |
1134 | dummyTest(UTrie2ValueBits valueBits) { | |
1135 | CheckRange | |
1136 | checkRanges[]={ | |
1137 | { -1, 0 }, | |
1138 | { 0, 0 }, | |
1139 | { 0x110000, 0 } | |
1140 | }; | |
1141 | ||
1142 | UTrie2 *trie; | |
1143 | UErrorCode errorCode; | |
1144 | ||
1145 | const char *testName; | |
1146 | uint32_t initialValue, errorValue; | |
1147 | ||
1148 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
1149 | testName="dummy.16"; | |
1150 | initialValue=0x313; | |
1151 | errorValue=0xaffe; | |
1152 | } else { | |
1153 | testName="dummy.32"; | |
1154 | initialValue=0x01234567; | |
1155 | errorValue=0x89abcdef; | |
1156 | } | |
1157 | checkRanges[0].value=errorValue; | |
1158 | checkRanges[1].value=checkRanges[2].value=initialValue; | |
1159 | ||
1160 | errorCode=U_ZERO_ERROR; | |
1161 | trie=utrie2_openDummy(valueBits, initialValue, errorValue, &errorCode); | |
1162 | if(U_FAILURE(errorCode)) { | |
1163 | log_err("utrie2_openDummy(valueBits=%d) failed - %s\n", valueBits, u_errorName(errorCode)); | |
1164 | return; | |
1165 | } | |
1166 | ||
b331163b | 1167 | testFrozenTrie(testName, trie, valueBits, checkRanges, UPRV_LENGTHOF(checkRanges)); |
729e4ab9 A |
1168 | utrie2_close(trie); |
1169 | } | |
1170 | ||
1171 | static void | |
1172 | DummyTrieTest(void) { | |
1173 | dummyTest(UTRIE2_16_VALUE_BITS); | |
1174 | dummyTest(UTRIE2_32_VALUE_BITS); | |
1175 | } | |
1176 | ||
1177 | /* test builder memory management ------------------------------------------- */ | |
1178 | ||
1179 | static void | |
1180 | FreeBlocksTest(void) { | |
1181 | static const CheckRange | |
1182 | checkRanges[]={ | |
1183 | { 0, 1 }, | |
1184 | { 0x740, 1 }, | |
1185 | { 0x780, 2 }, | |
1186 | { 0x880, 3 }, | |
1187 | { 0x110000, 1 } | |
1188 | }; | |
1189 | static const char *const testName="free-blocks"; | |
1190 | ||
1191 | UTrie2 *trie; | |
1192 | int32_t i; | |
1193 | UErrorCode errorCode; | |
1194 | ||
1195 | errorCode=U_ZERO_ERROR; | |
1196 | trie=utrie2_open(1, 0xbad, &errorCode); | |
1197 | if(U_FAILURE(errorCode)) { | |
1198 | log_err("error: utrie2_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1199 | return; | |
1200 | } | |
1201 | ||
1202 | /* | |
1203 | * Repeatedly set overlapping same-value ranges to stress the free-data-block management. | |
1204 | * If it fails, it will overflow the data array. | |
1205 | */ | |
1206 | for(i=0; i<(0x120000>>UTRIE2_SHIFT_2)/2; ++i) { | |
1207 | utrie2_setRange32(trie, 0x740, 0x840-1, 1, TRUE, &errorCode); | |
1208 | utrie2_setRange32(trie, 0x780, 0x880-1, 1, TRUE, &errorCode); | |
1209 | utrie2_setRange32(trie, 0x740, 0x840-1, 2, TRUE, &errorCode); | |
1210 | utrie2_setRange32(trie, 0x780, 0x880-1, 3, TRUE, &errorCode); | |
1211 | } | |
1212 | /* make blocks that will be free during compaction */ | |
1213 | utrie2_setRange32(trie, 0x1000, 0x3000-1, 2, TRUE, &errorCode); | |
1214 | utrie2_setRange32(trie, 0x2000, 0x4000-1, 3, TRUE, &errorCode); | |
1215 | utrie2_setRange32(trie, 0x1000, 0x4000-1, 1, TRUE, &errorCode); | |
1216 | /* set some values for lead surrogate code units */ | |
1217 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd800, 90, &errorCode); | |
1218 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd999, 94, &errorCode); | |
1219 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xdbff, 99, &errorCode); | |
1220 | if(U_FAILURE(errorCode)) { | |
1221 | log_err("error: setting lots of ranges into a trie (%s) failed - %s\n", | |
1222 | testName, u_errorName(errorCode)); | |
1223 | utrie2_close(trie); | |
1224 | return; | |
1225 | } | |
1226 | ||
1227 | trie=testTrieSerializeAllValueBits(testName, trie, FALSE, | |
b331163b | 1228 | checkRanges, UPRV_LENGTHOF(checkRanges)); |
729e4ab9 A |
1229 | utrie2_close(trie); |
1230 | } | |
1231 | ||
1232 | static void | |
1233 | GrowDataArrayTest(void) { | |
1234 | static const CheckRange | |
1235 | checkRanges[]={ | |
1236 | { 0, 1 }, | |
1237 | { 0x720, 2 }, | |
1238 | { 0x7a0, 3 }, | |
1239 | { 0x8a0, 4 }, | |
1240 | { 0x110000, 5 } | |
1241 | }; | |
1242 | static const char *const testName="grow-data"; | |
1243 | ||
1244 | UTrie2 *trie; | |
1245 | int32_t i; | |
1246 | UErrorCode errorCode; | |
1247 | ||
1248 | errorCode=U_ZERO_ERROR; | |
1249 | trie=utrie2_open(1, 0xbad, &errorCode); | |
1250 | if(U_FAILURE(errorCode)) { | |
1251 | log_err("error: utrie2_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1252 | return; | |
1253 | } | |
1254 | ||
1255 | /* | |
1256 | * Use utrie2_set32() not utrie2_setRange32() to write non-initialValue-data. | |
1257 | * Should grow/reallocate the data array to a sufficient length. | |
1258 | */ | |
1259 | for(i=0; i<0x1000; ++i) { | |
1260 | utrie2_set32(trie, i, 2, &errorCode); | |
1261 | } | |
1262 | for(i=0x720; i<0x1100; ++i) { /* some overlap */ | |
1263 | utrie2_set32(trie, i, 3, &errorCode); | |
1264 | } | |
1265 | for(i=0x7a0; i<0x900; ++i) { | |
1266 | utrie2_set32(trie, i, 4, &errorCode); | |
1267 | } | |
1268 | for(i=0x8a0; i<0x110000; ++i) { | |
1269 | utrie2_set32(trie, i, 5, &errorCode); | |
1270 | } | |
1271 | for(i=0xd800; i<0xdc00; ++i) { | |
1272 | utrie2_set32ForLeadSurrogateCodeUnit(trie, i, 1, &errorCode); | |
1273 | } | |
1274 | /* set some values for lead surrogate code units */ | |
1275 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd800, 90, &errorCode); | |
1276 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xd999, 94, &errorCode); | |
1277 | utrie2_set32ForLeadSurrogateCodeUnit(trie, 0xdbff, 99, &errorCode); | |
1278 | if(U_FAILURE(errorCode)) { | |
1279 | log_err("error: setting lots of values into a trie (%s) failed - %s\n", | |
1280 | testName, u_errorName(errorCode)); | |
1281 | utrie2_close(trie); | |
1282 | return; | |
1283 | } | |
1284 | ||
1285 | trie=testTrieSerializeAllValueBits(testName, trie, FALSE, | |
b331163b | 1286 | checkRanges, UPRV_LENGTHOF(checkRanges)); |
729e4ab9 A |
1287 | utrie2_close(trie); |
1288 | } | |
1289 | ||
1290 | /* versions 1 and 2 --------------------------------------------------------- */ | |
1291 | ||
729e4ab9 A |
1292 | static UNewTrie * |
1293 | makeNewTrie1WithRanges(const char *testName, | |
1294 | const SetRange setRanges[], int32_t countSetRanges, | |
1295 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
1296 | UNewTrie *newTrie; | |
1297 | uint32_t initialValue, errorValue; | |
1298 | uint32_t value; | |
1299 | UChar32 start, limit; | |
1300 | int32_t i; | |
1301 | UErrorCode errorCode; | |
1302 | UBool overwrite, ok; | |
1303 | ||
1304 | log_verbose("\ntesting Trie '%s'\n", testName); | |
1305 | errorCode=U_ZERO_ERROR; | |
1306 | getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
1307 | newTrie=utrie_open(NULL, NULL, 2000, | |
1308 | initialValue, initialValue, | |
1309 | FALSE); | |
1310 | if(U_FAILURE(errorCode)) { | |
1311 | log_err("error: utrie_open(%s) failed: %s\n", testName, u_errorName(errorCode)); | |
1312 | return NULL; | |
1313 | } | |
1314 | ||
1315 | /* set values from setRanges[] */ | |
1316 | ok=TRUE; | |
1317 | for(i=0; i<countSetRanges; ++i) { | |
1318 | start=setRanges[i].start; | |
1319 | limit=setRanges[i].limit; | |
1320 | value=setRanges[i].value; | |
1321 | overwrite=setRanges[i].overwrite; | |
1322 | if((limit-start)==1 && overwrite) { | |
1323 | ok&=utrie_set32(newTrie, start, value); | |
1324 | } else { | |
1325 | ok&=utrie_setRange32(newTrie, start, limit, value, overwrite); | |
1326 | } | |
1327 | } | |
1328 | if(ok) { | |
1329 | return newTrie; | |
1330 | } else { | |
1331 | log_err("error: setting values into a trie1 (%s) failed\n", testName); | |
1332 | utrie_close(newTrie); | |
1333 | return NULL; | |
1334 | } | |
1335 | } | |
1336 | ||
1337 | static void | |
1338 | testTrie2FromTrie1(const char *testName, | |
1339 | const SetRange setRanges[], int32_t countSetRanges, | |
1340 | const CheckRange checkRanges[], int32_t countCheckRanges) { | |
1341 | uint32_t memory1_16[3000], memory1_32[3000]; | |
1342 | int32_t length16, length32; | |
1343 | UChar lead; | |
1344 | ||
1345 | char name[40]; | |
1346 | ||
1347 | UNewTrie *newTrie1_16, *newTrie1_32; | |
1348 | UTrie trie1_16, trie1_32; | |
1349 | UTrie2 *trie2; | |
1350 | uint32_t initialValue, errorValue; | |
1351 | UErrorCode errorCode; | |
1352 | ||
1353 | newTrie1_16=makeNewTrie1WithRanges(testName, | |
1354 | setRanges, countSetRanges, | |
1355 | checkRanges, countCheckRanges); | |
1356 | if(newTrie1_16==NULL) { | |
1357 | return; | |
1358 | } | |
1359 | newTrie1_32=utrie_clone(NULL, newTrie1_16, NULL, 0); | |
1360 | if(newTrie1_32==NULL) { | |
1361 | utrie_close(newTrie1_16); | |
1362 | return; | |
1363 | } | |
1364 | errorCode=U_ZERO_ERROR; | |
1365 | length16=utrie_serialize(newTrie1_16, memory1_16, sizeof(memory1_16), | |
1366 | NULL, TRUE, &errorCode); | |
1367 | length32=utrie_serialize(newTrie1_32, memory1_32, sizeof(memory1_32), | |
1368 | NULL, FALSE, &errorCode); | |
1369 | utrie_unserialize(&trie1_16, memory1_16, length16, &errorCode); | |
1370 | utrie_unserialize(&trie1_32, memory1_32, length32, &errorCode); | |
1371 | utrie_close(newTrie1_16); | |
1372 | utrie_close(newTrie1_32); | |
1373 | if(U_FAILURE(errorCode)) { | |
1374 | log_err("error: utrie_serialize or unserialize(%s) failed: %s\n", | |
1375 | testName, u_errorName(errorCode)); | |
1376 | return; | |
1377 | } | |
1378 | ||
1379 | getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue); | |
1380 | ||
1381 | uprv_strcpy(name, testName); | |
1382 | uprv_strcat(name, ".16"); | |
1383 | trie2=utrie2_fromUTrie(&trie1_16, errorValue, &errorCode); | |
1384 | if(U_SUCCESS(errorCode)) { | |
1385 | testFrozenTrie(name, trie2, UTRIE2_16_VALUE_BITS, checkRanges, countCheckRanges); | |
1386 | for(lead=0xd800; lead<0xdc00; ++lead) { | |
1387 | uint32_t value1, value2; | |
1388 | value1=UTRIE_GET16_FROM_LEAD(&trie1_16, lead); | |
1389 | value2=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie2, lead); | |
1390 | if(value1!=value2) { | |
1391 | log_err("error: utrie2_fromUTrie(%s) wrong value %ld!=%ld " | |
1392 | "from lead surrogate code unit U+%04lx\n", | |
1393 | name, (long)value2, (long)value1, (long)lead); | |
1394 | break; | |
1395 | } | |
1396 | } | |
1397 | } | |
1398 | utrie2_close(trie2); | |
1399 | ||
1400 | uprv_strcpy(name, testName); | |
1401 | uprv_strcat(name, ".32"); | |
1402 | trie2=utrie2_fromUTrie(&trie1_32, errorValue, &errorCode); | |
1403 | if(U_SUCCESS(errorCode)) { | |
1404 | testFrozenTrie(name, trie2, UTRIE2_32_VALUE_BITS, checkRanges, countCheckRanges); | |
1405 | for(lead=0xd800; lead<0xdc00; ++lead) { | |
1406 | uint32_t value1, value2; | |
1407 | value1=UTRIE_GET32_FROM_LEAD(&trie1_32, lead); | |
1408 | value2=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie2, lead); | |
1409 | if(value1!=value2) { | |
1410 | log_err("error: utrie2_fromUTrie(%s) wrong value %ld!=%ld " | |
1411 | "from lead surrogate code unit U+%04lx\n", | |
1412 | name, (long)value2, (long)value1, (long)lead); | |
1413 | break; | |
1414 | } | |
1415 | } | |
1416 | } | |
1417 | utrie2_close(trie2); | |
1418 | } | |
1419 | ||
1420 | static void | |
1421 | Trie12ConversionTest(void) { | |
1422 | testTrie2FromTrie1("trie1->trie2", | |
b331163b A |
1423 | setRanges2, UPRV_LENGTHOF(setRanges2), |
1424 | checkRanges2, UPRV_LENGTHOF(checkRanges2)); | |
729e4ab9 A |
1425 | } |
1426 | ||
1427 | void | |
1428 | addTrie2Test(TestNode** root) { | |
1429 | addTest(root, &TrieTest, "tsutil/trie2test/TrieTest"); | |
1430 | addTest(root, &EnumNewTrieForLeadSurrogateTest, | |
1431 | "tsutil/trie2test/EnumNewTrieForLeadSurrogateTest"); | |
1432 | addTest(root, &DummyTrieTest, "tsutil/trie2test/DummyTrieTest"); | |
1433 | addTest(root, &FreeBlocksTest, "tsutil/trie2test/FreeBlocksTest"); | |
1434 | addTest(root, &GrowDataArrayTest, "tsutil/trie2test/GrowDataArrayTest"); | |
729e4ab9 A |
1435 | addTest(root, &Trie12ConversionTest, "tsutil/trie2test/Trie12ConversionTest"); |
1436 | } |