]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/ucptrietest.c
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / cintltst / ucptrietest.c
CommitLineData
3d1f044b
A
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// ucptrietest.c (modified from trie2test.c)
5// created: 2017dec29 Markus W. Scherer
6
7#include <stdio.h>
8#include "unicode/utypes.h"
9#include "unicode/ucptrie.h"
10#include "unicode/umutablecptrie.h"
11#include "unicode/utf.h"
12#include "unicode/utf16.h"
13#include "unicode/utf8.h"
14#include "uassert.h"
15#include "ucptrie_impl.h"
16#include "utrie.h"
17#include "cstring.h"
18#include "cmemory.h"
19#include "udataswp.h"
20#include "cintltst.h"
21
22void addUCPTrieTest(TestNode** root);
23
24/* Values for setting possibly overlapping, out-of-order ranges of values */
25typedef struct SetRange {
26 UChar32 start, limit;
27 uint32_t value;
28} SetRange;
29
30/*
31 * Values for testing:
32 * value is set from the previous boundary's limit to before
33 * this boundary's limit
34 *
35 * There must be an entry with limit 0 and the intialValue.
36 * It may be preceded by an entry with negative limit and the errorValue.
37 */
38typedef struct CheckRange {
39 UChar32 limit;
40 uint32_t value;
41} CheckRange;
42
43static int32_t
44skipSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges) {
45 int32_t i;
46 for(i=0; i<countCheckRanges && checkRanges[i].limit<=0; ++i) {}
47 return i;
48}
49
50static int32_t
51getSpecialValues(const CheckRange checkRanges[], int32_t countCheckRanges,
52 uint32_t *pInitialValue, uint32_t *pErrorValue) {
53 int32_t i=0;
54 if(i<countCheckRanges && checkRanges[i].limit<0) {
55 *pErrorValue=checkRanges[i++].value;
56 } else {
57 *pErrorValue=0xad;
58 }
59 if(i<countCheckRanges && checkRanges[i].limit==0) {
60 *pInitialValue=checkRanges[i++].value;
61 } else {
62 *pInitialValue=0;
63 }
64 return i;
65}
66
67/* ucptrie_enum() callback, modifies a value */
68static uint32_t U_CALLCONV
69testFilter(const void *context, uint32_t value) {
340931cb 70 (void)context; // suppress compiler warnings about unused variable
3d1f044b
A
71 return value ^ 0x5555;
72}
73
74static UBool
75doCheckRange(const char *name, const char *variant,
76 UChar32 start, UChar32 end, uint32_t value,
77 UChar32 expEnd, uint32_t expValue) {
78 if (end < 0) {
79 if (expEnd >= 0) {
80 log_err("error: %s getRanges (%s) fails to deliver range [U+%04lx..U+%04lx].0x%lx\n",
81 name, variant, (long)start, (long)expEnd, (long)expValue);
82 }
83 return FALSE;
84 }
85 if (expEnd < 0) {
86 log_err("error: %s getRanges (%s) delivers unexpected range [U+%04lx..U+%04lx].0x%lx\n",
87 name, variant, (long)start, (long)end, (long)value);
88 return FALSE;
89 }
90 if (end != expEnd || value != expValue) {
91 log_err("error: %s getRanges (%s) delivers wrong range [U+%04lx..U+%04lx].0x%lx "
92 "instead of [U+%04lx..U+%04lx].0x%lx\n",
93 name, variant, (long)start, (long)end, (long)value,
94 (long)start, (long)expEnd, (long)expValue);
95 return FALSE;
96 }
97 return TRUE;
98}
99
100// Test iteration starting from various UTF-8/16 and trie structure boundaries.
101// Also test starting partway through lead & trail surrogates for fixed-surrogate-value options,
102// and partway through supplementary code points.
103static UChar32 iterStarts[] = {
104 0, 0x7f, 0x80, 0x7ff, 0x800, 0xfff, 0x1000,
105 0xd7ff, 0xd800, 0xd888, 0xdddd, 0xdfff, 0xe000,
106 0xffff, 0x10000, 0x12345, 0x10ffff, 0x110000
107};
108
109static void
110testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie,
111 UCPMapRangeOption option, uint32_t surrValue,
112 const CheckRange checkRanges[], int32_t countCheckRanges) {
113 const char *const typeName = trie == NULL ? "mutableTrie" : "trie";
114 const char *const optionName = option == UCPMAP_RANGE_NORMAL ? "normal" :
115 option == UCPMAP_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr";
116 char name[80];
117 int32_t s;
118 for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) {
119 UChar32 start = iterStarts[s];
120 int32_t i, i0;
121 UChar32 end, expEnd;
122 uint32_t value, expValue;
123 // No need to go from each iteration start to the very end.
124 int32_t innerLoopCount;
125
126 sprintf(name, "%s/%s(%s) min=U+%04lx", typeName, optionName, testName, (long)start);
127
128 // Skip over special values and low ranges.
129 for (i = 0; i < countCheckRanges && checkRanges[i].limit <= start; ++i) {}
130 i0 = i;
131 // without value handler
132 for (innerLoopCount = 0;; ++i, start = end + 1) {
133 if (i < countCheckRanges) {
134 expEnd = checkRanges[i].limit - 1;
135 expValue = checkRanges[i].value;
136 } else {
137 expEnd = -1;
138 expValue = value = 0x5005;
139 }
140 end = trie != NULL ?
141 ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, &value) :
142 umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, &value);
143 if (!doCheckRange(name, "without value handler", start, end, value, expEnd, expValue)) {
144 break;
145 }
146 if (s != 0 && ++innerLoopCount == 5) { break; }
147 }
148 // with value handler
149 for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) {
150 if (i < countCheckRanges) {
151 expEnd = checkRanges[i].limit - 1;
152 expValue = checkRanges[i].value ^ 0x5555;
153 } else {
154 expEnd = -1;
155 expValue = value = 0x5005;
156 }
157 end = trie != NULL ?
158 ucptrie_getRange(trie, start, option, surrValue ^ 0x5555, testFilter, NULL, &value) :
159 umutablecptrie_getRange(mutableTrie, start, option, surrValue ^ 0x5555,
160 testFilter, NULL, &value);
161 if (!doCheckRange(name, "with value handler", start, end, value, expEnd, expValue)) {
162 break;
163 }
164 if (s != 0 && ++innerLoopCount == 5) { break; }
165 }
166 // without value
167 for (i = i0, start = iterStarts[s], innerLoopCount = 0;; ++i, start = end + 1) {
168 if (i < countCheckRanges) {
169 expEnd = checkRanges[i].limit - 1;
170 } else {
171 expEnd = -1;
172 }
173 end = trie != NULL ?
174 ucptrie_getRange(trie, start, option, surrValue, NULL, NULL, NULL) :
175 umutablecptrie_getRange(mutableTrie, start, option, surrValue, NULL, NULL, NULL);
176 if (!doCheckRange(name, "without value", start, end, 0, expEnd, 0)) {
177 break;
178 }
179 if (s != 0 && ++innerLoopCount == 5) { break; }
180 }
181 }
182}
183
184static void
185testTrieGetters(const char *testName, const UCPTrie *trie,
186 UCPTrieType type, UCPTrieValueWidth valueWidth,
187 const CheckRange checkRanges[], int32_t countCheckRanges) {
188 uint32_t initialValue, errorValue;
189 uint32_t value, value2;
190 UChar32 start, limit;
191 int32_t i, countSpecials;
192 int32_t countErrors=0;
193
194 const char *const typeName = "trie";
195
196 countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
197
198 start=0;
199 for(i=countSpecials; i<countCheckRanges; ++i) {
200 limit=checkRanges[i].limit;
201 value=checkRanges[i].value;
202
203 while(start<limit) {
204 if (start <= 0x7f) {
205 if (valueWidth == UCPTRIE_VALUE_BITS_16) {
206 value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_16, start);
207 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
208 value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_32, start);
209 } else {
210 value2 = UCPTRIE_ASCII_GET(trie, UCPTRIE_8, start);
211 }
212 if (value != value2) {
213 log_err("error: %s(%s).fromASCII(U+%04lx)==0x%lx instead of 0x%lx\n",
214 typeName, testName, (long)start, (long)value2, (long)value);
215 ++countErrors;
216 }
217 }
218 if (type == UCPTRIE_TYPE_FAST) {
219 if(start<=0xffff) {
220 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
221 value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_16, start);
222 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
223 value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_32, start);
224 } else {
225 value2=UCPTRIE_FAST_BMP_GET(trie, UCPTRIE_8, start);
226 }
227 if(value!=value2) {
228 log_err("error: %s(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
229 typeName, testName, (long)start, (long)value2, (long)value);
230 ++countErrors;
231 }
232 } else {
233 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
234 value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_16, start);
235 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
236 value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_32, start);
237 } else {
238 value2 = UCPTRIE_FAST_SUPP_GET(trie, UCPTRIE_8, start);
239 }
240 if(value!=value2) {
241 log_err("error: %s(%s).fromSupp(U+%04lx)==0x%lx instead of 0x%lx\n",
242 typeName, testName, (long)start, (long)value2, (long)value);
243 ++countErrors;
244 }
245 }
246 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
247 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, start);
248 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
249 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, start);
250 } else {
251 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, start);
252 }
253 } else {
254 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
255 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, start);
256 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
257 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, start);
258 } else {
259 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, start);
260 }
261 }
262 if(value!=value2) {
263 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
264 typeName, testName, (long)start, (long)value2, (long)value);
265 ++countErrors;
266 }
267 value2=ucptrie_get(trie, start);
268 if(value!=value2) {
269 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
270 typeName, testName, (long)start, (long)value2, (long)value);
271 ++countErrors;
272 }
273 ++start;
274 if(countErrors>10) {
275 return;
276 }
277 }
278 }
279
280 /* test linear ASCII range from the data array pointer (access to "internal" field) */
281 start=0;
282 for(i=countSpecials; i<countCheckRanges && start<=0x7f; ++i) {
283 limit=checkRanges[i].limit;
284 value=checkRanges[i].value;
285
286 while(start<limit && start<=0x7f) {
287 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
288 value2=trie->data.ptr16[start];
289 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
290 value2=trie->data.ptr32[start];
291 } else {
292 value2=trie->data.ptr8[start];
293 }
294 if(value!=value2) {
295 log_err("error: %s(%s).asciiData[U+%04lx]==0x%lx instead of 0x%lx\n",
296 typeName, testName, (long)start, (long)value2, (long)value);
297 ++countErrors;
298 }
299 ++start;
300 if(countErrors>10) {
301 return;
302 }
303 }
304 }
305
306 /* test errorValue */
307 if (type == UCPTRIE_TYPE_FAST) {
308 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
309 value = UCPTRIE_FAST_GET(trie, UCPTRIE_16, -1);
310 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_16, 0x110000);
311 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
312 value = UCPTRIE_FAST_GET(trie, UCPTRIE_32, -1);
313 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_32, 0x110000);
314 } else {
315 value = UCPTRIE_FAST_GET(trie, UCPTRIE_8, -1);
316 value2 = UCPTRIE_FAST_GET(trie, UCPTRIE_8, 0x110000);
317 }
318 } else {
319 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
320 value = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, -1);
321 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_16, 0x110000);
322 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
323 value = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, -1);
324 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_32, 0x110000);
325 } else {
326 value = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, -1);
327 value2 = UCPTRIE_SMALL_GET(trie, UCPTRIE_8, 0x110000);
328 }
329 }
330 if(value!=errorValue || value2!=errorValue) {
331 log_err("error: %s(%s).get(out of range) != errorValue\n",
332 typeName, testName);
333 }
334 value=ucptrie_get(trie, -1);
335 value2=ucptrie_get(trie, 0x110000);
336 if(value!=errorValue || value2!=errorValue) {
337 log_err("error: %s(%s).get(out of range) != errorValue\n",
338 typeName, testName);
339 }
340}
341
342static void
343testBuilderGetters(const char *testName, const UMutableCPTrie *mutableTrie,
344 const CheckRange checkRanges[], int32_t countCheckRanges) {
345 uint32_t initialValue, errorValue;
346 uint32_t value, value2;
347 UChar32 start, limit;
348 int32_t i, countSpecials;
349 int32_t countErrors=0;
350
351 const char *const typeName = "mutableTrie";
352
353 countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
354
355 start=0;
356 for(i=countSpecials; i<countCheckRanges; ++i) {
357 limit=checkRanges[i].limit;
358 value=checkRanges[i].value;
359
360 while(start<limit) {
361 value2=umutablecptrie_get(mutableTrie, start);
362 if(value!=value2) {
363 log_err("error: %s(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
364 typeName, testName, (long)start, (long)value2, (long)value);
365 ++countErrors;
366 }
367 ++start;
368 if(countErrors>10) {
369 return;
370 }
371 }
372 }
373
374 /* test errorValue */
375 value=umutablecptrie_get(mutableTrie, -1);
376 value2=umutablecptrie_get(mutableTrie, 0x110000);
377 if(value!=errorValue || value2!=errorValue) {
378 log_err("error: %s(%s).get(out of range) != errorValue\n",
379 typeName, testName);
380 }
381}
382
/*
 * True if the UTF-16 string s of the given length ends with a lead surrogate
 * and cp is a trail surrogate, that is, if appending cp would combine the two
 * into a surrogate pair.
 * Arguments are parenthesized so that expressions such as (buf + 1) or
 * (n - 1) expand correctly.
 */
#define ACCIDENTAL_SURROGATE_PAIR(s, length, cp) \
    ((length) > 0 && U16_IS_LEAD((s)[(length) - 1]) && U_IS_TRAIL(cp))
384
385static void
386testTrieUTF16(const char *testName,
387 const UCPTrie *trie, UCPTrieValueWidth valueWidth,
388 const CheckRange checkRanges[], int32_t countCheckRanges) {
389 UChar s[30000];
390 uint32_t values[16000];
391
392 const UChar *p, *limit;
393
394 uint32_t errorValue = ucptrie_get(trie, -1);
395 uint32_t value, expected;
396 UChar32 prevCP, c, c2;
397 int32_t i, length, sIndex, countValues;
398
399 /* write a string */
400 prevCP=0;
401 length=countValues=0;
402 for(i=skipSpecialValues(checkRanges, countCheckRanges); i<countCheckRanges; ++i) {
403 value=checkRanges[i].value;
404 /* write three code points */
405 if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) {
406 U16_APPEND_UNSAFE(s, length, prevCP); /* start of the range */
407 values[countValues++]=value;
408 }
409 U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values));
410 c=checkRanges[i].limit;
411 prevCP=(prevCP+c)/2; /* middle of the range */
412 if(!ACCIDENTAL_SURROGATE_PAIR(s, length, prevCP)) {
413 U16_APPEND_UNSAFE(s, length, prevCP);
414 values[countValues++]=value;
415 }
416 prevCP=c;
417 --c; /* end of the range */
418 if(!ACCIDENTAL_SURROGATE_PAIR(s, length, c)) {
419 U16_APPEND_UNSAFE(s, length, c);
420 values[countValues++]=value;
421 }
422 }
423 limit=s+length;
424 if(length>UPRV_LENGTHOF(s)) {
425 log_err("UTF-16 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s));
426 return;
427 }
428 if(countValues>UPRV_LENGTHOF(values)) {
429 log_err("UTF-16 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values));
430 return;
431 }
432
433 /* try forward */
434 p=s;
435 i=0;
436 while(p<limit) {
437 sIndex=(int32_t)(p-s);
438 U16_NEXT(s, sIndex, length, c2);
439 c=0x33;
440 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
441 UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_16, p, limit, c, value);
442 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
443 UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_32, p, limit, c, value);
444 } else {
445 UCPTRIE_FAST_U16_NEXT(trie, UCPTRIE_8, p, limit, c, value);
446 }
447 expected = U_IS_SURROGATE(c) ? errorValue : values[i];
448 if(value!=expected) {
449 log_err("error: wrong value from UCPTRIE_NEXT(%s)(U+%04lx): 0x%lx instead of 0x%lx\n",
450 testName, (long)c, (long)value, (long)expected);
451 }
452 if(c!=c2) {
453 log_err("error: wrong code point from UCPTRIE_NEXT(%s): U+%04lx != U+%04lx\n",
454 testName, (long)c, (long)c2);
455 continue;
456 }
457 ++i;
458 }
459
460 /* try backward */
461 p=limit;
462 i=countValues;
463 while(s<p) {
464 --i;
465 sIndex=(int32_t)(p-s);
466 U16_PREV(s, 0, sIndex, c2);
467 c=0x33;
468 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
469 UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_16, s, p, c, value);
470 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
471 UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_32, s, p, c, value);
472 } else {
473 UCPTRIE_FAST_U16_PREV(trie, UCPTRIE_8, s, p, c, value);
474 }
475 expected = U_IS_SURROGATE(c) ? errorValue : values[i];
476 if(value!=expected) {
477 log_err("error: wrong value from UCPTRIE_PREV(%s)(U+%04lx): 0x%lx instead of 0x%lx\n",
478 testName, (long)c, (long)value, (long)expected);
479 }
480 if(c!=c2) {
481 log_err("error: wrong code point from UCPTRIE_PREV(%s): U+%04lx != U+%04lx\n",
482 testName, c, c2);
483 }
484 }
485}
486
487static void
488testTrieUTF8(const char *testName,
489 const UCPTrie *trie, UCPTrieValueWidth valueWidth,
490 const CheckRange checkRanges[], int32_t countCheckRanges) {
491 // Note: The byte sequence comments refer to the original UTF-8 definition.
492 // Starting with ICU 60, any sequence that is not a prefix of a valid one
493 // is treated as multiple single-byte errors.
494 // For testing, we only rely on U8_... and UCPTrie UTF-8 macros
495 // iterating consistently.
496 static const uint8_t illegal[]={
497 0xc0, 0x80, /* non-shortest U+0000 */
498 0xc1, 0xbf, /* non-shortest U+007f */
499 0xc2, /* truncated */
500 0xe0, 0x90, 0x80, /* non-shortest U+0400 */
501 0xe0, 0xa0, /* truncated */
502 0xed, 0xa0, 0x80, /* lead surrogate U+d800 */
503 0xed, 0xbf, 0xbf, /* trail surrogate U+dfff */
504 0xf0, 0x8f, 0xbf, 0xbf, /* non-shortest U+ffff */
505 0xf0, 0x90, 0x80, /* truncated */
506 0xf4, 0x90, 0x80, 0x80, /* beyond-Unicode U+110000 */
507 0xf8, 0x80, 0x80, 0x80, /* truncated */
508 0xf8, 0x80, 0x80, 0x80, 0x80, /* 5-byte UTF-8 */
509 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, /* truncated */
510 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, /* 6-byte UTF-8 */
511 0xfe,
512 0xff
513 };
514 uint8_t s[60000];
515 uint32_t values[16000];
516
517 const uint8_t *p, *limit;
518
519 uint32_t initialValue, errorValue;
520 uint32_t value, expectedBytes, actualBytes;
521 UChar32 prevCP, c;
522 int32_t i, countSpecials, length, countValues;
523 int32_t prev8, i8;
524
525 countSpecials=getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
526
527 /* write a string */
528 prevCP=0;
529 length=countValues=0;
530 /* first a couple of trail bytes in lead position */
531 s[length++]=0x80;
532 values[countValues++]=errorValue;
533 s[length++]=0xbf;
534 values[countValues++]=errorValue;
535 prev8=i8=0;
536 for(i=countSpecials; i<countCheckRanges; ++i) {
537 value=checkRanges[i].value;
538 /* write three legal (or surrogate) code points */
539 U8_APPEND_UNSAFE(s, length, prevCP); /* start of the range */
540 if(U_IS_SURROGATE(prevCP)) {
541 // A surrogate byte sequence counts as 3 single-byte errors.
542 values[countValues++]=errorValue;
543 values[countValues++]=errorValue;
544 values[countValues++]=errorValue;
545 } else {
546 values[countValues++]=value;
547 }
548 U_ASSERT(length < UPRV_LENGTHOF(s) && countValues < UPRV_LENGTHOF(values));
549 c=checkRanges[i].limit;
550 prevCP=(prevCP+c)/2; /* middle of the range */
551 U8_APPEND_UNSAFE(s, length, prevCP);
552 if(U_IS_SURROGATE(prevCP)) {
553 // A surrogate byte sequence counts as 3 single-byte errors.
554 values[countValues++]=errorValue;
555 values[countValues++]=errorValue;
556 values[countValues++]=errorValue;
557 } else {
558 values[countValues++]=value;
559 }
560 prevCP=c;
561 --c; /* end of the range */
562 U8_APPEND_UNSAFE(s, length, c);
563 if(U_IS_SURROGATE(c)) {
564 // A surrogate byte sequence counts as 3 single-byte errors.
565 values[countValues++]=errorValue;
566 values[countValues++]=errorValue;
567 values[countValues++]=errorValue;
568 } else {
569 values[countValues++]=value;
570 }
571 /* write an illegal byte sequence */
340931cb 572 if(i8<(int32_t)sizeof(illegal)) {
3d1f044b
A
573 U8_FWD_1(illegal, i8, sizeof(illegal));
574 while(prev8<i8) {
575 s[length++]=illegal[prev8++];
576 }
577 values[countValues++]=errorValue;
578 }
579 }
580 /* write the remaining illegal byte sequences */
340931cb 581 while(i8<(int32_t)sizeof(illegal)) {
3d1f044b
A
582 U8_FWD_1(illegal, i8, sizeof(illegal));
583 while(prev8<i8) {
584 s[length++]=illegal[prev8++];
585 }
586 values[countValues++]=errorValue;
587 }
588 limit=s+length;
589 if(length>UPRV_LENGTHOF(s)) {
590 log_err("UTF-8 test string length %d > capacity %d\n", (int)length, (int)UPRV_LENGTHOF(s));
591 return;
592 }
593 if(countValues>UPRV_LENGTHOF(values)) {
594 log_err("UTF-8 test values length %d > capacity %d\n", (int)countValues, (int)UPRV_LENGTHOF(values));
595 return;
596 }
597
598 /* try forward */
599 p=s;
600 i=0;
601 while(p<limit) {
602 prev8=i8=(int32_t)(p-s);
603 U8_NEXT(s, i8, length, c);
604 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
605 UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_16, p, limit, value);
606 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
607 UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_32, p, limit, value);
608 } else {
609 UCPTRIE_FAST_U8_NEXT(trie, UCPTRIE_8, p, limit, value);
610 }
611 expectedBytes=0;
612 if(value!=values[i] || i8!=(p-s)) {
613 int32_t k=prev8;
614 while(k<i8) {
615 expectedBytes=(expectedBytes<<8)|s[k++];
616 }
617 }
618 if(i8==(p-s)) {
619 actualBytes=expectedBytes;
620 } else {
621 actualBytes=0;
622 int32_t k=prev8;
623 while(k<(p-s)) {
624 actualBytes=(actualBytes<<8)|s[k++];
625 }
626 }
627 if(value!=values[i]) {
628 log_err("error: wrong value from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx) (read %d bytes): "
629 "0x%lx instead of 0x%lx (from bytes %lx)\n",
630 testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)((p-s)-prev8),
631 (long)value, (long)values[i], (unsigned long)expectedBytes);
632 }
633 if(i8!=(p-s)) {
634 log_err("error: wrong end index from UCPTRIE_FAST_U8_NEXT(%s)(from %d %lx->U+%04lx): "
635 "%ld != %ld (bytes %lx)\n",
636 testName, (int)prev8, (unsigned long)actualBytes, (long)c,
637 (long)(p-s), (long)i8, (unsigned long)expectedBytes);
638 break;
639 }
640 ++i;
641 }
642
643 /* try backward */
644 p=limit;
645 i=countValues;
646 while(s<p) {
647 --i;
648 prev8=i8=(int32_t)(p-s);
649 U8_PREV(s, 0, i8, c);
650 if(valueWidth==UCPTRIE_VALUE_BITS_16) {
651 UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_16, s, p, value);
652 } else if(valueWidth==UCPTRIE_VALUE_BITS_32) {
653 UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_32, s, p, value);
654 } else {
655 UCPTRIE_FAST_U8_PREV(trie, UCPTRIE_8, s, p, value);
656 }
657 expectedBytes=0;
658 if(value!=values[i] || i8!=(p-s)) {
659 int32_t k=i8;
660 while(k<prev8) {
661 expectedBytes=(expectedBytes<<8)|s[k++];
662 }
663 }
664 if(i8==(p-s)) {
665 actualBytes=expectedBytes;
666 } else {
667 actualBytes=0;
668 int32_t k=(int32_t)(p-s);
669 while(k<prev8) {
670 actualBytes=(actualBytes<<8)|s[k++];
671 }
672 }
673 if(value!=values[i]) {
674 log_err("error: wrong value from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx) (read %d bytes): "
675 "0x%lx instead of 0x%lx (from bytes %lx)\n",
676 testName, (int)prev8, (unsigned long)actualBytes, (long)c, (int)(prev8-(p-s)),
677 (long)value, (long)values[i], (unsigned long)expectedBytes);
678 }
679 if(i8!=(p-s)) {
680 log_err("error: wrong end index from UCPTRIE_FAST_U8_PREV(%s)(from %d %lx->U+%04lx): "
681 "%ld != %ld (bytes %lx)\n",
682 testName, (int)prev8, (unsigned long)actualBytes, (long)c,
683 (long)(p-s), (long)i8, (unsigned long)expectedBytes);
684 break;
685 }
686 }
687}
688
689static void
690testTrie(const char *testName, const UCPTrie *trie,
691 UCPTrieType type, UCPTrieValueWidth valueWidth,
692 const CheckRange checkRanges[], int32_t countCheckRanges) {
693 testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
694 testTrieGetRanges(testName, trie, NULL, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
695 if (type == UCPTRIE_TYPE_FAST) {
696 testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges);
697 testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges);
698 }
699}
700
701static void
702testBuilder(const char *testName, const UMutableCPTrie *mutableTrie,
703 const CheckRange checkRanges[], int32_t countCheckRanges) {
704 testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges);
705 testTrieGetRanges(testName, NULL, mutableTrie, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
706}
707
/* Buffers used by testTrieSerialize(): the serialized trie and its byte-swapped copy. */
static uint32_t storage[120000];
static uint32_t swapped[120000];
710
711static void
712testTrieSerialize(const char *testName, UMutableCPTrie *mutableTrie,
713 UCPTrieType type, UCPTrieValueWidth valueWidth, UBool withSwap,
714 const CheckRange checkRanges[], int32_t countCheckRanges) {
715 UCPTrie *trie;
716 int32_t length1, length2, length3;
717 UErrorCode errorCode;
718
719 /* clone the trie so that the caller can reuse the original */
720 errorCode=U_ZERO_ERROR;
721 mutableTrie = umutablecptrie_clone(mutableTrie, &errorCode);
722 if(U_FAILURE(errorCode)) {
723 log_err("error: umutablecptrie_clone(%s) failed - %s\n",
724 testName, u_errorName(errorCode));
725 return;
726 }
727
728 /*
729 * This is not a loop, but simply a block that we can exit with "break"
730 * when something goes wrong.
731 */
732 do {
733 errorCode=U_ZERO_ERROR;
734 trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
735 if (U_FAILURE(errorCode)) {
736 log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n",
737 testName, u_errorName(errorCode));
738 break;
739 }
740 errorCode=U_ZERO_ERROR;
741 length1=ucptrie_toBinary(trie, NULL, 0, &errorCode);
742 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
743 log_err("error: ucptrie_toBinary(%s) preflighting set %s != U_BUFFER_OVERFLOW_ERROR\n",
744 testName, u_errorName(errorCode));
745 break;
746 }
747 errorCode=U_ZERO_ERROR;
748 length2=ucptrie_toBinary(trie, storage, sizeof(storage), &errorCode);
749 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
750 log_err("error: ucptrie_toBinary(%s) needs more memory\n", testName);
751 break;
752 }
753 if(U_FAILURE(errorCode)) {
754 log_err("error: ucptrie_toBinary(%s) failed: %s\n", testName, u_errorName(errorCode));
755 break;
756 }
757 if(length1!=length2) {
758 log_err("error: trie serialization (%s) lengths different: "
759 "preflight vs. serialize\n", testName);
760 break;
761 }
762
763 testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
764 ucptrie_close(trie);
765 trie=NULL;
766
767 if(withSwap) {
768 int32_t swappedLength;
769
770 UDataSwapper *ds;
771
772 /* swap to opposite-endian */
773 uprv_memset(swapped, 0x55, length2);
774 ds=udata_openSwapper(U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
775 !U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
776 swappedLength=ucptrie_swap(ds, storage, -1, NULL, &errorCode);
777 if(U_FAILURE(errorCode) || swappedLength!=length2) {
778 log_err("error: ucptrie_swap(%s to OE preflighting) failed (%s) "
779 "or before/after lengths different\n",
780 testName, u_errorName(errorCode));
781 udata_closeSwapper(ds);
782 break;
783 }
784 swappedLength=ucptrie_swap(ds, storage, length2, swapped, &errorCode);
785 udata_closeSwapper(ds);
786 if(U_FAILURE(errorCode) || swappedLength!=length2) {
787 log_err("error: ucptrie_swap(%s to OE) failed (%s) or before/after lengths different\n",
788 testName, u_errorName(errorCode));
789 break;
790 }
791
792 /* swap back to platform-endian */
793 uprv_memset(storage, 0xaa, length2);
794 ds=udata_openSwapper(!U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
795 U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
796 swappedLength=ucptrie_swap(ds, swapped, -1, NULL, &errorCode);
797 if(U_FAILURE(errorCode) || swappedLength!=length2) {
798 log_err("error: ucptrie_swap(%s to PE preflighting) failed (%s) "
799 "or before/after lengths different\n",
800 testName, u_errorName(errorCode));
801 udata_closeSwapper(ds);
802 break;
803 }
804 swappedLength=ucptrie_swap(ds, swapped, length2, storage, &errorCode);
805 udata_closeSwapper(ds);
806 if(U_FAILURE(errorCode) || swappedLength!=length2) {
807 log_err("error: ucptrie_swap(%s to PE) failed (%s) or before/after lengths different\n",
808 testName, u_errorName(errorCode));
809 break;
810 }
811 }
812
813 trie = ucptrie_openFromBinary(type, valueWidth, storage, length2, &length3, &errorCode);
814 if(U_FAILURE(errorCode)) {
815 log_err("error: ucptrie_openFromBinary(%s) failed, %s\n", testName, u_errorName(errorCode));
816 break;
817 }
818 if(type != ucptrie_getType(trie)) {
819 log_err("error: trie serialization (%s) did not preserve trie type\n", testName);
820 break;
821 }
822 if(valueWidth != ucptrie_getValueWidth(trie)) {
823 log_err("error: trie serialization (%s) did not preserve data value width\n", testName);
824 break;
825 }
826 if(length2!=length3) {
827 log_err("error: trie serialization (%s) lengths different: "
828 "serialize vs. unserialize\n", testName);
829 break;
830 }
831 /* overwrite the storage that is not supposed to be needed */
832 uprv_memset((char *)storage+length3, 0xfa, (int32_t)(sizeof(storage)-length3));
833
834 {
835 errorCode=U_ZERO_ERROR;
836 UCPTrie *any = ucptrie_openFromBinary(UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
837 storage, length3, NULL, &errorCode);
838 if (U_SUCCESS(errorCode)) {
839 if (type != ucptrie_getType(any)) {
840 log_err("error: ucptrie_openFromBinary("
841 "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getType() wrong\n");
842 }
843 if (valueWidth != ucptrie_getValueWidth(any)) {
844 log_err("error: ucptrie_openFromBinary("
845 "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY).getValueWidth() wrong\n");
846 }
847 ucptrie_close(any);
848 } else {
849 log_err("error: ucptrie_openFromBinary("
850 "UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY) failed - %s\n",
851 u_errorName(errorCode));
852 }
853 }
854
855 errorCode=U_ZERO_ERROR;
856 testTrie(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
857 {
858 /* make a mutable trie from an immutable one */
859 uint32_t value, value2;
860 UMutableCPTrie *mutable2 = umutablecptrie_fromUCPTrie(trie, &errorCode);
861 if(U_FAILURE(errorCode)) {
862 log_err("error: umutablecptrie_fromUCPTrie(unserialized %s) failed - %s\n",
863 testName, u_errorName(errorCode));
864 break;
865 }
866
867 value=umutablecptrie_get(mutable2, 0xa1);
868 umutablecptrie_set(mutable2, 0xa1, 789, &errorCode);
869 value2=umutablecptrie_get(mutable2, 0xa1);
870 umutablecptrie_set(mutable2, 0xa1, value, &errorCode);
871 if(U_FAILURE(errorCode) || value2!=789) {
872 log_err("error: modifying a mutableTrie-from-UCPTrie (%s) failed - %s\n",
873 testName, u_errorName(errorCode));
874 }
875 testBuilder(testName, mutable2, checkRanges, countCheckRanges);
876 umutablecptrie_close(mutable2);
877 }
878 } while(0);
879
880 umutablecptrie_close(mutableTrie);
881 ucptrie_close(trie);
882}
883
884static UMutableCPTrie *
885testTrieSerializeAllValueWidth(const char *testName,
886 UMutableCPTrie *mutableTrie, UBool withClone,
887 const CheckRange checkRanges[], int32_t countCheckRanges) {
888 char name[40];
889 uint32_t oredValues = 0;
890 int32_t i;
891 for (i = 0; i < countCheckRanges; ++i) {
892 oredValues |= checkRanges[i].value;
893 }
894
895 testBuilder(testName, mutableTrie, checkRanges, countCheckRanges);
896
897 if (oredValues <= 0xffff) {
898 uprv_strcpy(name, testName);
899 uprv_strcat(name, ".16");
900 testTrieSerialize(name, mutableTrie,
901 UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, withClone,
902 checkRanges, countCheckRanges);
903 }
904
905 uprv_strcpy(name, testName);
906 uprv_strcat(name, ".32");
907 testTrieSerialize(name, mutableTrie,
908 UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_32, withClone,
909 checkRanges, countCheckRanges);
910
911 if (oredValues <= 0xff) {
912 uprv_strcpy(name, testName);
913 uprv_strcat(name, ".8");
914 testTrieSerialize(name, mutableTrie,
915 UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, withClone,
916 checkRanges, countCheckRanges);
917 }
918
919 if (oredValues <= 0xffff) {
920 uprv_strcpy(name, testName);
921 uprv_strcat(name, ".small16");
922 testTrieSerialize(name, mutableTrie,
923 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_16, withClone,
924 checkRanges, countCheckRanges);
925 }
926
927 return mutableTrie;
928}
929
930static UMutableCPTrie *
931makeTrieWithRanges(const char *testName, UBool withClone,
932 const SetRange setRanges[], int32_t countSetRanges,
933 const CheckRange checkRanges[], int32_t countCheckRanges) {
934 UMutableCPTrie *mutableTrie;
935 uint32_t initialValue, errorValue;
936 uint32_t value;
937 UChar32 start, limit;
938 int32_t i;
939 UErrorCode errorCode;
940
941 log_verbose("\ntesting Trie '%s'\n", testName);
942 errorCode=U_ZERO_ERROR;
943 getSpecialValues(checkRanges, countCheckRanges, &initialValue, &errorValue);
944 mutableTrie = umutablecptrie_open(initialValue, errorValue, &errorCode);
945 if(U_FAILURE(errorCode)) {
946 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
947 return NULL;
948 }
949
950 /* set values from setRanges[] */
951 for(i=0; i<countSetRanges; ++i) {
952 if(withClone && i==countSetRanges/2) {
953 /* switch to a clone in the middle of setting values */
954 UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode);
955 if(U_FAILURE(errorCode)) {
956 log_err("error: umutablecptrie_clone(%s) failed - %s\n",
957 testName, u_errorName(errorCode));
958 errorCode=U_ZERO_ERROR; /* continue with the original */
959 } else {
960 umutablecptrie_close(mutableTrie);
961 mutableTrie = clone;
962 }
963 }
964 start=setRanges[i].start;
965 limit=setRanges[i].limit;
966 value=setRanges[i].value;
967 if ((limit - start) == 1) {
968 umutablecptrie_set(mutableTrie, start, value, &errorCode);
969 } else {
970 umutablecptrie_setRange(mutableTrie, start, limit-1, value, &errorCode);
971 }
972 }
973
974 if(U_SUCCESS(errorCode)) {
975 return mutableTrie;
976 } else {
977 log_err("error: setting values into a mutable trie (%s) failed - %s\n",
978 testName, u_errorName(errorCode));
979 umutablecptrie_close(mutableTrie);
980 return NULL;
981 }
982}
983
984static void
985testTrieRanges(const char *testName, UBool withClone,
986 const SetRange setRanges[], int32_t countSetRanges,
987 const CheckRange checkRanges[], int32_t countCheckRanges) {
988 UMutableCPTrie *mutableTrie = makeTrieWithRanges(
989 testName, withClone, setRanges, countSetRanges, checkRanges, countCheckRanges);
990 if (mutableTrie != NULL) {
991 mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, withClone,
992 checkRanges, countCheckRanges);
993 umutablecptrie_close(mutableTrie);
994 }
995}
996
997/* test data ----------------------------------------------------------------*/
998
/*
 * Test data "set1": set consecutive ranges, even with value 0.
 * Each entry is { start, limit, value }; makeTrieWithRanges() writes value to
 * start..limit-1, applying the entries in array order.
 * Two stretches are not covered by any entry (0xda9e..0xdad9 and
 * 0x60003..0xf0002); checkRanges1 expects the value 0 there.
 */
static const SetRange
setRanges1[]={
    { 0, 0x40, 0 },
    { 0x40, 0xe7, 0x34 },
    { 0xe7, 0x3400, 0 },
    { 0x3400, 0x9fa6, 0x61 },
    { 0x9fa6, 0xda9e, 0x31 },
    { 0xdada, 0xeeee, 0xff },
    { 0xeeee, 0x11111, 1 },
    { 0x11111, 0x44444, 0x61 },
    { 0x44444, 0x60003, 0 },
    { 0xf0003, 0xf0004, 0xf },
    { 0xf0004, 0xf0006, 0x10 },
    { 0xf0006, 0xf0007, 0x11 },
    { 0xf0007, 0xf0040, 0x12 },
    { 0xf0040, 0x110000, 0 }
};
1017
/*
 * Expected contents after applying setRanges1.
 * Each entry is { limit, value }: value holds from the previous entry's limit
 * up to, but not including, this entry's limit.
 * The leading entry with limit 0 supplies the trie's initial value (0).
 */
static const CheckRange
checkRanges1[]={
    { 0, 0 },
    { 0x40, 0 },
    { 0xe7, 0x34 },
    { 0x3400, 0 },
    { 0x9fa6, 0x61 },
    { 0xda9e, 0x31 },
    { 0xdada, 0 },
    { 0xeeee, 0xff },
    { 0x11111, 1 },
    { 0x44444, 0x61 },
    { 0xf0003, 0 },
    { 0xf0004, 0xf },
    { 0xf0006, 0x10 },
    { 0xf0007, 0x11 },
    { 0xf0040, 0x12 },
    { 0x110000, 0 }
};
1037
/*
 * Test data "set2": set some interesting overlapping ranges.
 * Entries are { start, limit, value } with an exclusive limit and are applied
 * in array order, so a later entry overwrites the overlapping part of an
 * earlier one (e.g. { 0x72, 0xdd, 3 } replaces the tail of the 0x5555 range).
 * The value 0x5555 does not fit in 8 bits, so testTrieSerializeAllValueWidth()
 * skips the 8-bit serialization for this data.
 */
static const SetRange
setRanges2[]={
    { 0x21, 0x7f, 0x5555 },
    { 0x2f800, 0x2fedc, 0x7a },
    { 0x72, 0xdd, 3 },
    { 0xdd, 0xde, 4 },
    { 0x201, 0x240, 6 }, /* 3 consecutive blocks with the same pattern but */
    { 0x241, 0x280, 6 }, /* discontiguous value ranges, testing iteration */
    { 0x281, 0x2c0, 6 },
    { 0x2f987, 0x2fa98, 5 },
    { 0x2f777, 0x2f883, 0 },
    { 0x2fedc, 0x2ffaa, 1 },
    { 0x2ffaa, 0x2ffab, 2 },
    { 0x2ffbb, 0x2ffc0, 7 }
};
1054
/*
 * Expected contents after applying setRanges2 in order.
 * Each entry is { limit, value }: value holds from the previous entry's limit
 * up to, but not including, this entry's limit.
 * The leading entry with limit 0 supplies the trie's initial value (0).
 */
static const CheckRange
checkRanges2[]={
    { 0, 0 },
    { 0x21, 0 },
    { 0x72, 0x5555 },
    { 0xdd, 3 },
    { 0xde, 4 },
    { 0x201, 0 },
    { 0x240, 6 },
    { 0x241, 0 },
    { 0x280, 6 },
    { 0x281, 0 },
    { 0x2c0, 6 },
    { 0x2f883, 0 },
    { 0x2f987, 0x7a },
    { 0x2fa98, 5 },
    { 0x2fedc, 0x7a },
    { 0x2ffaa, 1 },
    { 0x2ffab, 2 },
    { 0x2ffbb, 0 },
    { 0x2ffc0, 7 },
    { 0x110000, 0 }
};
1078
/*
 * Test data "set3": use a non-zero initial value.
 * The initial value (9) is taken from the first entry of checkRanges3.
 * Entries are { start, limit, value } with an exclusive limit, applied in
 * array order. { 0x8000, 0x89ab, 9 } explicitly writes the initial value, so
 * checkRanges3 has no boundary at 0x8000 or 0x89ab.
 */
static const SetRange
setRanges3[]={
    { 0x31, 0xa4, 1 },
    { 0x3400, 0x6789, 2 },
    { 0x8000, 0x89ab, 9 },
    { 0x9000, 0xa000, 4 },
    { 0xabcd, 0xbcde, 3 },
    { 0x55555, 0x110000, 6 }, /* highStart<U+ffff with non-initialValue */
    { 0xcccc, 0x55555, 6 }
};
1090
/*
 * Expected contents after applying setRanges3.
 * Each entry is { limit, value }: value holds from the previous entry's limit
 * up to, but not including, this entry's limit.
 * The leading entry with limit 0 supplies the trie's initial value.
 */
static const CheckRange
checkRanges3[]={
    { 0, 9 }, /* non-zero initialValue */
    { 0x31, 9 },
    { 0xa4, 1 },
    { 0x3400, 9 },
    { 0x6789, 2 },
    { 0x9000, 9 },
    { 0xa000, 4 },
    { 0xabcd, 9 },
    { 0xbcde, 3 },
    { 0xcccc, 9 },
    { 0x110000, 6 }
};
1105
/*
 * empty or single-value tries, testing highStart==0
 *
 * Placeholder data for the "set-empty" test: TrieTestSetEmpty() passes a
 * count of 0, so the single entry below is never applied to the trie.
 */
static const SetRange
setRangesEmpty[]={
    { 0, 0, 0 }, /* need some values for it to compile */
};
1111
/*
 * Expected contents of the "set-empty" trie: nothing is set, so every code
 * point up to 0x10ffff has the initial value 3 (given by the limit-0 entry).
 */
static const CheckRange
checkRangesEmpty[]={
    { 0, 3 },
    { 0x110000, 3 }
};
1117
/*
 * Data for the "set-single-value" test: one { start, limit, value } entry that
 * sets every code point 0..0x10ffff to 5.
 */
static const SetRange
setRangesSingleValue[]={
    { 0, 0x110000, 5 },
};
1122
/*
 * Expected contents of the "set-single-value" trie: the trie is opened with
 * initial value 3 (the limit-0 entry), but every code point reads back as 5.
 */
static const CheckRange
checkRangesSingleValue[]={
    { 0, 3 },
    { 0x110000, 5 }
};
1128
1129static void
1130TrieTestSet1(void) {
1131 testTrieRanges("set1", FALSE,
1132 setRanges1, UPRV_LENGTHOF(setRanges1),
1133 checkRanges1, UPRV_LENGTHOF(checkRanges1));
1134}
1135
1136static void
1137TrieTestSet2Overlap(void) {
1138 testTrieRanges("set2-overlap", FALSE,
1139 setRanges2, UPRV_LENGTHOF(setRanges2),
1140 checkRanges2, UPRV_LENGTHOF(checkRanges2));
1141}
1142
1143static void
1144TrieTestSet3Initial9(void) {
1145 testTrieRanges("set3-initial-9", FALSE,
1146 setRanges3, UPRV_LENGTHOF(setRanges3),
1147 checkRanges3, UPRV_LENGTHOF(checkRanges3));
1148}
1149
1150static void
1151TrieTestSetEmpty(void) {
1152 testTrieRanges("set-empty", FALSE,
1153 setRangesEmpty, 0,
1154 checkRangesEmpty, UPRV_LENGTHOF(checkRangesEmpty));
1155}
1156
1157static void
1158TrieTestSetSingleValue(void) {
1159 testTrieRanges("set-single-value", FALSE,
1160 setRangesSingleValue, UPRV_LENGTHOF(setRangesSingleValue),
1161 checkRangesSingleValue, UPRV_LENGTHOF(checkRangesSingleValue));
1162}
1163
1164static void
1165TrieTestSet2OverlapWithClone(void) {
1166 testTrieRanges("set2-overlap.withClone", TRUE,
1167 setRanges2, UPRV_LENGTHOF(setRanges2),
1168 checkRanges2, UPRV_LENGTHOF(checkRanges2));
1169}
1170
1171/* test mutable-trie memory management -------------------------------------- */
1172
1173static void
1174FreeBlocksTest(void) {
1175 static const CheckRange
1176 checkRanges[]={
1177 { 0, 1 },
1178 { 0x740, 1 },
1179 { 0x780, 2 },
1180 { 0x880, 3 },
1181 { 0x110000, 1 }
1182 };
1183 static const char *const testName="free-blocks";
1184
1185 UMutableCPTrie *mutableTrie;
1186 int32_t i;
1187 UErrorCode errorCode;
1188
1189 errorCode=U_ZERO_ERROR;
1190 mutableTrie=umutablecptrie_open(1, 0xad, &errorCode);
1191 if(U_FAILURE(errorCode)) {
1192 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1193 return;
1194 }
1195
1196 /*
1197 * Repeatedly set overlapping same-value ranges to stress the free-data-block management.
1198 * If it fails, it will overflow the data array.
1199 */
1200 for(i=0; i<(0x120000>>4)/2; ++i) { // 4=UCPTRIE_SHIFT_3
1201 umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 1, &errorCode);
1202 umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 1, &errorCode);
1203 umutablecptrie_setRange(mutableTrie, 0x740, 0x840-1, 2, &errorCode);
1204 umutablecptrie_setRange(mutableTrie, 0x780, 0x880-1, 3, &errorCode);
1205 }
1206 /* make blocks that will be free during compaction */
1207 umutablecptrie_setRange(mutableTrie, 0x1000, 0x3000-1, 2, &errorCode);
1208 umutablecptrie_setRange(mutableTrie, 0x2000, 0x4000-1, 3, &errorCode);
1209 umutablecptrie_setRange(mutableTrie, 0x1000, 0x4000-1, 1, &errorCode);
1210 if(U_FAILURE(errorCode)) {
1211 log_err("error: setting lots of ranges into a mutable trie (%s) failed - %s\n",
1212 testName, u_errorName(errorCode));
1213 umutablecptrie_close(mutableTrie);
1214 return;
1215 }
1216
1217 mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE,
1218 checkRanges, UPRV_LENGTHOF(checkRanges));
1219 umutablecptrie_close(mutableTrie);
1220}
1221
1222static void
1223GrowDataArrayTest(void) {
1224 static const CheckRange
1225 checkRanges[]={
1226 { 0, 1 },
1227 { 0x720, 2 },
1228 { 0x7a0, 3 },
1229 { 0x8a0, 4 },
1230 { 0x110000, 5 }
1231 };
1232 static const char *const testName="grow-data";
1233
1234 UMutableCPTrie *mutableTrie;
1235 int32_t i;
1236 UErrorCode errorCode;
1237
1238 errorCode=U_ZERO_ERROR;
1239 mutableTrie=umutablecptrie_open(1, 0xad, &errorCode);
1240 if(U_FAILURE(errorCode)) {
1241 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1242 return;
1243 }
1244
1245 /*
1246 * Use umutablecptrie_set() not umutablecptrie_setRange() to write non-initialValue-data.
1247 * Should grow/reallocate the data array to a sufficient length.
1248 */
1249 for(i=0; i<0x1000; ++i) {
1250 umutablecptrie_set(mutableTrie, i, 2, &errorCode);
1251 }
1252 for(i=0x720; i<0x1100; ++i) { /* some overlap */
1253 umutablecptrie_set(mutableTrie, i, 3, &errorCode);
1254 }
1255 for(i=0x7a0; i<0x900; ++i) {
1256 umutablecptrie_set(mutableTrie, i, 4, &errorCode);
1257 }
1258 for(i=0x8a0; i<0x110000; ++i) {
1259 umutablecptrie_set(mutableTrie, i, 5, &errorCode);
1260 }
1261 if(U_FAILURE(errorCode)) {
1262 log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n",
1263 testName, u_errorName(errorCode));
1264 umutablecptrie_close(mutableTrie);
1265 return;
1266 }
1267
1268 mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE,
1269 checkRanges, UPRV_LENGTHOF(checkRanges));
1270 umutablecptrie_close(mutableTrie);
1271}
1272
1273static void
1274ManyAllSameBlocksTest(void) {
1275 static const char *const testName="many-all-same";
1276
1277 UMutableCPTrie *mutableTrie;
1278 int32_t i;
1279 UErrorCode errorCode;
1280 CheckRange checkRanges[(0x110000 >> 12) + 1];
1281
1282 errorCode = U_ZERO_ERROR;
1283 mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode);
1284 if (U_FAILURE(errorCode)) {
1285 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1286 return;
1287 }
1288 checkRanges[0].limit = 0;
1289 checkRanges[0].value = 0xff33; // initialValue
1290
1291 // Many all-same-value blocks.
1292 for (i = 0; i < 0x110000; i += 0x1000) {
1293 uint32_t value = i >> 12;
1294 umutablecptrie_setRange(mutableTrie, i, i + 0xfff, value, &errorCode);
1295 checkRanges[value + 1].limit = i + 0x1000;
1296 checkRanges[value + 1].value = value;
1297 }
1298 for (i = 0; i < 0x110000; i += 0x1000) {
1299 uint32_t expected = i >> 12;
1300 uint32_t v0 = umutablecptrie_get(mutableTrie, i);
1301 uint32_t vfff = umutablecptrie_get(mutableTrie, i + 0xfff);
1302 if (v0 != expected || vfff != expected) {
1303 log_err("error: UMutableCPTrie U+%04lx unexpected value\n", (long)i);
1304 }
1305 }
1306
1307 mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE,
1308 checkRanges, UPRV_LENGTHOF(checkRanges));
1309 umutablecptrie_close(mutableTrie);
1310}
1311
1312static void
1313MuchDataTest(void) {
1314 static const char *const testName="much-data";
1315
1316 UMutableCPTrie *mutableTrie;
1317 int32_t r, c;
1318 UErrorCode errorCode = U_ZERO_ERROR;
1319 CheckRange checkRanges[(0x10000 >> 6) + (0x10240 >> 4) + 10];
1320
1321 mutableTrie = umutablecptrie_open(0xff33, 0xad, &errorCode);
1322 if (U_FAILURE(errorCode)) {
1323 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1324 return;
1325 }
1326 checkRanges[0].limit = 0;
1327 checkRanges[0].value = 0xff33; // initialValue
1328 r = 1;
1329
1330 // Add much data that does not compact well,
1331 // to get more than 128k data values after compaction.
1332 for (c = 0; c < 0x10000; c += 0x40) {
1333 uint32_t value = c >> 4;
1334 umutablecptrie_setRange(mutableTrie, c, c + 0x3f, value, &errorCode);
1335 checkRanges[r].limit = c + 0x40;
1336 checkRanges[r++].value = value;
1337 }
1338 checkRanges[r].limit = 0x20000;
1339 checkRanges[r++].value = 0xff33;
1340 for (c = 0x20000; c < 0x30230; c += 0x10) {
1341 uint32_t value = c >> 4;
1342 umutablecptrie_setRange(mutableTrie, c, c + 0xf, value, &errorCode);
1343 checkRanges[r].limit = c + 0x10;
1344 checkRanges[r++].value = value;
1345 }
1346 umutablecptrie_setRange(mutableTrie, 0x30230, 0x30233, 0x3023, &errorCode);
1347 checkRanges[r].limit = 0x30234;
1348 checkRanges[r++].value = 0x3023;
1349 umutablecptrie_setRange(mutableTrie, 0x30234, 0xdffff, 0x5005, &errorCode);
1350 checkRanges[r].limit = 0xe0000;
1351 checkRanges[r++].value = 0x5005;
1352 umutablecptrie_setRange(mutableTrie, 0xe0000, 0x10ffff, 0x9009, &errorCode);
1353 checkRanges[r].limit = 0x110000;
1354 checkRanges[r++].value = 0x9009;
1355 if (U_FAILURE(errorCode)) {
1356 log_err("error: setting lots of values into a mutable trie (%s) failed - %s\n",
1357 testName, u_errorName(errorCode));
1358 umutablecptrie_close(mutableTrie);
1359 return;
1360 }
1361 U_ASSERT(r <= UPRV_LENGTHOF(checkRanges));
1362
1363 testBuilder(testName, mutableTrie, checkRanges, r);
1364 testTrieSerialize("much-data.16", mutableTrie,
1365 UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, FALSE, checkRanges, r);
1366 umutablecptrie_close(mutableTrie);
1367}
1368
1369static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie,
1370 UCPMapRangeOption option,
1371 const CheckRange checkRanges[], int32_t countCheckRanges) {
1372 testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges);
1373 UErrorCode errorCode = U_ZERO_ERROR;
1374 UMutableCPTrie *clone = umutablecptrie_clone(mutableTrie, &errorCode);
1375 UCPTrie *trie;
1376 if (U_FAILURE(errorCode)) {
1377 log_err("error: umutablecptrie_clone(%s) failed: %s\n", testName, u_errorName(errorCode));
1378 return;
1379 }
1380 trie = umutablecptrie_buildImmutable(clone, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, &errorCode);
1381 umutablecptrie_close(clone);
1382 if (U_FAILURE(errorCode)) {
1383 log_err("error: umutablecptrie_buildImmutable(%s) failed: %s\n", testName, u_errorName(errorCode));
1384 return;
1385 }
1386 testTrieGetRanges(testName, trie, NULL, option, 5, checkRanges, countCheckRanges);
1387 ucptrie_close(trie);
1388}
1389
1390static void
1391TrieTestGetRangesFixedSurr(void) {
1392 static const SetRange
1393 setRangesFixedSurr[]={
1394 { 0xd000, 0xd7ff, 5 },
1395 { 0xd7ff, 0xe001, 3 },
1396 { 0xe001, 0xf900, 5 },
1397 };
1398
1399 static const CheckRange
1400 checkRangesFixedLeadSurr1[]={
1401 { 0, 0 },
1402 { 0xd000, 0 },
1403 { 0xd7ff, 5 },
1404 { 0xd800, 3 },
1405 { 0xdc00, 5 },
1406 { 0xe001, 3 },
1407 { 0xf900, 5 },
1408 { 0x110000, 0 }
1409 };
1410
1411 static const CheckRange
1412 checkRangesFixedAllSurr1[]={
1413 { 0, 0 },
1414 { 0xd000, 0 },
1415 { 0xd7ff, 5 },
1416 { 0xd800, 3 },
1417 { 0xe000, 5 },
1418 { 0xe001, 3 },
1419 { 0xf900, 5 },
1420 { 0x110000, 0 }
1421 };
1422
1423 static const CheckRange
1424 checkRangesFixedLeadSurr3[]={
1425 { 0, 0 },
1426 { 0xd000, 0 },
1427 { 0xdc00, 5 },
1428 { 0xe001, 3 },
1429 { 0xf900, 5 },
1430 { 0x110000, 0 }
1431 };
1432
1433 static const CheckRange
1434 checkRangesFixedAllSurr3[]={
1435 { 0, 0 },
1436 { 0xd000, 0 },
1437 { 0xe000, 5 },
1438 { 0xe001, 3 },
1439 { 0xf900, 5 },
1440 { 0x110000, 0 }
1441 };
1442
1443 static const CheckRange
1444 checkRangesFixedSurr4[]={
1445 { 0, 0 },
1446 { 0xd000, 0 },
1447 { 0xf900, 5 },
1448 { 0x110000, 0 }
1449 };
1450
1451 UMutableCPTrie *mutableTrie = makeTrieWithRanges(
1452 "fixedSurr", FALSE, setRangesFixedSurr, UPRV_LENGTHOF(setRangesFixedSurr),
1453 checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
1454 UErrorCode errorCode = U_ZERO_ERROR;
1455 if (mutableTrie == NULL) {
1456 return;
1457 }
1458 testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1459 checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
1460 testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1461 checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1));
1462 // Setting a range in the middle of lead surrogates makes no difference.
1463 umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode);
1464 if (U_FAILURE(errorCode)) {
1465 log_err("error: umutablecptrie_setRange(fixedSurr2) failed: %s\n", u_errorName(errorCode));
1466 umutablecptrie_close(mutableTrie);
1467 return;
1468 }
1469 testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1470 checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
1471 // Bridge the gap before the lead surrogates.
1472 umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode);
1473 if (U_FAILURE(errorCode)) {
1474 log_err("error: umutablecptrie_set(fixedSurr3) failed: %s\n", u_errorName(errorCode));
1475 umutablecptrie_close(mutableTrie);
1476 return;
1477 }
1478 testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
1479 checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3));
1480 testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1481 checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3));
1482 // Bridge the gap after the trail surrogates.
1483 umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode);
1484 if (U_FAILURE(errorCode)) {
1485 log_err("error: umutablecptrie_set(fixedSurr4) failed: %s\n", u_errorName(errorCode));
1486 umutablecptrie_close(mutableTrie);
1487 return;
1488 }
1489 testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
1490 checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4));
1491 umutablecptrie_close(mutableTrie);
1492}
1493
1494static void TestSmallNullBlockMatchesFast(void) {
1495 // The initial builder+getRange code had a bug:
1496 // When there is no null data block in the fast-index range,
1497 // but a fast-range data block starts with enough values to match a small data block,
1498 // then getRange() got confused.
1499 // The builder must prevent this.
1500 static const SetRange setRanges[] = {
1501 { 0, 0x880, 1 },
1502 // U+0880..U+088F map to initial value 0, potential match for small null data block.
1503 { 0x890, 0x1040, 2 },
1504 // U+1040..U+1050 map to 0.
1505 // First small null data block in a small-type trie.
1506 // In a fast-type trie, it is ok to match a small null data block at U+1041
1507 // but not at U+1040.
1508 { 0x1051, 0x10000, 3 },
1509 // No fast data block (block length 64) filled with 0 regardless of trie type.
1510 // Need more blocks filled with 0 than the largest range above,
1511 // and need a highStart above that so that it actually counts.
1512 { 0x20000, 0x110000, 9 }
1513 };
1514
1515 static const CheckRange checkRanges[] = {
1516 { 0x0880, 1 },
1517 { 0x0890, 0 },
1518 { 0x1040, 2 },
1519 { 0x1051, 0 },
1520 { 0x10000, 3 },
1521 { 0x20000, 0 },
1522 { 0x110000, 9 }
1523 };
1524
1525 testTrieRanges("small0-in-fast", FALSE,
1526 setRanges, UPRV_LENGTHOF(setRanges),
1527 checkRanges, UPRV_LENGTHOF(checkRanges));
1528}
1529
1530static void ShortAllSameBlocksTest(void) {
1531 static const char *const testName = "short-all-same";
1532 // Many all-same-value blocks but only of the small block length used in the mutable trie.
1533 // The builder code needs to turn a group of short ALL_SAME blocks below fastLimit
1534 // into a MIXED block, and reserve data array capacity for that.
1535 UErrorCode errorCode = U_ZERO_ERROR;
1536 UMutableCPTrie *mutableTrie = umutablecptrie_open(0, 0xad, &errorCode);
1537 CheckRange checkRanges[0x101];
1538 int32_t i;
1539 if (U_FAILURE(errorCode)) {
1540 log_err("error: umutablecptrie_open(%s) failed: %s\n", testName, u_errorName(errorCode));
1541 return;
1542 }
1543 for (i = 0; i < 0x1000; i += 0x10) {
1544 uint32_t value = i >> 4;
1545 umutablecptrie_setRange(mutableTrie, i, i + 0xf, value, &errorCode);
1546 checkRanges[value].limit = i + 0x10;
1547 checkRanges[value].value = value;
1548 }
1549 checkRanges[0x100].limit = 0x110000;
1550 checkRanges[0x100].value = 0;
1551 if (U_FAILURE(errorCode)) {
1552 log_err("error: setting values into a mutable trie (%s) failed - %s\n",
1553 testName, u_errorName(errorCode));
1554 umutablecptrie_close(mutableTrie);
1555 return;
1556 }
1557
1558 mutableTrie = testTrieSerializeAllValueWidth(testName, mutableTrie, FALSE,
1559 checkRanges, UPRV_LENGTHOF(checkRanges));
1560 umutablecptrie_close(mutableTrie);
1561}
1562
1563void
1564addUCPTrieTest(TestNode** root) {
1565 addTest(root, &TrieTestSet1, "tsutil/ucptrietest/TrieTestSet1");
1566 addTest(root, &TrieTestSet2Overlap, "tsutil/ucptrietest/TrieTestSet2Overlap");
1567 addTest(root, &TrieTestSet3Initial9, "tsutil/ucptrietest/TrieTestSet3Initial9");
1568 addTest(root, &TrieTestSetEmpty, "tsutil/ucptrietest/TrieTestSetEmpty");
1569 addTest(root, &TrieTestSetSingleValue, "tsutil/ucptrietest/TrieTestSetSingleValue");
1570 addTest(root, &TrieTestSet2OverlapWithClone, "tsutil/ucptrietest/TrieTestSet2OverlapWithClone");
1571 addTest(root, &FreeBlocksTest, "tsutil/ucptrietest/FreeBlocksTest");
1572 addTest(root, &GrowDataArrayTest, "tsutil/ucptrietest/GrowDataArrayTest");
1573 addTest(root, &ManyAllSameBlocksTest, "tsutil/ucptrietest/ManyAllSameBlocksTest");
1574 addTest(root, &MuchDataTest, "tsutil/ucptrietest/MuchDataTest");
1575 addTest(root, &TrieTestGetRangesFixedSurr, "tsutil/ucptrietest/TrieTestGetRangesFixedSurr");
1576 addTest(root, &TestSmallNullBlockMatchesFast, "tsutil/ucptrietest/TestSmallNullBlockMatchesFast");
1577 addTest(root, &ShortAllSameBlocksTest, "tsutil/ucptrietest/ShortAllSameBlocksTest");
1578}