]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
374ca955 | 3 | * Copyright (c) 2002-2004, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
b75a7d8f A |
6 | */ |
7 | #include "unicode/uset.h" | |
8 | #include "unicode/ustring.h" | |
9 | #include "cintltst.h" | |
10 | #include <stdlib.h> | |
11 | #include <string.h> | |
12 | ||
13 | #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
14 | ||
15 | #define TEST(x) addTest(root, &x, "uset/" # x) | |
16 | ||
17 | static void TestAPI(void); | |
18 | static void Testj2269(void); | |
19 | static void TestSerialized(void); | |
374ca955 | 20 | static void TestNonInvariantPattern(void); |
b75a7d8f A |
21 | |
22 | void addUSetTest(TestNode** root); | |
23 | ||
24 | static void expect(const USet* set, | |
25 | const char* inList, | |
26 | const char* outList, | |
27 | UErrorCode* ec); | |
28 | static void expectContainment(const USet* set, | |
29 | const char* list, | |
30 | UBool isIn); | |
31 | static char oneUCharToChar(UChar32 c); | |
32 | static void expectItems(const USet* set, | |
33 | const char* items); | |
34 | ||
35 | void | |
36 | addUSetTest(TestNode** root) { | |
37 | TEST(TestAPI); | |
38 | TEST(Testj2269); | |
39 | TEST(TestSerialized); | |
374ca955 | 40 | TEST(TestNonInvariantPattern); |
b75a7d8f A |
41 | } |
42 | ||
43 | /*------------------------------------------------------------------ | |
44 | * Tests | |
45 | *------------------------------------------------------------------*/ | |
46 | ||
47 | static void Testj2269() { | |
48 | UErrorCode status = U_ZERO_ERROR; | |
49 | UChar a[4] = { 0x61, 0x62, 0x63, 0 }; | |
50 | USet *s = uset_open(1, 0); | |
51 | uset_addString(s, a, 3); | |
52 | a[0] = 0x63; a[1] = 0x63; | |
53 | expect(s, "{abc}", "{ccc}", &status); | |
54 | uset_close(s); | |
55 | } | |
56 | ||
57 | static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */ | |
58 | static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1; | |
59 | ||
374ca955 A |
60 | static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */ |
61 | static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1; | |
62 | ||
63 | static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */ | |
64 | static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1; | |
65 | ||
b75a7d8f A |
66 | static const UChar STR_bc[] = {98,99,0}; /* "bc" */ |
67 | static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1; | |
68 | ||
69 | static const UChar STR_ab[] = {97,98,0}; /* "ab" */ | |
70 | static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1; | |
71 | ||
72 | /** | |
73 | * Basic API test for uset.x | |
74 | */ | |
75 | static void TestAPI() { | |
76 | USet* set; | |
374ca955 | 77 | USet* set2; |
b75a7d8f A |
78 | UErrorCode ec; |
79 | ||
80 | /* [] */ | |
81 | set = uset_open(1, 1); | |
82 | uset_clear(set); | |
83 | expect(set, "", "abc{ab}", NULL); | |
84 | uset_close(set); | |
85 | ||
86 | /* [ABC] */ | |
87 | set = uset_open(0x0041, 0x0043); | |
88 | expect(set, "ABC", "DEF{ab}", NULL); | |
89 | uset_close(set); | |
90 | ||
91 | /* [a-c{ab}] */ | |
92 | ec = U_ZERO_ERROR; | |
93 | set = uset_openPattern(PAT, PAT_LEN, &ec); | |
94 | if(U_FAILURE(ec)) { | |
95 | log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec)); | |
96 | return; | |
97 | } | |
374ca955 A |
98 | if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) { |
99 | log_data_err("uset_resemblesPattern of PAT failed\n"); | |
100 | } | |
b75a7d8f A |
101 | expect(set, "abc{ab}", "def{bc}", &ec); |
102 | ||
103 | /* [a-d{ab}] */ | |
104 | uset_add(set, 0x64); | |
105 | expect(set, "abcd{ab}", "ef{bc}", NULL); | |
106 | ||
107 | /* [acd{ab}{bc}] */ | |
108 | uset_remove(set, 0x62); | |
109 | uset_addString(set, STR_bc, STR_bc_LEN); | |
110 | expect(set, "acd{ab}{bc}", "bef{cd}", NULL); | |
111 | ||
112 | /* [acd{bc}] */ | |
113 | uset_removeString(set, STR_ab, STR_ab_LEN); | |
114 | expect(set, "acd{bc}", "bfg{ab}", NULL); | |
115 | ||
116 | /* [^acd{bc}] */ | |
117 | uset_complement(set); | |
118 | expect(set, "bef{bc}", "acd{ac}", NULL); | |
119 | ||
120 | /* [a-e{bc}] */ | |
121 | uset_complement(set); | |
122 | uset_addRange(set, 0x0062, 0x0065); | |
123 | expect(set, "abcde{bc}", "fg{ab}", NULL); | |
124 | ||
125 | /* [de{bc}] */ | |
126 | uset_removeRange(set, 0x0050, 0x0063); | |
127 | expect(set, "de{bc}", "bcfg{ab}", NULL); | |
128 | ||
374ca955 A |
129 | /* [g-l] */ |
130 | uset_set(set, 0x0067, 0x006C); | |
131 | expect(set, "ghijkl", "de{bc}", NULL); | |
132 | ||
133 | if (uset_indexOf(set, 0x0067) != 0) { | |
134 | log_data_err("uset_indexOf failed finding correct index of 'g'\n"); | |
135 | } | |
136 | ||
137 | if (uset_charAt(set, 0) != 0x0067) { | |
138 | log_data_err("uset_charAt failed finding correct char 'g' at index 0\n"); | |
139 | } | |
140 | ||
141 | /* How to test this one...? */ | |
142 | uset_compact(set); | |
143 | ||
144 | /* [g-i] */ | |
145 | uset_retain(set, 0x0067, 0x0069); | |
146 | expect(set, "ghi", "dejkl{bc}", NULL); | |
147 | ||
148 | /* UCHAR_ASCII_HEX_DIGIT */ | |
149 | uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec); | |
150 | if(U_FAILURE(ec)) { | |
151 | log_data_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec)); | |
152 | return; | |
153 | } | |
154 | expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL); | |
155 | ||
156 | /* [] */ | |
157 | set2 = uset_open(1, 1); | |
158 | uset_clear(set2); | |
159 | ||
160 | /* space */ | |
161 | uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec); | |
162 | expect(set2, " ", "abcdefghi{bc}", NULL); | |
163 | ||
164 | /* [a-c] */ | |
165 | uset_set(set2, 0x0061, 0x0063); | |
166 | /* [g-i] */ | |
167 | uset_set(set, 0x0067, 0x0069); | |
168 | ||
169 | /* [a-c g-i] */ | |
170 | uset_complementAll(set, set2); | |
171 | expect(set, "abcghi", "def{bc}", NULL); | |
172 | ||
173 | /* [g-i] */ | |
174 | uset_removeAll(set, set2); | |
175 | expect(set, "ghi", "abcdef{bc}", NULL); | |
176 | ||
177 | /* [a-c g-i] */ | |
178 | uset_addAll(set2, set); | |
179 | expect(set2, "abcghi", "def{bc}", NULL); | |
180 | ||
181 | /* [g-i] */ | |
182 | uset_retainAll(set2, set); | |
183 | expect(set2, "ghi", "abcdef{bc}", NULL); | |
184 | ||
b75a7d8f | 185 | uset_close(set); |
374ca955 | 186 | uset_close(set2); |
b75a7d8f A |
187 | } |
188 | ||
189 | /*------------------------------------------------------------------ | |
190 | * Support | |
191 | *------------------------------------------------------------------*/ | |
192 | ||
193 | /** | |
194 | * Verifies that the given set contains the characters and strings in | |
195 | * inList, and does not contain those in outList. Also verifies that | |
196 | * 'set' is not NULL and that 'ec' succeeds. | |
197 | * @param set the set to test, or NULL (on error) | |
198 | * @param inList list of set contents, in iteration order. Format is | |
199 | * list of individual strings, in iteration order, followed by sorted | |
200 | * list of strings, delimited by {}. This means we do not test | |
201 | * characters '{' or '}' and we do not test strings containing those | |
202 | * characters either. | |
203 | * @param outList list of things not in the set. Same format as | |
204 | * inList. | |
205 | * @param ec an error code, checked for success. May be NULL in which | |
206 | * case it is ignored. | |
207 | */ | |
208 | static void expect(const USet* set, | |
209 | const char* inList, | |
210 | const char* outList, | |
211 | UErrorCode* ec) { | |
212 | if (ec!=NULL && U_FAILURE(*ec)) { | |
213 | log_err("FAIL: %s\n", u_errorName(*ec)); | |
214 | return; | |
215 | } | |
216 | if (set == NULL) { | |
217 | log_err("FAIL: USet is NULL\n"); | |
218 | return; | |
219 | } | |
220 | expectContainment(set, inList, TRUE); | |
221 | expectContainment(set, outList, FALSE); | |
222 | expectItems(set, inList); | |
223 | } | |
224 | ||
225 | static void expectContainment(const USet* set, | |
226 | const char* list, | |
227 | UBool isIn) { | |
228 | const char* p = list; | |
374ca955 A |
229 | UChar ustr[4096]; |
230 | char *pat; | |
b75a7d8f | 231 | UErrorCode ec; |
374ca955 | 232 | int32_t rangeStart = -1, rangeEnd = -1, length; |
b75a7d8f A |
233 | |
234 | ec = U_ZERO_ERROR; | |
374ca955 A |
235 | length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); |
236 | if(U_FAILURE(ec)) { | |
237 | log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec)); | |
238 | return; | |
239 | } | |
240 | pat=aescstrdup(ustr, length); | |
b75a7d8f A |
241 | |
242 | while (*p) { | |
243 | if (*p=='{') { | |
244 | const char* stringStart = ++p; | |
245 | int32_t stringLength = 0; | |
246 | char strCopy[64]; | |
247 | ||
374ca955 A |
248 | while (*p++ != '}') { |
249 | } | |
250 | stringLength = (int32_t)(p - stringStart - 1); | |
b75a7d8f A |
251 | strncpy(strCopy, stringStart, stringLength); |
252 | strCopy[stringLength] = 0; | |
253 | ||
254 | u_charsToUChars(stringStart, ustr, stringLength); | |
255 | ||
256 | if (uset_containsString(set, ustr, stringLength) == isIn) { | |
257 | log_verbose("Ok: %s %s \"%s\"\n", pat, | |
258 | (isIn ? "contains" : "does not contain"), | |
259 | strCopy); | |
260 | } else { | |
261 | log_err("FAIL: %s %s \"%s\"\n", pat, | |
262 | (isIn ? "does not contain" : "contains"), | |
263 | strCopy); | |
264 | } | |
265 | } | |
266 | ||
267 | else { | |
268 | UChar32 c; | |
269 | ||
270 | u_charsToUChars(p, ustr, 1); | |
271 | c = ustr[0]; | |
272 | ||
273 | if (uset_contains(set, c) == isIn) { | |
274 | log_verbose("Ok: %s %s '%c'\n", pat, | |
275 | (isIn ? "contains" : "does not contain"), | |
276 | *p); | |
277 | } else { | |
278 | log_err("FAIL: %s %s '%c'\n", pat, | |
279 | (isIn ? "does not contain" : "contains"), | |
280 | *p); | |
281 | } | |
282 | ||
283 | /* Test the range API too by looking for ranges */ | |
284 | if (c == rangeEnd+1) { | |
285 | rangeEnd = c; | |
286 | } else { | |
287 | if (rangeStart >= 0) { | |
288 | if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { | |
289 | log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, | |
290 | (isIn ? "contains" : "does not contain"), | |
291 | rangeStart, rangeEnd); | |
292 | } else { | |
293 | log_err("FAIL: %s %s U+%04X-U+%04X\n", pat, | |
294 | (isIn ? "does not contain" : "contains"), | |
295 | rangeStart, rangeEnd); | |
296 | } | |
297 | } | |
298 | rangeStart = rangeEnd = c; | |
299 | } | |
300 | ||
301 | ++p; | |
302 | } | |
303 | } | |
304 | ||
305 | if (rangeStart >= 0) { | |
306 | if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { | |
307 | log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, | |
308 | (isIn ? "contains" : "does not contain"), | |
309 | rangeStart, rangeEnd); | |
310 | } else { | |
311 | log_err("FAIL: %s %s U+%04X-U+%04X\n", pat, | |
312 | (isIn ? "does not contain" : "contains"), | |
313 | rangeStart, rangeEnd); | |
314 | } | |
315 | } | |
316 | } | |
317 | ||
374ca955 | 318 | /* This only works for invariant BMP chars */ |
b75a7d8f A |
319 | static char oneUCharToChar(UChar32 c) { |
320 | UChar ubuf[1]; | |
321 | char buf[1]; | |
322 | ubuf[0] = (UChar) c; | |
323 | u_UCharsToChars(ubuf, buf, 1); | |
324 | return buf[0]; | |
325 | } | |
326 | ||
327 | static void expectItems(const USet* set, | |
328 | const char* items) { | |
329 | const char* p = items; | |
374ca955 A |
330 | UChar ustr[4096], itemStr[4096]; |
331 | char buf[4096]; | |
332 | char *pat; | |
b75a7d8f A |
333 | UErrorCode ec; |
334 | int32_t expectedSize = 0; | |
335 | int32_t itemCount = uset_getItemCount(set); | |
336 | int32_t itemIndex = 0; | |
337 | UChar32 start = 1, end = 0; | |
374ca955 | 338 | int32_t itemLen = 0, length; |
b75a7d8f A |
339 | |
340 | ec = U_ZERO_ERROR; | |
374ca955 | 341 | length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); |
b75a7d8f A |
342 | if (U_FAILURE(ec)) { |
343 | log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec)); | |
344 | return; | |
345 | } | |
374ca955 | 346 | pat=aescstrdup(ustr, length); |
b75a7d8f A |
347 | |
348 | if (uset_isEmpty(set) != (strlen(items)==0)) { | |
349 | log_err("FAIL: %s should return %s from isEmpty\n", | |
350 | pat, | |
351 | strlen(items)==0 ? "TRUE" : "FALSE"); | |
352 | } | |
353 | ||
354 | /* Don't test patterns starting with "[^" */ | |
374ca955 | 355 | if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) { |
b75a7d8f A |
356 | return; |
357 | } | |
358 | ||
359 | while (*p) { | |
360 | ||
361 | ++expectedSize; | |
362 | ||
363 | if (start > end || start == -1) { | |
364 | /* Fetch our next item */ | |
365 | if (itemIndex >= itemCount) { | |
366 | log_err("FAIL: ran out of items iterating %s\n", pat); | |
367 | return; | |
368 | } | |
369 | ||
370 | itemLen = uset_getItem(set, itemIndex, &start, &end, | |
371 | itemStr, sizeof(itemStr), &ec); | |
372 | if (U_FAILURE(ec) || itemLen < 0) { | |
373 | log_err("FAIL: uset_getItem => %s\n", u_errorName(ec)); | |
374 | return; | |
375 | } | |
376 | ||
377 | if (itemLen == 0) { | |
378 | log_verbose("Ok: %s item %d is %c-%c\n", pat, | |
379 | itemIndex, oneUCharToChar(start), | |
380 | oneUCharToChar(end)); | |
381 | } else { | |
382 | itemStr[itemLen] = 0; | |
383 | u_UCharsToChars(itemStr, buf, itemLen+1); | |
384 | log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf); | |
385 | } | |
386 | ||
387 | ++itemIndex; | |
388 | } | |
389 | ||
390 | if (*p=='{') { | |
391 | const char* stringStart = ++p; | |
392 | int32_t stringLength = 0; | |
393 | char strCopy[64]; | |
394 | ||
374ca955 A |
395 | while (*p++ != '}') { |
396 | } | |
397 | stringLength = (int32_t)(p - stringStart - 1); | |
b75a7d8f A |
398 | strncpy(strCopy, stringStart, stringLength); |
399 | strCopy[stringLength] = 0; | |
400 | ||
401 | u_charsToUChars(stringStart, ustr, stringLength); | |
402 | ustr[stringLength] = 0; | |
403 | ||
404 | if (itemLen == 0) { | |
405 | log_err("FAIL: for %s expect \"%s\" next, but got a char\n", | |
406 | pat, strCopy); | |
407 | return; | |
408 | } | |
409 | ||
410 | if (u_strcmp(ustr, itemStr) != 0) { | |
411 | log_err("FAIL: for %s expect \"%s\" next\n", | |
412 | pat, strCopy); | |
413 | return; | |
414 | } | |
415 | } | |
416 | ||
417 | else { | |
418 | UChar32 c; | |
419 | ||
420 | u_charsToUChars(p, ustr, 1); | |
421 | c = ustr[0]; | |
422 | ||
423 | if (itemLen != 0) { | |
424 | log_err("FAIL: for %s expect '%c' next, but got a string\n", | |
425 | pat, *p); | |
426 | return; | |
427 | } | |
428 | ||
429 | if (c != start++) { | |
430 | log_err("FAIL: for %s expect '%c' next\n", | |
431 | pat, *p); | |
432 | return; | |
433 | } | |
434 | ||
435 | ++p; | |
436 | } | |
437 | } | |
438 | ||
439 | if (uset_size(set) == expectedSize) { | |
440 | log_verbose("Ok: %s size is %d\n", pat, expectedSize); | |
441 | } else { | |
442 | log_err("FAIL: %s size is %d, expected %d\n", | |
443 | pat, uset_size(set), expectedSize); | |
444 | } | |
445 | } | |
446 | ||
447 | static void | |
448 | TestSerialized() { | |
449 | uint16_t buffer[1000]; | |
450 | USerializedSet sset; | |
451 | USet *set; | |
452 | UErrorCode errorCode; | |
453 | UChar32 c; | |
454 | int32_t length; | |
455 | ||
456 | /* use a pattern that generates both BMP and supplementary code points */ | |
457 | U_STRING_DECL(pattern, "[:Cf:]", 6); | |
458 | U_STRING_INIT(pattern, "[:Cf:]", 6); | |
459 | ||
460 | errorCode=U_ZERO_ERROR; | |
461 | set=uset_openPattern(pattern, -1, &errorCode); | |
462 | if(U_FAILURE(errorCode)) { | |
463 | log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode)); | |
464 | return; | |
465 | } | |
466 | ||
467 | length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode); | |
468 | if(U_FAILURE(errorCode)) { | |
469 | log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode)); | |
470 | uset_close(set); | |
471 | return; | |
472 | } | |
473 | ||
474 | uset_getSerializedSet(&sset, buffer, length); | |
475 | for(c=0; c<=0x10ffff; ++c) { | |
476 | if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) { | |
477 | log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c); | |
478 | break; | |
479 | } | |
480 | } | |
481 | ||
482 | uset_close(set); | |
483 | } | |
484 | ||
374ca955 A |
485 | /** |
486 | * Make sure that when non-invariant chars are passed to uset_openPattern | |
487 | * they do not cause an ugly failure mode (e.g. assertion failure). | |
488 | * JB#3795. | |
489 | */ | |
490 | static void | |
491 | TestNonInvariantPattern() { | |
492 | UErrorCode ec = U_ZERO_ERROR; | |
493 | /* The critical part of this test is that the following pattern | |
494 | must contain a non-invariant character. */ | |
495 | static const char *pattern = "[:ccc!=0:]"; | |
496 | UChar buf[256]; | |
497 | int32_t len = u_unescape(pattern, buf, 256); | |
498 | /* This test 'fails' by having an assertion failure within the | |
499 | following call. It passes by running to completion with no | |
500 | assertion failure. */ | |
501 | USet *set = uset_openPattern(buf, len, &ec); | |
502 | uset_close(set); | |
503 | } | |
504 | ||
b75a7d8f | 505 | /*eof*/ |