]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usettest.c
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usettest.c
1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2005, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include <stdlib.h>
11 #include <string.h>
12
/*
 * Number of elements in a real array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * operand wherever it is used (e.g. "sizeof LENGTHOF(a)").
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Registers test function x under the name "uset/<x>".
 * Expects a variable named 'root' to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
16
17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
21 static void TestBadPattern(void);
22
23 void addUSetTest(TestNode** root);
24
25 static void expect(const USet* set,
26 const char* inList,
27 const char* outList,
28 UErrorCode* ec);
29 static void expectContainment(const USet* set,
30 const char* list,
31 UBool isIn);
32 static char oneUCharToChar(UChar32 c);
33 static void expectItems(const USet* set,
34 const char* items);
35
36 void
37 addUSetTest(TestNode** root) {
38 TEST(TestAPI);
39 TEST(Testj2269);
40 TEST(TestSerialized);
41 TEST(TestNonInvariantPattern);
42 TEST(TestBadPattern);
43 }
44
45 /*------------------------------------------------------------------
46 * Tests
47 *------------------------------------------------------------------*/
48
49 static void Testj2269() {
50 UErrorCode status = U_ZERO_ERROR;
51 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
52 USet *s = uset_open(1, 0);
53 uset_addString(s, a, 3);
54 a[0] = 0x63; a[1] = 0x63;
55 expect(s, "{abc}", "{ccc}", &status);
56 uset_close(s);
57 }
58
59 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
60 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
61
62 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
63 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
64
65 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
66 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
67
68 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
69 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
70
71 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
72 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
73
74 /**
75 * Basic API test for uset.x
76 */
77 static void TestAPI() {
78 USet* set;
79 USet* set2;
80 UErrorCode ec;
81
82 /* [] */
83 set = uset_open(1, 1);
84 uset_clear(set);
85 expect(set, "", "abc{ab}", NULL);
86 uset_close(set);
87
88 /* [ABC] */
89 set = uset_open(0x0041, 0x0043);
90 expect(set, "ABC", "DEF{ab}", NULL);
91 uset_close(set);
92
93 /* [a-c{ab}] */
94 ec = U_ZERO_ERROR;
95 set = uset_openPattern(PAT, PAT_LEN, &ec);
96 if(U_FAILURE(ec)) {
97 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
98 return;
99 }
100 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
101 log_err("uset_resemblesPattern of PAT failed\n");
102 }
103 expect(set, "abc{ab}", "def{bc}", &ec);
104
105 /* [a-d{ab}] */
106 uset_add(set, 0x64);
107 expect(set, "abcd{ab}", "ef{bc}", NULL);
108
109 /* [acd{ab}{bc}] */
110 uset_remove(set, 0x62);
111 uset_addString(set, STR_bc, STR_bc_LEN);
112 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
113
114 /* [acd{bc}] */
115 uset_removeString(set, STR_ab, STR_ab_LEN);
116 expect(set, "acd{bc}", "bfg{ab}", NULL);
117
118 /* [^acd{bc}] */
119 uset_complement(set);
120 expect(set, "bef{bc}", "acd{ac}", NULL);
121
122 /* [a-e{bc}] */
123 uset_complement(set);
124 uset_addRange(set, 0x0062, 0x0065);
125 expect(set, "abcde{bc}", "fg{ab}", NULL);
126
127 /* [de{bc}] */
128 uset_removeRange(set, 0x0050, 0x0063);
129 expect(set, "de{bc}", "bcfg{ab}", NULL);
130
131 /* [g-l] */
132 uset_set(set, 0x0067, 0x006C);
133 expect(set, "ghijkl", "de{bc}", NULL);
134
135 if (uset_indexOf(set, 0x0067) != 0) {
136 log_err("uset_indexOf failed finding correct index of 'g'\n");
137 }
138
139 if (uset_charAt(set, 0) != 0x0067) {
140 log_err("uset_charAt failed finding correct char 'g' at index 0\n");
141 }
142
143 /* How to test this one...? */
144 uset_compact(set);
145
146 /* [g-i] */
147 uset_retain(set, 0x0067, 0x0069);
148 expect(set, "ghi", "dejkl{bc}", NULL);
149
150 /* UCHAR_ASCII_HEX_DIGIT */
151 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
152 if(U_FAILURE(ec)) {
153 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
154 return;
155 }
156 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
157
158 /* [ab] */
159 uset_clear(set);
160 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
161 expect(set, "ab", "def{ab}", NULL);
162 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
163 log_err("set should not conatin all characters of \"bc\" \n");
164 }
165
166 /* [] */
167 set2 = uset_open(1, 1);
168 uset_clear(set2);
169
170 /* space */
171 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
172 expect(set2, " ", "abcdefghi{bc}", NULL);
173
174 /* [a-c] */
175 uset_set(set2, 0x0061, 0x0063);
176 /* [g-i] */
177 uset_set(set, 0x0067, 0x0069);
178
179 /* [a-c g-i] */
180 if (uset_containsSome(set, set2)) {
181 log_err("set should not contain some of set2 yet\n");
182 }
183 uset_complementAll(set, set2);
184 if (!uset_containsSome(set, set2)) {
185 log_err("set should contain some of set2\n");
186 }
187 expect(set, "abcghi", "def{bc}", NULL);
188
189 /* [g-i] */
190 uset_removeAll(set, set2);
191 expect(set, "ghi", "abcdef{bc}", NULL);
192
193 /* [a-c g-i] */
194 uset_addAll(set2, set);
195 expect(set2, "abcghi", "def{bc}", NULL);
196
197 /* [g-i] */
198 uset_retainAll(set2, set);
199 expect(set2, "ghi", "abcdef{bc}", NULL);
200
201 uset_close(set);
202 uset_close(set2);
203 }
204
205 /*------------------------------------------------------------------
206 * Support
207 *------------------------------------------------------------------*/
208
209 /**
210 * Verifies that the given set contains the characters and strings in
211 * inList, and does not contain those in outList. Also verifies that
212 * 'set' is not NULL and that 'ec' succeeds.
213 * @param set the set to test, or NULL (on error)
214 * @param inList list of set contents, in iteration order. Format is
215 * list of individual strings, in iteration order, followed by sorted
216 * list of strings, delimited by {}. This means we do not test
217 * characters '{' or '}' and we do not test strings containing those
218 * characters either.
219 * @param outList list of things not in the set. Same format as
220 * inList.
221 * @param ec an error code, checked for success. May be NULL in which
222 * case it is ignored.
223 */
224 static void expect(const USet* set,
225 const char* inList,
226 const char* outList,
227 UErrorCode* ec) {
228 if (ec!=NULL && U_FAILURE(*ec)) {
229 log_err("FAIL: %s\n", u_errorName(*ec));
230 return;
231 }
232 if (set == NULL) {
233 log_err("FAIL: USet is NULL\n");
234 return;
235 }
236 expectContainment(set, inList, TRUE);
237 expectContainment(set, outList, FALSE);
238 expectItems(set, inList);
239 }
240
241 static void expectContainment(const USet* set,
242 const char* list,
243 UBool isIn) {
244 const char* p = list;
245 UChar ustr[4096];
246 char *pat;
247 UErrorCode ec;
248 int32_t rangeStart = -1, rangeEnd = -1, length;
249
250 ec = U_ZERO_ERROR;
251 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
252 if(U_FAILURE(ec)) {
253 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
254 return;
255 }
256 pat=aescstrdup(ustr, length);
257
258 while (*p) {
259 if (*p=='{') {
260 const char* stringStart = ++p;
261 int32_t stringLength = 0;
262 char strCopy[64];
263
264 while (*p++ != '}') {
265 }
266 stringLength = (int32_t)(p - stringStart - 1);
267 strncpy(strCopy, stringStart, stringLength);
268 strCopy[stringLength] = 0;
269
270 u_charsToUChars(stringStart, ustr, stringLength);
271
272 if (uset_containsString(set, ustr, stringLength) == isIn) {
273 log_verbose("Ok: %s %s \"%s\"\n", pat,
274 (isIn ? "contains" : "does not contain"),
275 strCopy);
276 } else {
277 log_err("FAIL: %s %s \"%s\"\n", pat,
278 (isIn ? "does not contain" : "contains"),
279 strCopy);
280 }
281 }
282
283 else {
284 UChar32 c;
285
286 u_charsToUChars(p, ustr, 1);
287 c = ustr[0];
288
289 if (uset_contains(set, c) == isIn) {
290 log_verbose("Ok: %s %s '%c'\n", pat,
291 (isIn ? "contains" : "does not contain"),
292 *p);
293 } else {
294 log_err("FAIL: %s %s '%c'\n", pat,
295 (isIn ? "does not contain" : "contains"),
296 *p);
297 }
298
299 /* Test the range API too by looking for ranges */
300 if (c == rangeEnd+1) {
301 rangeEnd = c;
302 } else {
303 if (rangeStart >= 0) {
304 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
305 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
306 (isIn ? "contains" : "does not contain"),
307 rangeStart, rangeEnd);
308 } else {
309 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
310 (isIn ? "does not contain" : "contains"),
311 rangeStart, rangeEnd);
312 }
313 }
314 rangeStart = rangeEnd = c;
315 }
316
317 ++p;
318 }
319 }
320
321 if (rangeStart >= 0) {
322 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
323 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
324 (isIn ? "contains" : "does not contain"),
325 rangeStart, rangeEnd);
326 } else {
327 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
328 (isIn ? "does not contain" : "contains"),
329 rangeStart, rangeEnd);
330 }
331 }
332 }
333
334 /* This only works for invariant BMP chars */
335 static char oneUCharToChar(UChar32 c) {
336 UChar ubuf[1];
337 char buf[1];
338 ubuf[0] = (UChar) c;
339 u_UCharsToChars(ubuf, buf, 1);
340 return buf[0];
341 }
342
343 static void expectItems(const USet* set,
344 const char* items) {
345 const char* p = items;
346 UChar ustr[4096], itemStr[4096];
347 char buf[4096];
348 char *pat;
349 UErrorCode ec;
350 int32_t expectedSize = 0;
351 int32_t itemCount = uset_getItemCount(set);
352 int32_t itemIndex = 0;
353 UChar32 start = 1, end = 0;
354 int32_t itemLen = 0, length;
355
356 ec = U_ZERO_ERROR;
357 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
358 if (U_FAILURE(ec)) {
359 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
360 return;
361 }
362 pat=aescstrdup(ustr, length);
363
364 if (uset_isEmpty(set) != (strlen(items)==0)) {
365 log_err("FAIL: %s should return %s from isEmpty\n",
366 pat,
367 strlen(items)==0 ? "TRUE" : "FALSE");
368 }
369
370 /* Don't test patterns starting with "[^" */
371 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
372 return;
373 }
374
375 while (*p) {
376
377 ++expectedSize;
378
379 if (start > end || start == -1) {
380 /* Fetch our next item */
381 if (itemIndex >= itemCount) {
382 log_err("FAIL: ran out of items iterating %s\n", pat);
383 return;
384 }
385
386 itemLen = uset_getItem(set, itemIndex, &start, &end,
387 itemStr, sizeof(itemStr), &ec);
388 if (U_FAILURE(ec) || itemLen < 0) {
389 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
390 return;
391 }
392
393 if (itemLen == 0) {
394 log_verbose("Ok: %s item %d is %c-%c\n", pat,
395 itemIndex, oneUCharToChar(start),
396 oneUCharToChar(end));
397 } else {
398 itemStr[itemLen] = 0;
399 u_UCharsToChars(itemStr, buf, itemLen+1);
400 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
401 }
402
403 ++itemIndex;
404 }
405
406 if (*p=='{') {
407 const char* stringStart = ++p;
408 int32_t stringLength = 0;
409 char strCopy[64];
410
411 while (*p++ != '}') {
412 }
413 stringLength = (int32_t)(p - stringStart - 1);
414 strncpy(strCopy, stringStart, stringLength);
415 strCopy[stringLength] = 0;
416
417 u_charsToUChars(stringStart, ustr, stringLength);
418 ustr[stringLength] = 0;
419
420 if (itemLen == 0) {
421 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
422 pat, strCopy);
423 return;
424 }
425
426 if (u_strcmp(ustr, itemStr) != 0) {
427 log_err("FAIL: for %s expect \"%s\" next\n",
428 pat, strCopy);
429 return;
430 }
431 }
432
433 else {
434 UChar32 c;
435
436 u_charsToUChars(p, ustr, 1);
437 c = ustr[0];
438
439 if (itemLen != 0) {
440 log_err("FAIL: for %s expect '%c' next, but got a string\n",
441 pat, *p);
442 return;
443 }
444
445 if (c != start++) {
446 log_err("FAIL: for %s expect '%c' next\n",
447 pat, *p);
448 return;
449 }
450
451 ++p;
452 }
453 }
454
455 if (uset_size(set) == expectedSize) {
456 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
457 } else {
458 log_err("FAIL: %s size is %d, expected %d\n",
459 pat, uset_size(set), expectedSize);
460 }
461 }
462
463 static void
464 TestSerialized() {
465 uint16_t buffer[1000];
466 USerializedSet sset;
467 USet *set;
468 UErrorCode errorCode;
469 UChar32 c;
470 int32_t length;
471
472 /* use a pattern that generates both BMP and supplementary code points */
473 U_STRING_DECL(pattern, "[:Cf:]", 6);
474 U_STRING_INIT(pattern, "[:Cf:]", 6);
475
476 errorCode=U_ZERO_ERROR;
477 set=uset_openPattern(pattern, -1, &errorCode);
478 if(U_FAILURE(errorCode)) {
479 log_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
480 return;
481 }
482
483 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
484 if(U_FAILURE(errorCode)) {
485 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
486 uset_close(set);
487 return;
488 }
489
490 uset_getSerializedSet(&sset, buffer, length);
491 for(c=0; c<=0x10ffff; ++c) {
492 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
493 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
494 break;
495 }
496 }
497
498 uset_close(set);
499 }
500
501 /**
502 * Make sure that when non-invariant chars are passed to uset_openPattern
503 * they do not cause an ugly failure mode (e.g. assertion failure).
504 * JB#3795.
505 */
506 static void
507 TestNonInvariantPattern() {
508 UErrorCode ec = U_ZERO_ERROR;
509 /* The critical part of this test is that the following pattern
510 must contain a non-invariant character. */
511 static const char *pattern = "[:ccc!=0:]";
512 UChar buf[256];
513 int32_t len = u_unescape(pattern, buf, 256);
514 /* This test 'fails' by having an assertion failure within the
515 following call. It passes by running to completion with no
516 assertion failure. */
517 USet *set = uset_openPattern(buf, len, &ec);
518 uset_close(set);
519 }
520
521 static void TestBadPattern(void) {
522 UErrorCode status = U_ZERO_ERROR;
523 USet *pat;
524 U_STRING_DECL(pattern, "[", 1);
525 U_STRING_INIT(pattern, "[", 1);
526 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
527 if (pat != NULL || U_SUCCESS(status)) {
528 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
529 }
530 }
531
532 /*eof*/