]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usettest.c
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usettest.c
1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include "cmemory.h"
11 #include <stdlib.h>
12 #include <string.h>
13
14 #define TEST(x) addTest(root, &x, "uset/" # x)
15
16 static void TestAPI(void);
17 static void Testj2269(void);
18 static void TestSerialized(void);
19 static void TestNonInvariantPattern(void);
20 static void TestBadPattern(void);
21 static void TestFreezable(void);
22 static void TestSpan(void);
23
24 void addUSetTest(TestNode** root);
25
26 static void expect(const USet* set,
27 const char* inList,
28 const char* outList,
29 UErrorCode* ec);
30 static void expectContainment(const USet* set,
31 const char* list,
32 UBool isIn);
33 static char oneUCharToChar(UChar32 c);
34 static void expectItems(const USet* set,
35 const char* items);
36
37 void
38 addUSetTest(TestNode** root) {
39 TEST(TestAPI);
40 TEST(Testj2269);
41 TEST(TestSerialized);
42 TEST(TestNonInvariantPattern);
43 TEST(TestBadPattern);
44 TEST(TestFreezable);
45 TEST(TestSpan);
46 }
47
48 /*------------------------------------------------------------------
49 * Tests
50 *------------------------------------------------------------------*/
51
52 static void Testj2269() {
53 UErrorCode status = U_ZERO_ERROR;
54 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
55 USet *s = uset_open(1, 0);
56 uset_addString(s, a, 3);
57 a[0] = 0x63; a[1] = 0x63;
58 expect(s, "{abc}", "{ccc}", &status);
59 uset_close(s);
60 }
61
62 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
63 static const int32_t PAT_LEN = UPRV_LENGTHOF(PAT) - 1;
64
65 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
66 static const int32_t PAT_lb_LEN = UPRV_LENGTHOF(PAT_lb) - 1;
67
68 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
69 static const int32_t VAL_SP_LEN = UPRV_LENGTHOF(VAL_SP) - 1;
70
71 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
72 static const int32_t STR_bc_LEN = UPRV_LENGTHOF(STR_bc) - 1;
73
74 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
75 static const int32_t STR_ab_LEN = UPRV_LENGTHOF(STR_ab) - 1;
76
77 /**
78 * Basic API test for uset.x
79 */
80 static void TestAPI() {
81 USet* set;
82 USet* set2;
83 UErrorCode ec;
84
85 /* [] */
86 set = uset_openEmpty();
87 expect(set, "", "abc{ab}", NULL);
88 uset_close(set);
89
90 set = uset_open(1, 0);
91 expect(set, "", "abc{ab}", NULL);
92 uset_close(set);
93
94 set = uset_open(1, 1);
95 uset_clear(set);
96 expect(set, "", "abc{ab}", NULL);
97 uset_close(set);
98
99 /* [ABC] */
100 set = uset_open(0x0041, 0x0043);
101 expect(set, "ABC", "DEF{ab}", NULL);
102 uset_close(set);
103
104 /* [a-c{ab}] */
105 ec = U_ZERO_ERROR;
106 set = uset_openPattern(PAT, PAT_LEN, &ec);
107 if(U_FAILURE(ec)) {
108 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
109 return;
110 }
111 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
112 log_err("uset_resemblesPattern of PAT failed\n");
113 }
114 expect(set, "abc{ab}", "def{bc}", &ec);
115
116 /* [a-d{ab}] */
117 uset_add(set, 0x64);
118 expect(set, "abcd{ab}", "ef{bc}", NULL);
119
120 /* [acd{ab}{bc}] */
121 uset_remove(set, 0x62);
122 uset_addString(set, STR_bc, STR_bc_LEN);
123 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
124
125 /* [acd{bc}] */
126 uset_removeString(set, STR_ab, STR_ab_LEN);
127 expect(set, "acd{bc}", "bfg{ab}", NULL);
128
129 /* [^acd{bc}] */
130 uset_complement(set);
131 expect(set, "bef{bc}", "acd{ac}", NULL);
132
133 /* [a-e{bc}] */
134 uset_complement(set);
135 uset_addRange(set, 0x0062, 0x0065);
136 expect(set, "abcde{bc}", "fg{ab}", NULL);
137
138 /* [de{bc}] */
139 uset_removeRange(set, 0x0050, 0x0063);
140 expect(set, "de{bc}", "bcfg{ab}", NULL);
141
142 /* [g-l] */
143 uset_set(set, 0x0067, 0x006C);
144 expect(set, "ghijkl", "de{bc}", NULL);
145
146 if (uset_indexOf(set, 0x0067) != 0) {
147 log_err("uset_indexOf failed finding correct index of 'g'\n");
148 }
149
150 if (uset_charAt(set, 0) != 0x0067) {
151 log_err("uset_charAt failed finding correct char 'g' at index 0\n");
152 }
153
154 /* How to test this one...? */
155 uset_compact(set);
156
157 /* [g-i] */
158 uset_retain(set, 0x0067, 0x0069);
159 expect(set, "ghi", "dejkl{bc}", NULL);
160
161 /* UCHAR_ASCII_HEX_DIGIT */
162 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
163 if(U_FAILURE(ec)) {
164 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
165 return;
166 }
167 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
168
169 /* [ab] */
170 uset_clear(set);
171 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
172 expect(set, "ab", "def{ab}", NULL);
173 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
174 log_err("set should not conatin all characters of \"bc\" \n");
175 }
176
177 /* [] */
178 set2 = uset_open(1, 1);
179 uset_clear(set2);
180
181 /* space */
182 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
183 expect(set2, " ", "abcdefghi{bc}", NULL);
184
185 /* [a-c] */
186 uset_set(set2, 0x0061, 0x0063);
187 /* [g-i] */
188 uset_set(set, 0x0067, 0x0069);
189
190 /* [a-c g-i] */
191 if (uset_containsSome(set, set2)) {
192 log_err("set should not contain some of set2 yet\n");
193 }
194 uset_complementAll(set, set2);
195 if (!uset_containsSome(set, set2)) {
196 log_err("set should contain some of set2\n");
197 }
198 expect(set, "abcghi", "def{bc}", NULL);
199
200 /* [g-i] */
201 uset_removeAll(set, set2);
202 expect(set, "ghi", "abcdef{bc}", NULL);
203
204 /* [a-c g-i] */
205 uset_addAll(set2, set);
206 expect(set2, "abcghi", "def{bc}", NULL);
207
208 /* [g-i] */
209 uset_retainAll(set2, set);
210 expect(set2, "ghi", "abcdef{bc}", NULL);
211
212 uset_close(set);
213 uset_close(set2);
214 }
215
216 /*------------------------------------------------------------------
217 * Support
218 *------------------------------------------------------------------*/
219
220 /**
221 * Verifies that the given set contains the characters and strings in
222 * inList, and does not contain those in outList. Also verifies that
223 * 'set' is not NULL and that 'ec' succeeds.
224 * @param set the set to test, or NULL (on error)
225 * @param inList list of set contents, in iteration order. Format is
226 * list of individual strings, in iteration order, followed by sorted
227 * list of strings, delimited by {}. This means we do not test
228 * characters '{' or '}' and we do not test strings containing those
229 * characters either.
230 * @param outList list of things not in the set. Same format as
231 * inList.
232 * @param ec an error code, checked for success. May be NULL in which
233 * case it is ignored.
234 */
235 static void expect(const USet* set,
236 const char* inList,
237 const char* outList,
238 UErrorCode* ec) {
239 if (ec!=NULL && U_FAILURE(*ec)) {
240 log_err("FAIL: %s\n", u_errorName(*ec));
241 return;
242 }
243 if (set == NULL) {
244 log_err("FAIL: USet is NULL\n");
245 return;
246 }
247 expectContainment(set, inList, TRUE);
248 expectContainment(set, outList, FALSE);
249 expectItems(set, inList);
250 }
251
252 static void expectContainment(const USet* set,
253 const char* list,
254 UBool isIn) {
255 const char* p = list;
256 UChar ustr[4096];
257 char *pat;
258 UErrorCode ec;
259 int32_t rangeStart = -1, rangeEnd = -1, length;
260
261 ec = U_ZERO_ERROR;
262 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
263 if(U_FAILURE(ec)) {
264 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
265 return;
266 }
267 pat=aescstrdup(ustr, length);
268
269 while (*p) {
270 if (*p=='{') {
271 const char* stringStart = ++p;
272 int32_t stringLength = 0;
273 char strCopy[64];
274
275 while (*p++ != '}') {
276 }
277 stringLength = (int32_t)(p - stringStart - 1);
278 strncpy(strCopy, stringStart, stringLength);
279 strCopy[stringLength] = 0;
280
281 u_charsToUChars(stringStart, ustr, stringLength);
282
283 if (uset_containsString(set, ustr, stringLength) == isIn) {
284 log_verbose("Ok: %s %s \"%s\"\n", pat,
285 (isIn ? "contains" : "does not contain"),
286 strCopy);
287 } else {
288 log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
289 (isIn ? "does not contain" : "contains"),
290 strCopy);
291 }
292 }
293
294 else {
295 UChar32 c;
296
297 u_charsToUChars(p, ustr, 1);
298 c = ustr[0];
299
300 if (uset_contains(set, c) == isIn) {
301 log_verbose("Ok: %s %s '%c'\n", pat,
302 (isIn ? "contains" : "does not contain"),
303 *p);
304 } else {
305 log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
306 (isIn ? "does not contain" : "contains"),
307 *p);
308 }
309
310 /* Test the range API too by looking for ranges */
311 if (c == rangeEnd+1) {
312 rangeEnd = c;
313 } else {
314 if (rangeStart >= 0) {
315 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
316 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
317 (isIn ? "contains" : "does not contain"),
318 rangeStart, rangeEnd);
319 } else {
320 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
321 (isIn ? "does not contain" : "contains"),
322 rangeStart, rangeEnd);
323 }
324 }
325 rangeStart = rangeEnd = c;
326 }
327
328 ++p;
329 }
330 }
331
332 if (rangeStart >= 0) {
333 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
334 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
335 (isIn ? "contains" : "does not contain"),
336 rangeStart, rangeEnd);
337 } else {
338 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
339 (isIn ? "does not contain" : "contains"),
340 rangeStart, rangeEnd);
341 }
342 }
343 }
344
345 /* This only works for invariant BMP chars */
346 static char oneUCharToChar(UChar32 c) {
347 UChar ubuf[1];
348 char buf[1];
349 ubuf[0] = (UChar) c;
350 u_UCharsToChars(ubuf, buf, 1);
351 return buf[0];
352 }
353
354 static void expectItems(const USet* set,
355 const char* items) {
356 const char* p = items;
357 UChar ustr[4096], itemStr[4096];
358 char buf[4096];
359 char *pat;
360 UErrorCode ec;
361 int32_t expectedSize = 0;
362 int32_t itemCount = uset_getItemCount(set);
363 int32_t itemIndex = 0;
364 UChar32 start = 1, end = 0;
365 int32_t itemLen = 0, length;
366
367 ec = U_ZERO_ERROR;
368 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
369 if (U_FAILURE(ec)) {
370 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
371 return;
372 }
373 pat=aescstrdup(ustr, length);
374
375 if (uset_isEmpty(set) != (strlen(items)==0)) {
376 log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
377 pat,
378 strlen(items)==0 ? "TRUE" : "FALSE");
379 }
380
381 /* Don't test patterns starting with "[^" */
382 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
383 return;
384 }
385
386 while (*p) {
387
388 ++expectedSize;
389
390 if (start > end || start == -1) {
391 /* Fetch our next item */
392 if (itemIndex >= itemCount) {
393 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
394 return;
395 }
396
397 itemLen = uset_getItem(set, itemIndex, &start, &end,
398 itemStr, sizeof(itemStr), &ec);
399 if (U_FAILURE(ec) || itemLen < 0) {
400 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
401 return;
402 }
403
404 if (itemLen == 0) {
405 log_verbose("Ok: %s item %d is %c-%c\n", pat,
406 itemIndex, oneUCharToChar(start),
407 oneUCharToChar(end));
408 } else {
409 itemStr[itemLen] = 0;
410 u_UCharsToChars(itemStr, buf, itemLen+1);
411 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
412 }
413
414 ++itemIndex;
415 }
416
417 if (*p=='{') {
418 const char* stringStart = ++p;
419 int32_t stringLength = 0;
420 char strCopy[64];
421
422 while (*p++ != '}') {
423 }
424 stringLength = (int32_t)(p - stringStart - 1);
425 strncpy(strCopy, stringStart, stringLength);
426 strCopy[stringLength] = 0;
427
428 u_charsToUChars(stringStart, ustr, stringLength);
429 ustr[stringLength] = 0;
430
431 if (itemLen == 0) {
432 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
433 pat, strCopy);
434 return;
435 }
436
437 if (u_strcmp(ustr, itemStr) != 0) {
438 log_err("FAIL: for %s expect \"%s\" next\n",
439 pat, strCopy);
440 return;
441 }
442 }
443
444 else {
445 UChar32 c;
446
447 u_charsToUChars(p, ustr, 1);
448 c = ustr[0];
449
450 if (itemLen != 0) {
451 log_err("FAIL: for %s expect '%c' next, but got a string\n",
452 pat, *p);
453 return;
454 }
455
456 if (c != start++) {
457 log_err("FAIL: for %s expect '%c' next\n",
458 pat, *p);
459 return;
460 }
461
462 ++p;
463 }
464 }
465
466 if (uset_size(set) == expectedSize) {
467 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
468 } else {
469 log_err("FAIL: %s size is %d, expected %d\n",
470 pat, uset_size(set), expectedSize);
471 }
472 }
473
474 static void
475 TestSerialized() {
476 uint16_t buffer[1000];
477 USerializedSet sset;
478 USet *set;
479 UErrorCode errorCode;
480 UChar32 c;
481 int32_t length;
482
483 /* use a pattern that generates both BMP and supplementary code points */
484 U_STRING_DECL(pattern, "[:Cf:]", 6);
485 U_STRING_INIT(pattern, "[:Cf:]", 6);
486
487 errorCode=U_ZERO_ERROR;
488 set=uset_openPattern(pattern, -1, &errorCode);
489 if(U_FAILURE(errorCode)) {
490 log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
491 return;
492 }
493
494 length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode);
495 if(U_FAILURE(errorCode)) {
496 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
497 uset_close(set);
498 return;
499 }
500
501 uset_getSerializedSet(&sset, buffer, length);
502 for(c=0; c<=0x10ffff; ++c) {
503 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
504 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
505 break;
506 }
507 }
508
509 uset_close(set);
510 }
511
512 /**
513 * Make sure that when non-invariant chars are passed to uset_openPattern
514 * they do not cause an ugly failure mode (e.g. assertion failure).
515 * JB#3795.
516 */
517 static void
518 TestNonInvariantPattern() {
519 UErrorCode ec = U_ZERO_ERROR;
520 /* The critical part of this test is that the following pattern
521 must contain a non-invariant character. */
522 static const char *pattern = "[:ccc!=0:]";
523 UChar buf[256];
524 int32_t len = u_unescape(pattern, buf, 256);
525 /* This test 'fails' by having an assertion failure within the
526 following call. It passes by running to completion with no
527 assertion failure. */
528 USet *set = uset_openPattern(buf, len, &ec);
529 uset_close(set);
530 }
531
532 static void TestBadPattern(void) {
533 UErrorCode status = U_ZERO_ERROR;
534 USet *pat;
535 U_STRING_DECL(pattern, "[", 1);
536 U_STRING_INIT(pattern, "[", 1);
537 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
538 if (pat != NULL || U_SUCCESS(status)) {
539 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
540 }
541 }
542
543 static USet *openIDSet() {
544 UErrorCode errorCode = U_ZERO_ERROR;
545 U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
546 U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
547 return uset_openPattern(pattern, 15, &errorCode);
548 }
549
550 static void TestFreezable() {
551 USet *idSet;
552 USet *frozen;
553 USet *thawed;
554
555 idSet=openIDSet();
556
557 if (idSet == NULL) {
558 log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
559 uset_close(idSet);
560 return;
561 }
562
563 frozen=uset_clone(idSet);
564
565 if (frozen == NULL) {
566 log_err("uset_Clone() returned NULL\n");
567 return;
568 }
569
570 if(!uset_equals(frozen, idSet)) {
571 log_err("uset_clone() did not make an equal copy\n");
572 }
573
574 uset_freeze(frozen);
575 uset_addRange(frozen, 0xd802, 0xd805);
576
577 if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
578 log_err("uset_freeze() or uset_isFrozen() does not work\n");
579 }
580
581 thawed=uset_cloneAsThawed(frozen);
582
583 if (thawed == NULL) {
584 log_err("uset_cloneAsThawed(frozen) returned NULL");
585 uset_close(frozen);
586 uset_close(idSet);
587 return;
588 }
589
590 uset_addRange(thawed, 0xd802, 0xd805);
591
592 if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
593 log_err("uset_cloneAsThawed() does not work\n");
594 }
595
596 uset_close(idSet);
597 uset_close(frozen);
598 uset_close(thawed);
599 }
600
601 static void TestSpan() {
602 static const UChar s16[2]={ 0xe01, 0x3000 };
603 static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
604
605 USet *idSet=openIDSet();
606
607 if (idSet == NULL) {
608 log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
609 return;
610 }
611
612 if(
613 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
614 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
615 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
616 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
617 ) {
618 log_err("uset_span() or uset_spanBack() does not work\n");
619 }
620
621 if(
622 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
623 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
624 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
625 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
626 ) {
627 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
628 }
629
630 uset_freeze(idSet);
631
632 if(
633 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
634 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
635 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
636 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
637 ) {
638 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
639 }
640
641 if(
642 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
643 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
644 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
645 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
646 ) {
647 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
648 }
649
650 uset_close(idSet);
651 }
652
653 /*eof*/