]>
git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usettest.c
2 **********************************************************************
3 * Copyright (c) 2002-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
14 #define TEST(x) addTest(root, &x, "uset/" # x)
16 static void TestAPI(void);
17 static void Testj2269(void);
18 static void TestSerialized(void);
19 static void TestNonInvariantPattern(void);
20 static void TestBadPattern(void);
21 static void TestFreezable(void);
22 static void TestSpan(void);
24 void addUSetTest(TestNode
** root
);
26 static void expect(const USet
* set
,
30 static void expectContainment(const USet
* set
,
33 static char oneUCharToChar(UChar32 c
);
34 static void expectItems(const USet
* set
,
38 addUSetTest(TestNode
** root
) {
42 TEST(TestNonInvariantPattern
);
48 /*------------------------------------------------------------------
50 *------------------------------------------------------------------*/
52 static void Testj2269() {
53 UErrorCode status
= U_ZERO_ERROR
;
54 UChar a
[4] = { 0x61, 0x62, 0x63, 0 };
55 USet
*s
= uset_open(1, 0);
56 uset_addString(s
, a
, 3);
57 a
[0] = 0x63; a
[1] = 0x63;
58 expect(s
, "{abc}", "{ccc}", &status
);
62 static const UChar PAT
[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
63 static const int32_t PAT_LEN
= UPRV_LENGTHOF(PAT
) - 1;
65 static const UChar PAT_lb
[] = {0x6C, 0x62, 0}; /* "lb" */
66 static const int32_t PAT_lb_LEN
= UPRV_LENGTHOF(PAT_lb
) - 1;
68 static const UChar VAL_SP
[] = {0x53, 0x50, 0}; /* "SP" */
69 static const int32_t VAL_SP_LEN
= UPRV_LENGTHOF(VAL_SP
) - 1;
71 static const UChar STR_bc
[] = {98,99,0}; /* "bc" */
72 static const int32_t STR_bc_LEN
= UPRV_LENGTHOF(STR_bc
) - 1;
74 static const UChar STR_ab
[] = {97,98,0}; /* "ab" */
75 static const int32_t STR_ab_LEN
= UPRV_LENGTHOF(STR_ab
) - 1;
78 * Basic API test for uset.x
80 static void TestAPI() {
86 set
= uset_openEmpty();
87 expect(set
, "", "abc{ab}", NULL
);
90 set
= uset_open(1, 0);
91 expect(set
, "", "abc{ab}", NULL
);
94 set
= uset_open(1, 1);
96 expect(set
, "", "abc{ab}", NULL
);
100 set
= uset_open(0x0041, 0x0043);
101 expect(set
, "ABC", "DEF{ab}", NULL
);
106 set
= uset_openPattern(PAT
, PAT_LEN
, &ec
);
108 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec
));
111 if(!uset_resemblesPattern(PAT
, PAT_LEN
, 0)) {
112 log_err("uset_resemblesPattern of PAT failed\n");
114 expect(set
, "abc{ab}", "def{bc}", &ec
);
118 expect(set
, "abcd{ab}", "ef{bc}", NULL
);
121 uset_remove(set
, 0x62);
122 uset_addString(set
, STR_bc
, STR_bc_LEN
);
123 expect(set
, "acd{ab}{bc}", "bef{cd}", NULL
);
126 uset_removeString(set
, STR_ab
, STR_ab_LEN
);
127 expect(set
, "acd{bc}", "bfg{ab}", NULL
);
130 uset_complement(set
);
131 expect(set
, "bef{bc}", "acd{ac}", NULL
);
134 uset_complement(set
);
135 uset_addRange(set
, 0x0062, 0x0065);
136 expect(set
, "abcde{bc}", "fg{ab}", NULL
);
139 uset_removeRange(set
, 0x0050, 0x0063);
140 expect(set
, "de{bc}", "bcfg{ab}", NULL
);
143 uset_set(set
, 0x0067, 0x006C);
144 expect(set
, "ghijkl", "de{bc}", NULL
);
146 if (uset_indexOf(set
, 0x0067) != 0) {
147 log_err("uset_indexOf failed finding correct index of 'g'\n");
150 if (uset_charAt(set
, 0) != 0x0067) {
151 log_err("uset_charAt failed finding correct char 'g' at index 0\n");
154 /* How to test this one...? */
158 uset_retain(set
, 0x0067, 0x0069);
159 expect(set
, "ghi", "dejkl{bc}", NULL
);
161 /* UCHAR_ASCII_HEX_DIGIT */
162 uset_applyIntPropertyValue(set
, UCHAR_ASCII_HEX_DIGIT
, 1, &ec
);
164 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec
));
167 expect(set
, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL
);
171 uset_addAllCodePoints(set
, STR_ab
, STR_ab_LEN
);
172 expect(set
, "ab", "def{ab}", NULL
);
173 if (uset_containsAllCodePoints(set
, STR_bc
, STR_bc_LEN
)){
174 log_err("set should not conatin all characters of \"bc\" \n");
178 set2
= uset_open(1, 1);
182 uset_applyPropertyAlias(set2
, PAT_lb
, PAT_lb_LEN
, VAL_SP
, VAL_SP_LEN
, &ec
);
183 expect(set2
, " ", "abcdefghi{bc}", NULL
);
186 uset_set(set2
, 0x0061, 0x0063);
188 uset_set(set
, 0x0067, 0x0069);
191 if (uset_containsSome(set
, set2
)) {
192 log_err("set should not contain some of set2 yet\n");
194 uset_complementAll(set
, set2
);
195 if (!uset_containsSome(set
, set2
)) {
196 log_err("set should contain some of set2\n");
198 expect(set
, "abcghi", "def{bc}", NULL
);
201 uset_removeAll(set
, set2
);
202 expect(set
, "ghi", "abcdef{bc}", NULL
);
205 uset_addAll(set2
, set
);
206 expect(set2
, "abcghi", "def{bc}", NULL
);
209 uset_retainAll(set2
, set
);
210 expect(set2
, "ghi", "abcdef{bc}", NULL
);
216 /*------------------------------------------------------------------
218 *------------------------------------------------------------------*/
/**
 * Verifies that the given set contains the characters and strings in
 * inList, and does not contain those in outList.  Also verifies that
 * 'set' is not NULL and that 'ec' succeeds.
 * @param set the set to test, or NULL (on error)
 * @param inList list of set contents, in iteration order.  Format is
 * list of individual characters, in iteration order, followed by sorted
 * list of strings, delimited by {}.  This means we do not test
 * characters '{' or '}' and we do not test strings containing those
 * characters either.
 * @param outList list of things not in the set.  Same format as
 * inList.
 * @param ec an error code, checked for success.  May be NULL in which
 * case it is ignored.
 */
235 static void expect(const USet
* set
,
239 if (ec
!=NULL
&& U_FAILURE(*ec
)) {
240 log_err("FAIL: %s\n", u_errorName(*ec
));
244 log_err("FAIL: USet is NULL\n");
247 expectContainment(set
, inList
, TRUE
);
248 expectContainment(set
, outList
, FALSE
);
249 expectItems(set
, inList
);
252 static void expectContainment(const USet
* set
,
255 const char* p
= list
;
259 int32_t rangeStart
= -1, rangeEnd
= -1, length
;
262 length
= uset_toPattern(set
, ustr
, sizeof(ustr
), TRUE
, &ec
);
264 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec
));
267 pat
=aescstrdup(ustr
, length
);
271 const char* stringStart
= ++p
;
272 int32_t stringLength
= 0;
275 while (*p
++ != '}') {
277 stringLength
= (int32_t)(p
- stringStart
- 1);
278 strncpy(strCopy
, stringStart
, stringLength
);
279 strCopy
[stringLength
] = 0;
281 u_charsToUChars(stringStart
, ustr
, stringLength
);
283 if (uset_containsString(set
, ustr
, stringLength
) == isIn
) {
284 log_verbose("Ok: %s %s \"%s\"\n", pat
,
285 (isIn
? "contains" : "does not contain"),
288 log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat
,
289 (isIn
? "does not contain" : "contains"),
297 u_charsToUChars(p
, ustr
, 1);
300 if (uset_contains(set
, c
) == isIn
) {
301 log_verbose("Ok: %s %s '%c'\n", pat
,
302 (isIn
? "contains" : "does not contain"),
305 log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat
,
306 (isIn
? "does not contain" : "contains"),
310 /* Test the range API too by looking for ranges */
311 if (c
== rangeEnd
+1) {
314 if (rangeStart
>= 0) {
315 if (uset_containsRange(set
, rangeStart
, rangeEnd
) == isIn
) {
316 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat
,
317 (isIn
? "contains" : "does not contain"),
318 rangeStart
, rangeEnd
);
320 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat
,
321 (isIn
? "does not contain" : "contains"),
322 rangeStart
, rangeEnd
);
325 rangeStart
= rangeEnd
= c
;
332 if (rangeStart
>= 0) {
333 if (uset_containsRange(set
, rangeStart
, rangeEnd
) == isIn
) {
334 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat
,
335 (isIn
? "contains" : "does not contain"),
336 rangeStart
, rangeEnd
);
338 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat
,
339 (isIn
? "does not contain" : "contains"),
340 rangeStart
, rangeEnd
);
345 /* This only works for invariant BMP chars */
346 static char oneUCharToChar(UChar32 c
) {
350 u_UCharsToChars(ubuf
, buf
, 1);
354 static void expectItems(const USet
* set
,
356 const char* p
= items
;
357 UChar ustr
[4096], itemStr
[4096];
361 int32_t expectedSize
= 0;
362 int32_t itemCount
= uset_getItemCount(set
);
363 int32_t itemIndex
= 0;
364 UChar32 start
= 1, end
= 0;
365 int32_t itemLen
= 0, length
;
368 length
= uset_toPattern(set
, ustr
, sizeof(ustr
), TRUE
, &ec
);
370 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec
));
373 pat
=aescstrdup(ustr
, length
);
375 if (uset_isEmpty(set
) != (strlen(items
)==0)) {
376 log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
378 strlen(items
)==0 ? "TRUE" : "FALSE");
381 /* Don't test patterns starting with "[^" */
382 if (u_strlen(ustr
) > 2 && ustr
[1] == 0x5e /*'^'*/) {
390 if (start
> end
|| start
== -1) {
391 /* Fetch our next item */
392 if (itemIndex
>= itemCount
) {
393 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat
);
397 itemLen
= uset_getItem(set
, itemIndex
, &start
, &end
,
398 itemStr
, sizeof(itemStr
), &ec
);
399 if (U_FAILURE(ec
) || itemLen
< 0) {
400 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec
));
405 log_verbose("Ok: %s item %d is %c-%c\n", pat
,
406 itemIndex
, oneUCharToChar(start
),
407 oneUCharToChar(end
));
409 itemStr
[itemLen
] = 0;
410 u_UCharsToChars(itemStr
, buf
, itemLen
+1);
411 log_verbose("Ok: %s item %d is \"%s\"\n", pat
, itemIndex
, buf
);
418 const char* stringStart
= ++p
;
419 int32_t stringLength
= 0;
422 while (*p
++ != '}') {
424 stringLength
= (int32_t)(p
- stringStart
- 1);
425 strncpy(strCopy
, stringStart
, stringLength
);
426 strCopy
[stringLength
] = 0;
428 u_charsToUChars(stringStart
, ustr
, stringLength
);
429 ustr
[stringLength
] = 0;
432 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
437 if (u_strcmp(ustr
, itemStr
) != 0) {
438 log_err("FAIL: for %s expect \"%s\" next\n",
447 u_charsToUChars(p
, ustr
, 1);
451 log_err("FAIL: for %s expect '%c' next, but got a string\n",
457 log_err("FAIL: for %s expect '%c' next\n",
466 if (uset_size(set
) == expectedSize
) {
467 log_verbose("Ok: %s size is %d\n", pat
, expectedSize
);
469 log_err("FAIL: %s size is %d, expected %d\n",
470 pat
, uset_size(set
), expectedSize
);
476 uint16_t buffer
[1000];
479 UErrorCode errorCode
;
483 /* use a pattern that generates both BMP and supplementary code points */
484 U_STRING_DECL(pattern
, "[:Cf:]", 6);
485 U_STRING_INIT(pattern
, "[:Cf:]", 6);
487 errorCode
=U_ZERO_ERROR
;
488 set
=uset_openPattern(pattern
, -1, &errorCode
);
489 if(U_FAILURE(errorCode
)) {
490 log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode
));
494 length
=uset_serialize(set
, buffer
, UPRV_LENGTHOF(buffer
), &errorCode
);
495 if(U_FAILURE(errorCode
)) {
496 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode
));
501 uset_getSerializedSet(&sset
, buffer
, length
);
502 for(c
=0; c
<=0x10ffff; ++c
) {
503 if(uset_contains(set
, c
)!=uset_serializedContains(&sset
, c
)) {
504 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c
);
513 * Make sure that when non-invariant chars are passed to uset_openPattern
514 * they do not cause an ugly failure mode (e.g. assertion failure).
518 TestNonInvariantPattern() {
519 UErrorCode ec
= U_ZERO_ERROR
;
520 /* The critical part of this test is that the following pattern
521 must contain a non-invariant character. */
522 static const char *pattern
= "[:ccc!=0:]";
524 int32_t len
= u_unescape(pattern
, buf
, 256);
525 /* This test 'fails' by having an assertion failure within the
526 following call. It passes by running to completion with no
527 assertion failure. */
528 USet
*set
= uset_openPattern(buf
, len
, &ec
);
532 static void TestBadPattern(void) {
533 UErrorCode status
= U_ZERO_ERROR
;
535 U_STRING_DECL(pattern
, "[", 1);
536 U_STRING_INIT(pattern
, "[", 1);
537 pat
= uset_openPatternOptions(pattern
, u_strlen(pattern
), 0, &status
);
538 if (pat
!= NULL
|| U_SUCCESS(status
)) {
539 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status
));
543 static USet
*openIDSet() {
544 UErrorCode errorCode
= U_ZERO_ERROR
;
545 U_STRING_DECL(pattern
, "[:ID_Continue:]", 15);
546 U_STRING_INIT(pattern
, "[:ID_Continue:]", 15);
547 return uset_openPattern(pattern
, 15, &errorCode
);
550 static void TestFreezable() {
558 log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
563 frozen
=uset_clone(idSet
);
565 if (frozen
== NULL
) {
566 log_err("uset_Clone() returned NULL\n");
570 if(!uset_equals(frozen
, idSet
)) {
571 log_err("uset_clone() did not make an equal copy\n");
575 uset_addRange(frozen
, 0xd802, 0xd805);
577 if(uset_isFrozen(idSet
) || !uset_isFrozen(frozen
) || !uset_equals(frozen
, idSet
)) {
578 log_err("uset_freeze() or uset_isFrozen() does not work\n");
581 thawed
=uset_cloneAsThawed(frozen
);
583 if (thawed
== NULL
) {
584 log_err("uset_cloneAsThawed(frozen) returned NULL");
590 uset_addRange(thawed
, 0xd802, 0xd805);
592 if(uset_isFrozen(thawed
) || uset_equals(thawed
, idSet
) || !uset_containsRange(thawed
, 0xd802, 0xd805)) {
593 log_err("uset_cloneAsThawed() does not work\n");
601 static void TestSpan() {
602 static const UChar s16
[2]={ 0xe01, 0x3000 };
603 static const char* s8
="\xE0\xB8\x81\xE3\x80\x80";
605 USet
*idSet
=openIDSet();
608 log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
613 1!=uset_span(idSet
, s16
, 2, USET_SPAN_CONTAINED
) ||
614 0!=uset_span(idSet
, s16
, 2, USET_SPAN_NOT_CONTAINED
) ||
615 2!=uset_spanBack(idSet
, s16
, 2, USET_SPAN_CONTAINED
) ||
616 1!=uset_spanBack(idSet
, s16
, 2, USET_SPAN_NOT_CONTAINED
)
618 log_err("uset_span() or uset_spanBack() does not work\n");
622 3!=uset_spanUTF8(idSet
, s8
, 6, USET_SPAN_CONTAINED
) ||
623 0!=uset_spanUTF8(idSet
, s8
, 6, USET_SPAN_NOT_CONTAINED
) ||
624 6!=uset_spanBackUTF8(idSet
, s8
, 6, USET_SPAN_CONTAINED
) ||
625 3!=uset_spanBackUTF8(idSet
, s8
, 6, USET_SPAN_NOT_CONTAINED
)
627 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
633 1!=uset_span(idSet
, s16
, 2, USET_SPAN_CONTAINED
) ||
634 0!=uset_span(idSet
, s16
, 2, USET_SPAN_NOT_CONTAINED
) ||
635 2!=uset_spanBack(idSet
, s16
, 2, USET_SPAN_CONTAINED
) ||
636 1!=uset_spanBack(idSet
, s16
, 2, USET_SPAN_NOT_CONTAINED
)
638 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
642 3!=uset_spanUTF8(idSet
, s8
, 6, USET_SPAN_CONTAINED
) ||
643 0!=uset_spanUTF8(idSet
, s8
, 6, USET_SPAN_NOT_CONTAINED
) ||
644 6!=uset_spanBackUTF8(idSet
, s8
, 6, USET_SPAN_CONTAINED
) ||
645 3!=uset_spanBackUTF8(idSet
, s8
, 6, USET_SPAN_NOT_CONTAINED
)
647 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");