]>
git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usettest.c
2 **********************************************************************
3 * Copyright (c) 2002-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
13 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
15 #define TEST(x) addTest(root, &x, "uset/" # x)
17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
22 void addUSetTest(TestNode
** root
);
24 static void expect(const USet
* set
,
28 static void expectContainment(const USet
* set
,
31 static char oneUCharToChar(UChar32 c
);
32 static void expectItems(const USet
* set
,
36 addUSetTest(TestNode
** root
) {
40 TEST(TestNonInvariantPattern
);
43 /*------------------------------------------------------------------
45 *------------------------------------------------------------------*/
47 static void Testj2269() {
48 UErrorCode status
= U_ZERO_ERROR
;
49 UChar a
[4] = { 0x61, 0x62, 0x63, 0 };
50 USet
*s
= uset_open(1, 0);
51 uset_addString(s
, a
, 3);
52 a
[0] = 0x63; a
[1] = 0x63;
53 expect(s
, "{abc}", "{ccc}", &status
);
57 static const UChar PAT
[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
58 static const int32_t PAT_LEN
= (sizeof(PAT
) / sizeof(PAT
[0])) - 1;
60 static const UChar PAT_lb
[] = {0x6C, 0x62, 0}; /* "lb" */
61 static const int32_t PAT_lb_LEN
= (sizeof(PAT_lb
) / sizeof(PAT_lb
[0])) - 1;
63 static const UChar VAL_SP
[] = {0x53, 0x50, 0}; /* "SP" */
64 static const int32_t VAL_SP_LEN
= (sizeof(VAL_SP
) / sizeof(VAL_SP
[0])) - 1;
66 static const UChar STR_bc
[] = {98,99,0}; /* "bc" */
67 static const int32_t STR_bc_LEN
= (sizeof(STR_bc
) / sizeof(STR_bc
[0])) - 1;
69 static const UChar STR_ab
[] = {97,98,0}; /* "ab" */
70 static const int32_t STR_ab_LEN
= (sizeof(STR_ab
) / sizeof(STR_ab
[0])) - 1;
73 * Basic API test for uset.x
75 static void TestAPI() {
81 set
= uset_open(1, 1);
83 expect(set
, "", "abc{ab}", NULL
);
87 set
= uset_open(0x0041, 0x0043);
88 expect(set
, "ABC", "DEF{ab}", NULL
);
93 set
= uset_openPattern(PAT
, PAT_LEN
, &ec
);
95 log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec
));
98 if(!uset_resemblesPattern(PAT
, PAT_LEN
, 0)) {
99 log_data_err("uset_resemblesPattern of PAT failed\n");
101 expect(set
, "abc{ab}", "def{bc}", &ec
);
105 expect(set
, "abcd{ab}", "ef{bc}", NULL
);
108 uset_remove(set
, 0x62);
109 uset_addString(set
, STR_bc
, STR_bc_LEN
);
110 expect(set
, "acd{ab}{bc}", "bef{cd}", NULL
);
113 uset_removeString(set
, STR_ab
, STR_ab_LEN
);
114 expect(set
, "acd{bc}", "bfg{ab}", NULL
);
117 uset_complement(set
);
118 expect(set
, "bef{bc}", "acd{ac}", NULL
);
121 uset_complement(set
);
122 uset_addRange(set
, 0x0062, 0x0065);
123 expect(set
, "abcde{bc}", "fg{ab}", NULL
);
126 uset_removeRange(set
, 0x0050, 0x0063);
127 expect(set
, "de{bc}", "bcfg{ab}", NULL
);
130 uset_set(set
, 0x0067, 0x006C);
131 expect(set
, "ghijkl", "de{bc}", NULL
);
133 if (uset_indexOf(set
, 0x0067) != 0) {
134 log_data_err("uset_indexOf failed finding correct index of 'g'\n");
137 if (uset_charAt(set
, 0) != 0x0067) {
138 log_data_err("uset_charAt failed finding correct char 'g' at index 0\n");
141 /* How to test this one...? */
145 uset_retain(set
, 0x0067, 0x0069);
146 expect(set
, "ghi", "dejkl{bc}", NULL
);
148 /* UCHAR_ASCII_HEX_DIGIT */
149 uset_applyIntPropertyValue(set
, UCHAR_ASCII_HEX_DIGIT
, 1, &ec
);
151 log_data_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec
));
154 expect(set
, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL
);
157 set2
= uset_open(1, 1);
161 uset_applyPropertyAlias(set2
, PAT_lb
, PAT_lb_LEN
, VAL_SP
, VAL_SP_LEN
, &ec
);
162 expect(set2
, " ", "abcdefghi{bc}", NULL
);
165 uset_set(set2
, 0x0061, 0x0063);
167 uset_set(set
, 0x0067, 0x0069);
170 uset_complementAll(set
, set2
);
171 expect(set
, "abcghi", "def{bc}", NULL
);
174 uset_removeAll(set
, set2
);
175 expect(set
, "ghi", "abcdef{bc}", NULL
);
178 uset_addAll(set2
, set
);
179 expect(set2
, "abcghi", "def{bc}", NULL
);
182 uset_retainAll(set2
, set
);
183 expect(set2
, "ghi", "abcdef{bc}", NULL
);
189 /*------------------------------------------------------------------
191 *------------------------------------------------------------------*/
194 * Verifies that the given set contains the characters and strings in
195 * inList, and does not contain those in outList. Also verifies that
196 * 'set' is not NULL and that 'ec' succeeds.
197 * @param set the set to test, or NULL (on error)
198 * @param inList list of set contents, in iteration order. Format is
199 * list of individual strings, in iteration order, followed by sorted
200 * list of strings, delimited by {}. This means we do not test
201 * characters '{' or '}' and we do not test strings containing those
203 * @param outList list of things not in the set. Same format as
205 * @param ec an error code, checked for success. May be NULL in which
206 * case it is ignored.
208 static void expect(const USet
* set
,
212 if (ec
!=NULL
&& U_FAILURE(*ec
)) {
213 log_err("FAIL: %s\n", u_errorName(*ec
));
217 log_err("FAIL: USet is NULL\n");
220 expectContainment(set
, inList
, TRUE
);
221 expectContainment(set
, outList
, FALSE
);
222 expectItems(set
, inList
);
225 static void expectContainment(const USet
* set
,
228 const char* p
= list
;
232 int32_t rangeStart
= -1, rangeEnd
= -1, length
;
235 length
= uset_toPattern(set
, ustr
, sizeof(ustr
), TRUE
, &ec
);
237 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec
));
240 pat
=aescstrdup(ustr
, length
);
244 const char* stringStart
= ++p
;
245 int32_t stringLength
= 0;
248 while (*p
++ != '}') {
250 stringLength
= (int32_t)(p
- stringStart
- 1);
251 strncpy(strCopy
, stringStart
, stringLength
);
252 strCopy
[stringLength
] = 0;
254 u_charsToUChars(stringStart
, ustr
, stringLength
);
256 if (uset_containsString(set
, ustr
, stringLength
) == isIn
) {
257 log_verbose("Ok: %s %s \"%s\"\n", pat
,
258 (isIn
? "contains" : "does not contain"),
261 log_err("FAIL: %s %s \"%s\"\n", pat
,
262 (isIn
? "does not contain" : "contains"),
270 u_charsToUChars(p
, ustr
, 1);
273 if (uset_contains(set
, c
) == isIn
) {
274 log_verbose("Ok: %s %s '%c'\n", pat
,
275 (isIn
? "contains" : "does not contain"),
278 log_err("FAIL: %s %s '%c'\n", pat
,
279 (isIn
? "does not contain" : "contains"),
283 /* Test the range API too by looking for ranges */
284 if (c
== rangeEnd
+1) {
287 if (rangeStart
>= 0) {
288 if (uset_containsRange(set
, rangeStart
, rangeEnd
) == isIn
) {
289 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat
,
290 (isIn
? "contains" : "does not contain"),
291 rangeStart
, rangeEnd
);
293 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat
,
294 (isIn
? "does not contain" : "contains"),
295 rangeStart
, rangeEnd
);
298 rangeStart
= rangeEnd
= c
;
305 if (rangeStart
>= 0) {
306 if (uset_containsRange(set
, rangeStart
, rangeEnd
) == isIn
) {
307 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat
,
308 (isIn
? "contains" : "does not contain"),
309 rangeStart
, rangeEnd
);
311 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat
,
312 (isIn
? "does not contain" : "contains"),
313 rangeStart
, rangeEnd
);
318 /* This only works for invariant BMP chars */
319 static char oneUCharToChar(UChar32 c
) {
323 u_UCharsToChars(ubuf
, buf
, 1);
327 static void expectItems(const USet
* set
,
329 const char* p
= items
;
330 UChar ustr
[4096], itemStr
[4096];
334 int32_t expectedSize
= 0;
335 int32_t itemCount
= uset_getItemCount(set
);
336 int32_t itemIndex
= 0;
337 UChar32 start
= 1, end
= 0;
338 int32_t itemLen
= 0, length
;
341 length
= uset_toPattern(set
, ustr
, sizeof(ustr
), TRUE
, &ec
);
343 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec
));
346 pat
=aescstrdup(ustr
, length
);
348 if (uset_isEmpty(set
) != (strlen(items
)==0)) {
349 log_err("FAIL: %s should return %s from isEmpty\n",
351 strlen(items
)==0 ? "TRUE" : "FALSE");
354 /* Don't test patterns starting with "[^" */
355 if (u_strlen(ustr
) > 2 && ustr
[1] == 0x5e /*'^'*/) {
363 if (start
> end
|| start
== -1) {
364 /* Fetch our next item */
365 if (itemIndex
>= itemCount
) {
366 log_err("FAIL: ran out of items iterating %s\n", pat
);
370 itemLen
= uset_getItem(set
, itemIndex
, &start
, &end
,
371 itemStr
, sizeof(itemStr
), &ec
);
372 if (U_FAILURE(ec
) || itemLen
< 0) {
373 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec
));
378 log_verbose("Ok: %s item %d is %c-%c\n", pat
,
379 itemIndex
, oneUCharToChar(start
),
380 oneUCharToChar(end
));
382 itemStr
[itemLen
] = 0;
383 u_UCharsToChars(itemStr
, buf
, itemLen
+1);
384 log_verbose("Ok: %s item %d is \"%s\"\n", pat
, itemIndex
, buf
);
391 const char* stringStart
= ++p
;
392 int32_t stringLength
= 0;
395 while (*p
++ != '}') {
397 stringLength
= (int32_t)(p
- stringStart
- 1);
398 strncpy(strCopy
, stringStart
, stringLength
);
399 strCopy
[stringLength
] = 0;
401 u_charsToUChars(stringStart
, ustr
, stringLength
);
402 ustr
[stringLength
] = 0;
405 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
410 if (u_strcmp(ustr
, itemStr
) != 0) {
411 log_err("FAIL: for %s expect \"%s\" next\n",
420 u_charsToUChars(p
, ustr
, 1);
424 log_err("FAIL: for %s expect '%c' next, but got a string\n",
430 log_err("FAIL: for %s expect '%c' next\n",
439 if (uset_size(set
) == expectedSize
) {
440 log_verbose("Ok: %s size is %d\n", pat
, expectedSize
);
442 log_err("FAIL: %s size is %d, expected %d\n",
443 pat
, uset_size(set
), expectedSize
);
449 uint16_t buffer
[1000];
452 UErrorCode errorCode
;
456 /* use a pattern that generates both BMP and supplementary code points */
457 U_STRING_DECL(pattern
, "[:Cf:]", 6);
458 U_STRING_INIT(pattern
, "[:Cf:]", 6);
460 errorCode
=U_ZERO_ERROR
;
461 set
=uset_openPattern(pattern
, -1, &errorCode
);
462 if(U_FAILURE(errorCode
)) {
463 log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode
));
467 length
=uset_serialize(set
, buffer
, LENGTHOF(buffer
), &errorCode
);
468 if(U_FAILURE(errorCode
)) {
469 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode
));
474 uset_getSerializedSet(&sset
, buffer
, length
);
475 for(c
=0; c
<=0x10ffff; ++c
) {
476 if(uset_contains(set
, c
)!=uset_serializedContains(&sset
, c
)) {
477 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c
);
486 * Make sure that when non-invariant chars are passed to uset_openPattern
487 * they do not cause an ugly failure mode (e.g. assertion failure).
491 TestNonInvariantPattern() {
492 UErrorCode ec
= U_ZERO_ERROR
;
493 /* The critical part of this test is that the following pattern
494 must contain a non-invariant character. */
495 static const char *pattern
= "[:ccc!=0:]";
497 int32_t len
= u_unescape(pattern
, buf
, 256);
498 /* This test 'fails' by having an assertion failure within the
499 following call. It passes by running to completion with no
500 assertion failure. */
501 USet
*set
= uset_openPattern(buf
, len
, &ec
);