]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/usettest.c
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usettest.c
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
374ca955 3* Copyright (c) 2002-2004, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
b75a7d8f
A
6*/
7#include "unicode/uset.h"
8#include "unicode/ustring.h"
9#include "cintltst.h"
10#include <stdlib.h>
11#include <string.h>
12
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression in any context (e.g. as the operand of sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Registers test function x under the path "uset/<x>".
 * Requires a variable named 'root' to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
16
17static void TestAPI(void);
18static void Testj2269(void);
19static void TestSerialized(void);
374ca955 20static void TestNonInvariantPattern(void);
b75a7d8f
A
21
22void addUSetTest(TestNode** root);
23
24static void expect(const USet* set,
25 const char* inList,
26 const char* outList,
27 UErrorCode* ec);
28static void expectContainment(const USet* set,
29 const char* list,
30 UBool isIn);
31static char oneUCharToChar(UChar32 c);
32static void expectItems(const USet* set,
33 const char* items);
34
35void
36addUSetTest(TestNode** root) {
37 TEST(TestAPI);
38 TEST(Testj2269);
39 TEST(TestSerialized);
374ca955 40 TEST(TestNonInvariantPattern);
b75a7d8f
A
41}
42
43/*------------------------------------------------------------------
44 * Tests
45 *------------------------------------------------------------------*/
46
47static void Testj2269() {
48 UErrorCode status = U_ZERO_ERROR;
49 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
50 USet *s = uset_open(1, 0);
51 uset_addString(s, a, 3);
52 a[0] = 0x63; a[1] = 0x63;
53 expect(s, "{abc}", "{ccc}", &status);
54 uset_close(s);
55}
56
57static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
58static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
59
374ca955
A
60static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
61static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
62
63static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
64static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
65
b75a7d8f
A
66static const UChar STR_bc[] = {98,99,0}; /* "bc" */
67static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
68
69static const UChar STR_ab[] = {97,98,0}; /* "ab" */
70static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
71
72/**
73 * Basic API test for uset.x
74 */
75static void TestAPI() {
76 USet* set;
374ca955 77 USet* set2;
b75a7d8f
A
78 UErrorCode ec;
79
80 /* [] */
81 set = uset_open(1, 1);
82 uset_clear(set);
83 expect(set, "", "abc{ab}", NULL);
84 uset_close(set);
85
86 /* [ABC] */
87 set = uset_open(0x0041, 0x0043);
88 expect(set, "ABC", "DEF{ab}", NULL);
89 uset_close(set);
90
91 /* [a-c{ab}] */
92 ec = U_ZERO_ERROR;
93 set = uset_openPattern(PAT, PAT_LEN, &ec);
94 if(U_FAILURE(ec)) {
95 log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
96 return;
97 }
374ca955
A
98 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
99 log_data_err("uset_resemblesPattern of PAT failed\n");
100 }
b75a7d8f
A
101 expect(set, "abc{ab}", "def{bc}", &ec);
102
103 /* [a-d{ab}] */
104 uset_add(set, 0x64);
105 expect(set, "abcd{ab}", "ef{bc}", NULL);
106
107 /* [acd{ab}{bc}] */
108 uset_remove(set, 0x62);
109 uset_addString(set, STR_bc, STR_bc_LEN);
110 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
111
112 /* [acd{bc}] */
113 uset_removeString(set, STR_ab, STR_ab_LEN);
114 expect(set, "acd{bc}", "bfg{ab}", NULL);
115
116 /* [^acd{bc}] */
117 uset_complement(set);
118 expect(set, "bef{bc}", "acd{ac}", NULL);
119
120 /* [a-e{bc}] */
121 uset_complement(set);
122 uset_addRange(set, 0x0062, 0x0065);
123 expect(set, "abcde{bc}", "fg{ab}", NULL);
124
125 /* [de{bc}] */
126 uset_removeRange(set, 0x0050, 0x0063);
127 expect(set, "de{bc}", "bcfg{ab}", NULL);
128
374ca955
A
129 /* [g-l] */
130 uset_set(set, 0x0067, 0x006C);
131 expect(set, "ghijkl", "de{bc}", NULL);
132
133 if (uset_indexOf(set, 0x0067) != 0) {
134 log_data_err("uset_indexOf failed finding correct index of 'g'\n");
135 }
136
137 if (uset_charAt(set, 0) != 0x0067) {
138 log_data_err("uset_charAt failed finding correct char 'g' at index 0\n");
139 }
140
141 /* How to test this one...? */
142 uset_compact(set);
143
144 /* [g-i] */
145 uset_retain(set, 0x0067, 0x0069);
146 expect(set, "ghi", "dejkl{bc}", NULL);
147
148 /* UCHAR_ASCII_HEX_DIGIT */
149 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
150 if(U_FAILURE(ec)) {
151 log_data_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
152 return;
153 }
154 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
155
156 /* [] */
157 set2 = uset_open(1, 1);
158 uset_clear(set2);
159
160 /* space */
161 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
162 expect(set2, " ", "abcdefghi{bc}", NULL);
163
164 /* [a-c] */
165 uset_set(set2, 0x0061, 0x0063);
166 /* [g-i] */
167 uset_set(set, 0x0067, 0x0069);
168
169 /* [a-c g-i] */
170 uset_complementAll(set, set2);
171 expect(set, "abcghi", "def{bc}", NULL);
172
173 /* [g-i] */
174 uset_removeAll(set, set2);
175 expect(set, "ghi", "abcdef{bc}", NULL);
176
177 /* [a-c g-i] */
178 uset_addAll(set2, set);
179 expect(set2, "abcghi", "def{bc}", NULL);
180
181 /* [g-i] */
182 uset_retainAll(set2, set);
183 expect(set2, "ghi", "abcdef{bc}", NULL);
184
b75a7d8f 185 uset_close(set);
374ca955 186 uset_close(set2);
b75a7d8f
A
187}
188
189/*------------------------------------------------------------------
190 * Support
191 *------------------------------------------------------------------*/
192
193/**
194 * Verifies that the given set contains the characters and strings in
195 * inList, and does not contain those in outList. Also verifies that
196 * 'set' is not NULL and that 'ec' succeeds.
197 * @param set the set to test, or NULL (on error)
198 * @param inList list of set contents, in iteration order. Format is
199 * list of individual strings, in iteration order, followed by sorted
200 * list of strings, delimited by {}. This means we do not test
201 * characters '{' or '}' and we do not test strings containing those
202 * characters either.
203 * @param outList list of things not in the set. Same format as
204 * inList.
205 * @param ec an error code, checked for success. May be NULL in which
206 * case it is ignored.
207 */
208static void expect(const USet* set,
209 const char* inList,
210 const char* outList,
211 UErrorCode* ec) {
212 if (ec!=NULL && U_FAILURE(*ec)) {
213 log_err("FAIL: %s\n", u_errorName(*ec));
214 return;
215 }
216 if (set == NULL) {
217 log_err("FAIL: USet is NULL\n");
218 return;
219 }
220 expectContainment(set, inList, TRUE);
221 expectContainment(set, outList, FALSE);
222 expectItems(set, inList);
223}
224
225static void expectContainment(const USet* set,
226 const char* list,
227 UBool isIn) {
228 const char* p = list;
374ca955
A
229 UChar ustr[4096];
230 char *pat;
b75a7d8f 231 UErrorCode ec;
374ca955 232 int32_t rangeStart = -1, rangeEnd = -1, length;
b75a7d8f
A
233
234 ec = U_ZERO_ERROR;
374ca955
A
235 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
236 if(U_FAILURE(ec)) {
237 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
238 return;
239 }
240 pat=aescstrdup(ustr, length);
b75a7d8f
A
241
242 while (*p) {
243 if (*p=='{') {
244 const char* stringStart = ++p;
245 int32_t stringLength = 0;
246 char strCopy[64];
247
374ca955
A
248 while (*p++ != '}') {
249 }
250 stringLength = (int32_t)(p - stringStart - 1);
b75a7d8f
A
251 strncpy(strCopy, stringStart, stringLength);
252 strCopy[stringLength] = 0;
253
254 u_charsToUChars(stringStart, ustr, stringLength);
255
256 if (uset_containsString(set, ustr, stringLength) == isIn) {
257 log_verbose("Ok: %s %s \"%s\"\n", pat,
258 (isIn ? "contains" : "does not contain"),
259 strCopy);
260 } else {
261 log_err("FAIL: %s %s \"%s\"\n", pat,
262 (isIn ? "does not contain" : "contains"),
263 strCopy);
264 }
265 }
266
267 else {
268 UChar32 c;
269
270 u_charsToUChars(p, ustr, 1);
271 c = ustr[0];
272
273 if (uset_contains(set, c) == isIn) {
274 log_verbose("Ok: %s %s '%c'\n", pat,
275 (isIn ? "contains" : "does not contain"),
276 *p);
277 } else {
278 log_err("FAIL: %s %s '%c'\n", pat,
279 (isIn ? "does not contain" : "contains"),
280 *p);
281 }
282
283 /* Test the range API too by looking for ranges */
284 if (c == rangeEnd+1) {
285 rangeEnd = c;
286 } else {
287 if (rangeStart >= 0) {
288 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
289 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
290 (isIn ? "contains" : "does not contain"),
291 rangeStart, rangeEnd);
292 } else {
293 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
294 (isIn ? "does not contain" : "contains"),
295 rangeStart, rangeEnd);
296 }
297 }
298 rangeStart = rangeEnd = c;
299 }
300
301 ++p;
302 }
303 }
304
305 if (rangeStart >= 0) {
306 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
307 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
308 (isIn ? "contains" : "does not contain"),
309 rangeStart, rangeEnd);
310 } else {
311 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
312 (isIn ? "does not contain" : "contains"),
313 rangeStart, rangeEnd);
314 }
315 }
316}
317
374ca955 318/* This only works for invariant BMP chars */
b75a7d8f
A
319static char oneUCharToChar(UChar32 c) {
320 UChar ubuf[1];
321 char buf[1];
322 ubuf[0] = (UChar) c;
323 u_UCharsToChars(ubuf, buf, 1);
324 return buf[0];
325}
326
327static void expectItems(const USet* set,
328 const char* items) {
329 const char* p = items;
374ca955
A
330 UChar ustr[4096], itemStr[4096];
331 char buf[4096];
332 char *pat;
b75a7d8f
A
333 UErrorCode ec;
334 int32_t expectedSize = 0;
335 int32_t itemCount = uset_getItemCount(set);
336 int32_t itemIndex = 0;
337 UChar32 start = 1, end = 0;
374ca955 338 int32_t itemLen = 0, length;
b75a7d8f
A
339
340 ec = U_ZERO_ERROR;
374ca955 341 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
b75a7d8f
A
342 if (U_FAILURE(ec)) {
343 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
344 return;
345 }
374ca955 346 pat=aescstrdup(ustr, length);
b75a7d8f
A
347
348 if (uset_isEmpty(set) != (strlen(items)==0)) {
349 log_err("FAIL: %s should return %s from isEmpty\n",
350 pat,
351 strlen(items)==0 ? "TRUE" : "FALSE");
352 }
353
354 /* Don't test patterns starting with "[^" */
374ca955 355 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
b75a7d8f
A
356 return;
357 }
358
359 while (*p) {
360
361 ++expectedSize;
362
363 if (start > end || start == -1) {
364 /* Fetch our next item */
365 if (itemIndex >= itemCount) {
366 log_err("FAIL: ran out of items iterating %s\n", pat);
367 return;
368 }
369
370 itemLen = uset_getItem(set, itemIndex, &start, &end,
371 itemStr, sizeof(itemStr), &ec);
372 if (U_FAILURE(ec) || itemLen < 0) {
373 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
374 return;
375 }
376
377 if (itemLen == 0) {
378 log_verbose("Ok: %s item %d is %c-%c\n", pat,
379 itemIndex, oneUCharToChar(start),
380 oneUCharToChar(end));
381 } else {
382 itemStr[itemLen] = 0;
383 u_UCharsToChars(itemStr, buf, itemLen+1);
384 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
385 }
386
387 ++itemIndex;
388 }
389
390 if (*p=='{') {
391 const char* stringStart = ++p;
392 int32_t stringLength = 0;
393 char strCopy[64];
394
374ca955
A
395 while (*p++ != '}') {
396 }
397 stringLength = (int32_t)(p - stringStart - 1);
b75a7d8f
A
398 strncpy(strCopy, stringStart, stringLength);
399 strCopy[stringLength] = 0;
400
401 u_charsToUChars(stringStart, ustr, stringLength);
402 ustr[stringLength] = 0;
403
404 if (itemLen == 0) {
405 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
406 pat, strCopy);
407 return;
408 }
409
410 if (u_strcmp(ustr, itemStr) != 0) {
411 log_err("FAIL: for %s expect \"%s\" next\n",
412 pat, strCopy);
413 return;
414 }
415 }
416
417 else {
418 UChar32 c;
419
420 u_charsToUChars(p, ustr, 1);
421 c = ustr[0];
422
423 if (itemLen != 0) {
424 log_err("FAIL: for %s expect '%c' next, but got a string\n",
425 pat, *p);
426 return;
427 }
428
429 if (c != start++) {
430 log_err("FAIL: for %s expect '%c' next\n",
431 pat, *p);
432 return;
433 }
434
435 ++p;
436 }
437 }
438
439 if (uset_size(set) == expectedSize) {
440 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
441 } else {
442 log_err("FAIL: %s size is %d, expected %d\n",
443 pat, uset_size(set), expectedSize);
444 }
445}
446
447static void
448TestSerialized() {
449 uint16_t buffer[1000];
450 USerializedSet sset;
451 USet *set;
452 UErrorCode errorCode;
453 UChar32 c;
454 int32_t length;
455
456 /* use a pattern that generates both BMP and supplementary code points */
457 U_STRING_DECL(pattern, "[:Cf:]", 6);
458 U_STRING_INIT(pattern, "[:Cf:]", 6);
459
460 errorCode=U_ZERO_ERROR;
461 set=uset_openPattern(pattern, -1, &errorCode);
462 if(U_FAILURE(errorCode)) {
463 log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
464 return;
465 }
466
467 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
468 if(U_FAILURE(errorCode)) {
469 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
470 uset_close(set);
471 return;
472 }
473
474 uset_getSerializedSet(&sset, buffer, length);
475 for(c=0; c<=0x10ffff; ++c) {
476 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
477 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
478 break;
479 }
480 }
481
482 uset_close(set);
483}
484
374ca955
A
485/**
486 * Make sure that when non-invariant chars are passed to uset_openPattern
487 * they do not cause an ugly failure mode (e.g. assertion failure).
488 * JB#3795.
489 */
490static void
491TestNonInvariantPattern() {
492 UErrorCode ec = U_ZERO_ERROR;
493 /* The critical part of this test is that the following pattern
494 must contain a non-invariant character. */
495 static const char *pattern = "[:ccc!=0:]";
496 UChar buf[256];
497 int32_t len = u_unescape(pattern, buf, 256);
498 /* This test 'fails' by having an assertion failure within the
499 following call. It passes by running to completion with no
500 assertion failure. */
501 USet *set = uset_openPattern(buf, len, &ec);
502 uset_close(set);
503}
504
b75a7d8f 505/*eof*/