]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/usettest.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usettest.c
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (c) 2002-2003, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* $Source: /cvs/root/ICU/icuSources/test/cintltst/usettest.c,v $
7**********************************************************************
8*/
9#include "unicode/uset.h"
10#include "unicode/ustring.h"
11#include "cintltst.h"
12#include <stdlib.h>
13#include <string.h>
14
15#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
16
17#define TEST(x) addTest(root, &x, "uset/" # x)
18
19static void TestAPI(void);
20static void Testj2269(void);
21static void TestSerialized(void);
22
23void addUSetTest(TestNode** root);
24
25static void expect(const USet* set,
26 const char* inList,
27 const char* outList,
28 UErrorCode* ec);
29static void expectContainment(const USet* set,
30 const char* list,
31 UBool isIn);
32static char oneUCharToChar(UChar32 c);
33static void expectItems(const USet* set,
34 const char* items);
35
36void
37addUSetTest(TestNode** root) {
38 TEST(TestAPI);
39 TEST(Testj2269);
40 TEST(TestSerialized);
41}
42
43/*------------------------------------------------------------------
44 * Tests
45 *------------------------------------------------------------------*/
46
47static void Testj2269() {
48 UErrorCode status = U_ZERO_ERROR;
49 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
50 USet *s = uset_open(1, 0);
51 uset_addString(s, a, 3);
52 a[0] = 0x63; a[1] = 0x63;
53 expect(s, "{abc}", "{ccc}", &status);
54 uset_close(s);
55}
56
57static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
58static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
59
60static const UChar STR_bc[] = {98,99,0}; /* "bc" */
61static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
62
63static const UChar STR_ab[] = {97,98,0}; /* "ab" */
64static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
65
66/**
67 * Basic API test for uset.x
68 */
69static void TestAPI() {
70 USet* set;
71 UErrorCode ec;
72
73 /* [] */
74 set = uset_open(1, 1);
75 uset_clear(set);
76 expect(set, "", "abc{ab}", NULL);
77 uset_close(set);
78
79 /* [ABC] */
80 set = uset_open(0x0041, 0x0043);
81 expect(set, "ABC", "DEF{ab}", NULL);
82 uset_close(set);
83
84 /* [a-c{ab}] */
85 ec = U_ZERO_ERROR;
86 set = uset_openPattern(PAT, PAT_LEN, &ec);
87 if(U_FAILURE(ec)) {
88 log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
89 return;
90 }
91 expect(set, "abc{ab}", "def{bc}", &ec);
92
93 /* [a-d{ab}] */
94 uset_add(set, 0x64);
95 expect(set, "abcd{ab}", "ef{bc}", NULL);
96
97 /* [acd{ab}{bc}] */
98 uset_remove(set, 0x62);
99 uset_addString(set, STR_bc, STR_bc_LEN);
100 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
101
102 /* [acd{bc}] */
103 uset_removeString(set, STR_ab, STR_ab_LEN);
104 expect(set, "acd{bc}", "bfg{ab}", NULL);
105
106 /* [^acd{bc}] */
107 uset_complement(set);
108 expect(set, "bef{bc}", "acd{ac}", NULL);
109
110 /* [a-e{bc}] */
111 uset_complement(set);
112 uset_addRange(set, 0x0062, 0x0065);
113 expect(set, "abcde{bc}", "fg{ab}", NULL);
114
115 /* [de{bc}] */
116 uset_removeRange(set, 0x0050, 0x0063);
117 expect(set, "de{bc}", "bcfg{ab}", NULL);
118
119 uset_close(set);
120}
121
122/*------------------------------------------------------------------
123 * Support
124 *------------------------------------------------------------------*/
125
126/**
127 * Verifies that the given set contains the characters and strings in
128 * inList, and does not contain those in outList. Also verifies that
129 * 'set' is not NULL and that 'ec' succeeds.
130 * @param set the set to test, or NULL (on error)
131 * @param inList list of set contents, in iteration order. Format is
132 * list of individual strings, in iteration order, followed by sorted
133 * list of strings, delimited by {}. This means we do not test
134 * characters '{' or '}' and we do not test strings containing those
135 * characters either.
136 * @param outList list of things not in the set. Same format as
137 * inList.
138 * @param ec an error code, checked for success. May be NULL in which
139 * case it is ignored.
140 */
141static void expect(const USet* set,
142 const char* inList,
143 const char* outList,
144 UErrorCode* ec) {
145 if (ec!=NULL && U_FAILURE(*ec)) {
146 log_err("FAIL: %s\n", u_errorName(*ec));
147 return;
148 }
149 if (set == NULL) {
150 log_err("FAIL: USet is NULL\n");
151 return;
152 }
153 expectContainment(set, inList, TRUE);
154 expectContainment(set, outList, FALSE);
155 expectItems(set, inList);
156}
157
158static void expectContainment(const USet* set,
159 const char* list,
160 UBool isIn) {
161 const char* p = list;
162 UChar ustr[128];
163 char pat[128];
164 UErrorCode ec;
165 int32_t rangeStart = -1, rangeEnd = -1;
166
167 ec = U_ZERO_ERROR;
168 uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
169 u_UCharsToChars(ustr, pat, u_strlen(ustr)+1);
170
171 while (*p) {
172 if (*p=='{') {
173 const char* stringStart = ++p;
174 int32_t stringLength = 0;
175 char strCopy[64];
176
177 while (*p++ != '}') {}
178 stringLength = p - stringStart - 1;
179 strncpy(strCopy, stringStart, stringLength);
180 strCopy[stringLength] = 0;
181
182 u_charsToUChars(stringStart, ustr, stringLength);
183
184 if (uset_containsString(set, ustr, stringLength) == isIn) {
185 log_verbose("Ok: %s %s \"%s\"\n", pat,
186 (isIn ? "contains" : "does not contain"),
187 strCopy);
188 } else {
189 log_err("FAIL: %s %s \"%s\"\n", pat,
190 (isIn ? "does not contain" : "contains"),
191 strCopy);
192 }
193 }
194
195 else {
196 UChar32 c;
197
198 u_charsToUChars(p, ustr, 1);
199 c = ustr[0];
200
201 if (uset_contains(set, c) == isIn) {
202 log_verbose("Ok: %s %s '%c'\n", pat,
203 (isIn ? "contains" : "does not contain"),
204 *p);
205 } else {
206 log_err("FAIL: %s %s '%c'\n", pat,
207 (isIn ? "does not contain" : "contains"),
208 *p);
209 }
210
211 /* Test the range API too by looking for ranges */
212 if (c == rangeEnd+1) {
213 rangeEnd = c;
214 } else {
215 if (rangeStart >= 0) {
216 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
217 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
218 (isIn ? "contains" : "does not contain"),
219 rangeStart, rangeEnd);
220 } else {
221 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
222 (isIn ? "does not contain" : "contains"),
223 rangeStart, rangeEnd);
224 }
225 }
226 rangeStart = rangeEnd = c;
227 }
228
229 ++p;
230 }
231 }
232
233 if (rangeStart >= 0) {
234 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
235 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
236 (isIn ? "contains" : "does not contain"),
237 rangeStart, rangeEnd);
238 } else {
239 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
240 (isIn ? "does not contain" : "contains"),
241 rangeStart, rangeEnd);
242 }
243 }
244}
245
246/* This only works for BMP chars */
247static char oneUCharToChar(UChar32 c) {
248 UChar ubuf[1];
249 char buf[1];
250 ubuf[0] = (UChar) c;
251 u_UCharsToChars(ubuf, buf, 1);
252 return buf[0];
253}
254
255static void expectItems(const USet* set,
256 const char* items) {
257 const char* p = items;
258 UChar ustr[128], itemStr[128];
259 char pat[128], buf[128];
260 UErrorCode ec;
261 int32_t expectedSize = 0;
262 int32_t itemCount = uset_getItemCount(set);
263 int32_t itemIndex = 0;
264 UChar32 start = 1, end = 0;
265 int32_t itemLen = 0;
266
267 ec = U_ZERO_ERROR;
268 uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
269 if (U_FAILURE(ec)) {
270 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
271 return;
272 }
273 u_UCharsToChars(ustr, pat, u_strlen(ustr)+1);
274
275 if (uset_isEmpty(set) != (strlen(items)==0)) {
276 log_err("FAIL: %s should return %s from isEmpty\n",
277 pat,
278 strlen(items)==0 ? "TRUE" : "FALSE");
279 }
280
281 /* Don't test patterns starting with "[^" */
282 if (strlen(pat) > 2 && pat[1] == '^') {
283 return;
284 }
285
286 while (*p) {
287
288 ++expectedSize;
289
290 if (start > end || start == -1) {
291 /* Fetch our next item */
292 if (itemIndex >= itemCount) {
293 log_err("FAIL: ran out of items iterating %s\n", pat);
294 return;
295 }
296
297 itemLen = uset_getItem(set, itemIndex, &start, &end,
298 itemStr, sizeof(itemStr), &ec);
299 if (U_FAILURE(ec) || itemLen < 0) {
300 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
301 return;
302 }
303
304 if (itemLen == 0) {
305 log_verbose("Ok: %s item %d is %c-%c\n", pat,
306 itemIndex, oneUCharToChar(start),
307 oneUCharToChar(end));
308 } else {
309 itemStr[itemLen] = 0;
310 u_UCharsToChars(itemStr, buf, itemLen+1);
311 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
312 }
313
314 ++itemIndex;
315 }
316
317 if (*p=='{') {
318 const char* stringStart = ++p;
319 int32_t stringLength = 0;
320 char strCopy[64];
321
322 while (*p++ != '}') {}
323 stringLength = p - stringStart - 1;
324 strncpy(strCopy, stringStart, stringLength);
325 strCopy[stringLength] = 0;
326
327 u_charsToUChars(stringStart, ustr, stringLength);
328 ustr[stringLength] = 0;
329
330 if (itemLen == 0) {
331 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
332 pat, strCopy);
333 return;
334 }
335
336 if (u_strcmp(ustr, itemStr) != 0) {
337 log_err("FAIL: for %s expect \"%s\" next\n",
338 pat, strCopy);
339 return;
340 }
341 }
342
343 else {
344 UChar32 c;
345
346 u_charsToUChars(p, ustr, 1);
347 c = ustr[0];
348
349 if (itemLen != 0) {
350 log_err("FAIL: for %s expect '%c' next, but got a string\n",
351 pat, *p);
352 return;
353 }
354
355 if (c != start++) {
356 log_err("FAIL: for %s expect '%c' next\n",
357 pat, *p);
358 return;
359 }
360
361 ++p;
362 }
363 }
364
365 if (uset_size(set) == expectedSize) {
366 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
367 } else {
368 log_err("FAIL: %s size is %d, expected %d\n",
369 pat, uset_size(set), expectedSize);
370 }
371}
372
373static void
374TestSerialized() {
375 uint16_t buffer[1000];
376 USerializedSet sset;
377 USet *set;
378 UErrorCode errorCode;
379 UChar32 c;
380 int32_t length;
381
382 /* use a pattern that generates both BMP and supplementary code points */
383 U_STRING_DECL(pattern, "[:Cf:]", 6);
384 U_STRING_INIT(pattern, "[:Cf:]", 6);
385
386 errorCode=U_ZERO_ERROR;
387 set=uset_openPattern(pattern, -1, &errorCode);
388 if(U_FAILURE(errorCode)) {
389 log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
390 return;
391 }
392
393 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
394 if(U_FAILURE(errorCode)) {
395 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
396 uset_close(set);
397 return;
398 }
399
400 uset_getSerializedSet(&sset, buffer, length);
401 for(c=0; c<=0x10ffff; ++c) {
402 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
403 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
404 break;
405 }
406 }
407
408 uset_close(set);
409}
410
411/*eof*/