]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
3 | * Copyright (c) 2002-2003, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ********************************************************************** | |
6 | * $Source: /cvs/root/ICU/icuSources/test/cintltst/usettest.c,v $ | |
7 | ********************************************************************** | |
8 | */ | |
9 | #include "unicode/uset.h" | |
10 | #include "unicode/ustring.h" | |
11 | #include "cintltst.h" | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | ||
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot be split
 * from its operand by a neighbouring operator, e.g. "sizeof LENGTHOF(a)".
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Registers test function x under the path "uset/<x>".
 * Relies on a variable named 'root' being in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
18 | ||
19 | static void TestAPI(void); | |
20 | static void Testj2269(void); | |
21 | static void TestSerialized(void); | |
22 | ||
23 | void addUSetTest(TestNode** root); | |
24 | ||
25 | static void expect(const USet* set, | |
26 | const char* inList, | |
27 | const char* outList, | |
28 | UErrorCode* ec); | |
29 | static void expectContainment(const USet* set, | |
30 | const char* list, | |
31 | UBool isIn); | |
32 | static char oneUCharToChar(UChar32 c); | |
33 | static void expectItems(const USet* set, | |
34 | const char* items); | |
35 | ||
36 | void | |
37 | addUSetTest(TestNode** root) { | |
38 | TEST(TestAPI); | |
39 | TEST(Testj2269); | |
40 | TEST(TestSerialized); | |
41 | } | |
42 | ||
43 | /*------------------------------------------------------------------ | |
44 | * Tests | |
45 | *------------------------------------------------------------------*/ | |
46 | ||
47 | static void Testj2269() { | |
48 | UErrorCode status = U_ZERO_ERROR; | |
49 | UChar a[4] = { 0x61, 0x62, 0x63, 0 }; | |
50 | USet *s = uset_open(1, 0); | |
51 | uset_addString(s, a, 3); | |
52 | a[0] = 0x63; a[1] = 0x63; | |
53 | expect(s, "{abc}", "{ccc}", &status); | |
54 | uset_close(s); | |
55 | } | |
56 | ||
57 | static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */ | |
58 | static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1; | |
59 | ||
60 | static const UChar STR_bc[] = {98,99,0}; /* "bc" */ | |
61 | static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1; | |
62 | ||
63 | static const UChar STR_ab[] = {97,98,0}; /* "ab" */ | |
64 | static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1; | |
65 | ||
66 | /** | |
67 | * Basic API test for uset.x | |
68 | */ | |
69 | static void TestAPI() { | |
70 | USet* set; | |
71 | UErrorCode ec; | |
72 | ||
73 | /* [] */ | |
74 | set = uset_open(1, 1); | |
75 | uset_clear(set); | |
76 | expect(set, "", "abc{ab}", NULL); | |
77 | uset_close(set); | |
78 | ||
79 | /* [ABC] */ | |
80 | set = uset_open(0x0041, 0x0043); | |
81 | expect(set, "ABC", "DEF{ab}", NULL); | |
82 | uset_close(set); | |
83 | ||
84 | /* [a-c{ab}] */ | |
85 | ec = U_ZERO_ERROR; | |
86 | set = uset_openPattern(PAT, PAT_LEN, &ec); | |
87 | if(U_FAILURE(ec)) { | |
88 | log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec)); | |
89 | return; | |
90 | } | |
91 | expect(set, "abc{ab}", "def{bc}", &ec); | |
92 | ||
93 | /* [a-d{ab}] */ | |
94 | uset_add(set, 0x64); | |
95 | expect(set, "abcd{ab}", "ef{bc}", NULL); | |
96 | ||
97 | /* [acd{ab}{bc}] */ | |
98 | uset_remove(set, 0x62); | |
99 | uset_addString(set, STR_bc, STR_bc_LEN); | |
100 | expect(set, "acd{ab}{bc}", "bef{cd}", NULL); | |
101 | ||
102 | /* [acd{bc}] */ | |
103 | uset_removeString(set, STR_ab, STR_ab_LEN); | |
104 | expect(set, "acd{bc}", "bfg{ab}", NULL); | |
105 | ||
106 | /* [^acd{bc}] */ | |
107 | uset_complement(set); | |
108 | expect(set, "bef{bc}", "acd{ac}", NULL); | |
109 | ||
110 | /* [a-e{bc}] */ | |
111 | uset_complement(set); | |
112 | uset_addRange(set, 0x0062, 0x0065); | |
113 | expect(set, "abcde{bc}", "fg{ab}", NULL); | |
114 | ||
115 | /* [de{bc}] */ | |
116 | uset_removeRange(set, 0x0050, 0x0063); | |
117 | expect(set, "de{bc}", "bcfg{ab}", NULL); | |
118 | ||
119 | uset_close(set); | |
120 | } | |
121 | ||
122 | /*------------------------------------------------------------------ | |
123 | * Support | |
124 | *------------------------------------------------------------------*/ | |
125 | ||
126 | /** | |
127 | * Verifies that the given set contains the characters and strings in | |
128 | * inList, and does not contain those in outList. Also verifies that | |
129 | * 'set' is not NULL and that 'ec' succeeds. | |
130 | * @param set the set to test, or NULL (on error) | |
131 | * @param inList list of set contents, in iteration order. Format is | |
132 | * list of individual strings, in iteration order, followed by sorted | |
133 | * list of strings, delimited by {}. This means we do not test | |
134 | * characters '{' or '}' and we do not test strings containing those | |
135 | * characters either. | |
136 | * @param outList list of things not in the set. Same format as | |
137 | * inList. | |
138 | * @param ec an error code, checked for success. May be NULL in which | |
139 | * case it is ignored. | |
140 | */ | |
141 | static void expect(const USet* set, | |
142 | const char* inList, | |
143 | const char* outList, | |
144 | UErrorCode* ec) { | |
145 | if (ec!=NULL && U_FAILURE(*ec)) { | |
146 | log_err("FAIL: %s\n", u_errorName(*ec)); | |
147 | return; | |
148 | } | |
149 | if (set == NULL) { | |
150 | log_err("FAIL: USet is NULL\n"); | |
151 | return; | |
152 | } | |
153 | expectContainment(set, inList, TRUE); | |
154 | expectContainment(set, outList, FALSE); | |
155 | expectItems(set, inList); | |
156 | } | |
157 | ||
158 | static void expectContainment(const USet* set, | |
159 | const char* list, | |
160 | UBool isIn) { | |
161 | const char* p = list; | |
162 | UChar ustr[128]; | |
163 | char pat[128]; | |
164 | UErrorCode ec; | |
165 | int32_t rangeStart = -1, rangeEnd = -1; | |
166 | ||
167 | ec = U_ZERO_ERROR; | |
168 | uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); | |
169 | u_UCharsToChars(ustr, pat, u_strlen(ustr)+1); | |
170 | ||
171 | while (*p) { | |
172 | if (*p=='{') { | |
173 | const char* stringStart = ++p; | |
174 | int32_t stringLength = 0; | |
175 | char strCopy[64]; | |
176 | ||
177 | while (*p++ != '}') {} | |
178 | stringLength = p - stringStart - 1; | |
179 | strncpy(strCopy, stringStart, stringLength); | |
180 | strCopy[stringLength] = 0; | |
181 | ||
182 | u_charsToUChars(stringStart, ustr, stringLength); | |
183 | ||
184 | if (uset_containsString(set, ustr, stringLength) == isIn) { | |
185 | log_verbose("Ok: %s %s \"%s\"\n", pat, | |
186 | (isIn ? "contains" : "does not contain"), | |
187 | strCopy); | |
188 | } else { | |
189 | log_err("FAIL: %s %s \"%s\"\n", pat, | |
190 | (isIn ? "does not contain" : "contains"), | |
191 | strCopy); | |
192 | } | |
193 | } | |
194 | ||
195 | else { | |
196 | UChar32 c; | |
197 | ||
198 | u_charsToUChars(p, ustr, 1); | |
199 | c = ustr[0]; | |
200 | ||
201 | if (uset_contains(set, c) == isIn) { | |
202 | log_verbose("Ok: %s %s '%c'\n", pat, | |
203 | (isIn ? "contains" : "does not contain"), | |
204 | *p); | |
205 | } else { | |
206 | log_err("FAIL: %s %s '%c'\n", pat, | |
207 | (isIn ? "does not contain" : "contains"), | |
208 | *p); | |
209 | } | |
210 | ||
211 | /* Test the range API too by looking for ranges */ | |
212 | if (c == rangeEnd+1) { | |
213 | rangeEnd = c; | |
214 | } else { | |
215 | if (rangeStart >= 0) { | |
216 | if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { | |
217 | log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, | |
218 | (isIn ? "contains" : "does not contain"), | |
219 | rangeStart, rangeEnd); | |
220 | } else { | |
221 | log_err("FAIL: %s %s U+%04X-U+%04X\n", pat, | |
222 | (isIn ? "does not contain" : "contains"), | |
223 | rangeStart, rangeEnd); | |
224 | } | |
225 | } | |
226 | rangeStart = rangeEnd = c; | |
227 | } | |
228 | ||
229 | ++p; | |
230 | } | |
231 | } | |
232 | ||
233 | if (rangeStart >= 0) { | |
234 | if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { | |
235 | log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, | |
236 | (isIn ? "contains" : "does not contain"), | |
237 | rangeStart, rangeEnd); | |
238 | } else { | |
239 | log_err("FAIL: %s %s U+%04X-U+%04X\n", pat, | |
240 | (isIn ? "does not contain" : "contains"), | |
241 | rangeStart, rangeEnd); | |
242 | } | |
243 | } | |
244 | } | |
245 | ||
246 | /* This only works for BMP chars */ | |
247 | static char oneUCharToChar(UChar32 c) { | |
248 | UChar ubuf[1]; | |
249 | char buf[1]; | |
250 | ubuf[0] = (UChar) c; | |
251 | u_UCharsToChars(ubuf, buf, 1); | |
252 | return buf[0]; | |
253 | } | |
254 | ||
255 | static void expectItems(const USet* set, | |
256 | const char* items) { | |
257 | const char* p = items; | |
258 | UChar ustr[128], itemStr[128]; | |
259 | char pat[128], buf[128]; | |
260 | UErrorCode ec; | |
261 | int32_t expectedSize = 0; | |
262 | int32_t itemCount = uset_getItemCount(set); | |
263 | int32_t itemIndex = 0; | |
264 | UChar32 start = 1, end = 0; | |
265 | int32_t itemLen = 0; | |
266 | ||
267 | ec = U_ZERO_ERROR; | |
268 | uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); | |
269 | if (U_FAILURE(ec)) { | |
270 | log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec)); | |
271 | return; | |
272 | } | |
273 | u_UCharsToChars(ustr, pat, u_strlen(ustr)+1); | |
274 | ||
275 | if (uset_isEmpty(set) != (strlen(items)==0)) { | |
276 | log_err("FAIL: %s should return %s from isEmpty\n", | |
277 | pat, | |
278 | strlen(items)==0 ? "TRUE" : "FALSE"); | |
279 | } | |
280 | ||
281 | /* Don't test patterns starting with "[^" */ | |
282 | if (strlen(pat) > 2 && pat[1] == '^') { | |
283 | return; | |
284 | } | |
285 | ||
286 | while (*p) { | |
287 | ||
288 | ++expectedSize; | |
289 | ||
290 | if (start > end || start == -1) { | |
291 | /* Fetch our next item */ | |
292 | if (itemIndex >= itemCount) { | |
293 | log_err("FAIL: ran out of items iterating %s\n", pat); | |
294 | return; | |
295 | } | |
296 | ||
297 | itemLen = uset_getItem(set, itemIndex, &start, &end, | |
298 | itemStr, sizeof(itemStr), &ec); | |
299 | if (U_FAILURE(ec) || itemLen < 0) { | |
300 | log_err("FAIL: uset_getItem => %s\n", u_errorName(ec)); | |
301 | return; | |
302 | } | |
303 | ||
304 | if (itemLen == 0) { | |
305 | log_verbose("Ok: %s item %d is %c-%c\n", pat, | |
306 | itemIndex, oneUCharToChar(start), | |
307 | oneUCharToChar(end)); | |
308 | } else { | |
309 | itemStr[itemLen] = 0; | |
310 | u_UCharsToChars(itemStr, buf, itemLen+1); | |
311 | log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf); | |
312 | } | |
313 | ||
314 | ++itemIndex; | |
315 | } | |
316 | ||
317 | if (*p=='{') { | |
318 | const char* stringStart = ++p; | |
319 | int32_t stringLength = 0; | |
320 | char strCopy[64]; | |
321 | ||
322 | while (*p++ != '}') {} | |
323 | stringLength = p - stringStart - 1; | |
324 | strncpy(strCopy, stringStart, stringLength); | |
325 | strCopy[stringLength] = 0; | |
326 | ||
327 | u_charsToUChars(stringStart, ustr, stringLength); | |
328 | ustr[stringLength] = 0; | |
329 | ||
330 | if (itemLen == 0) { | |
331 | log_err("FAIL: for %s expect \"%s\" next, but got a char\n", | |
332 | pat, strCopy); | |
333 | return; | |
334 | } | |
335 | ||
336 | if (u_strcmp(ustr, itemStr) != 0) { | |
337 | log_err("FAIL: for %s expect \"%s\" next\n", | |
338 | pat, strCopy); | |
339 | return; | |
340 | } | |
341 | } | |
342 | ||
343 | else { | |
344 | UChar32 c; | |
345 | ||
346 | u_charsToUChars(p, ustr, 1); | |
347 | c = ustr[0]; | |
348 | ||
349 | if (itemLen != 0) { | |
350 | log_err("FAIL: for %s expect '%c' next, but got a string\n", | |
351 | pat, *p); | |
352 | return; | |
353 | } | |
354 | ||
355 | if (c != start++) { | |
356 | log_err("FAIL: for %s expect '%c' next\n", | |
357 | pat, *p); | |
358 | return; | |
359 | } | |
360 | ||
361 | ++p; | |
362 | } | |
363 | } | |
364 | ||
365 | if (uset_size(set) == expectedSize) { | |
366 | log_verbose("Ok: %s size is %d\n", pat, expectedSize); | |
367 | } else { | |
368 | log_err("FAIL: %s size is %d, expected %d\n", | |
369 | pat, uset_size(set), expectedSize); | |
370 | } | |
371 | } | |
372 | ||
373 | static void | |
374 | TestSerialized() { | |
375 | uint16_t buffer[1000]; | |
376 | USerializedSet sset; | |
377 | USet *set; | |
378 | UErrorCode errorCode; | |
379 | UChar32 c; | |
380 | int32_t length; | |
381 | ||
382 | /* use a pattern that generates both BMP and supplementary code points */ | |
383 | U_STRING_DECL(pattern, "[:Cf:]", 6); | |
384 | U_STRING_INIT(pattern, "[:Cf:]", 6); | |
385 | ||
386 | errorCode=U_ZERO_ERROR; | |
387 | set=uset_openPattern(pattern, -1, &errorCode); | |
388 | if(U_FAILURE(errorCode)) { | |
389 | log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode)); | |
390 | return; | |
391 | } | |
392 | ||
393 | length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode); | |
394 | if(U_FAILURE(errorCode)) { | |
395 | log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode)); | |
396 | uset_close(set); | |
397 | return; | |
398 | } | |
399 | ||
400 | uset_getSerializedSet(&sset, buffer, length); | |
401 | for(c=0; c<=0x10ffff; ++c) { | |
402 | if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) { | |
403 | log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c); | |
404 | break; | |
405 | } | |
406 | } | |
407 | ||
408 | uset_close(set); | |
409 | } | |
410 | ||
411 | /*eof*/ |