]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /******************************************************************** |
2 | * Copyright (c) 1997-2010, International Business Machines | |
3 | * Corporation and others. All Rights Reserved. | |
4 | ******************************************************************** | |
5 | * | |
6 | * File UCNVSELTST.C | |
7 | * | |
8 | * Modification History: | |
9 | * Name Description | |
10 | * MOHAMED ELDAWY Creation | |
11 | ******************************************************************** | |
12 | */ | |
13 | ||
14 | /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ | |
15 | ||
16 | #include "ucnvseltst.h" | |
17 | ||
18 | #include <stdio.h> | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | #include "unicode/ucnvsel.h" | |
22 | #include "unicode/ustring.h" | |
23 | #include "cmemory.h" | |
24 | #include "cstring.h" | |
25 | #include "propsvec.h" | |
26 | ||
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used (e.g. after sizeof or before
 * a postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Capacity in bytes, including the terminating NUL, of the path buffer in fopenOrError(). */
#define FILENAME_BUFFER 1024

/* Path fragment that fopenOrError() appends to ctest_dataSrcDir() before the file name. */
#define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
32 | ||
33 | static void TestSelector(void); | |
34 | static void TestUPropsVector(void); | |
35 | void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ | |
36 | ||
37 | void addCnvSelTest(TestNode** root) | |
38 | { | |
39 | addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); | |
40 | addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); | |
41 | } | |
42 | ||
43 | static const char **gAvailableNames = NULL; | |
44 | static int32_t gCountAvailable = 0; | |
45 | ||
46 | static UBool | |
47 | getAvailableNames() { | |
48 | int32_t i; | |
49 | if (gAvailableNames != NULL) { | |
50 | return TRUE; | |
51 | } | |
52 | gCountAvailable = ucnv_countAvailable(); | |
53 | if (gCountAvailable == 0) { | |
54 | log_data_err("No converters available.\n"); | |
55 | return FALSE; | |
56 | } | |
57 | gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); | |
58 | if (gAvailableNames == NULL) { | |
59 | log_err("unable to allocate memory for %ld available converter names\n", | |
60 | (long)gCountAvailable); | |
61 | return FALSE; | |
62 | } | |
63 | for (i = 0; i < gCountAvailable; ++i) { | |
64 | gAvailableNames[i] = ucnv_getAvailableName(i); | |
65 | } | |
66 | return TRUE; | |
67 | } | |
68 | ||
69 | static void | |
70 | releaseAvailableNames() { | |
71 | uprv_free((void *)gAvailableNames); | |
72 | gAvailableNames = NULL; | |
73 | gCountAvailable = 0; | |
74 | } | |
75 | ||
76 | static const char ** | |
77 | getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { | |
78 | const char **names; | |
79 | int32_t i; | |
80 | ||
81 | *pCount = 0; | |
82 | if (count <= 0) { | |
83 | return NULL; | |
84 | } | |
85 | names = (const char **)uprv_malloc(count * sizeof(char *)); | |
86 | if (names == NULL) { | |
87 | log_err("memory allocation error for %ld pointers\n", (long)count); | |
88 | return NULL; | |
89 | } | |
90 | if (step == 0 && count > 0) { | |
91 | step = 1; | |
92 | } | |
93 | for (i = 0; i < count; ++i) { | |
94 | if (0 <= start && start < gCountAvailable) { | |
95 | names[i] = gAvailableNames[start]; | |
96 | start += step; | |
97 | ++*pCount; | |
98 | } | |
99 | } | |
100 | return names; | |
101 | } | |
102 | ||
#if 0
/*
 * Disabled on purpose: ucnvsel_open() does not support "no encodings".
 * Given 0 encodings it will open a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
    const char **none = NULL;

    *pCount = 0;
    return none;
}
#endif
114 | ||
/* Selects a single converter name: the one at index 1 of the available names. */
static const char **
getOneEncoding(int32_t *pCount) {
    const int32_t first = 1, stride = 0, wanted = 1;

    return getEncodings(first, stride, wanted, pCount);
}
119 | ||
/* Selects up to 25 converter names at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
    const int32_t first = 0, stride = 2, wanted = 25;

    return getEncodings(first, stride, wanted, pCount);
}
124 | ||
125 | static const char ** | |
126 | getMiddleEncodings(int32_t *pCount) { | |
127 | return getEncodings(gCountAvailable - 12, 1, 22, pCount); | |
128 | } | |
129 | ||
130 | static const char ** | |
131 | getLastEncodings(int32_t *pCount) { | |
132 | return getEncodings(gCountAvailable - 1, -1, 25, pCount); | |
133 | } | |
134 | ||
135 | static const char ** | |
136 | getSomeEncodings(int32_t *pCount) { | |
137 | /* 20 evenly distributed */ | |
138 | return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); | |
139 | } | |
140 | ||
141 | static const char ** | |
142 | getEveryThirdEncoding(int32_t *pCount) { | |
143 | return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); | |
144 | } | |
145 | ||
146 | static const char ** | |
147 | getAllEncodings(int32_t *pCount) { | |
148 | return getEncodings(0, 1, gCountAvailable, pCount); | |
149 | } | |
150 | ||
151 | typedef const char **GetEncodingsFn(int32_t *); | |
152 | ||
153 | static GetEncodingsFn *const getEncodingsFns[] = { | |
154 | getOneEncoding, | |
155 | getFirstEvenEncodings, | |
156 | getMiddleEncodings, | |
157 | getLastEncodings, | |
158 | getSomeEncodings, | |
159 | getEveryThirdEncoding, | |
160 | getAllEncodings | |
161 | }; | |
162 | ||
163 | static FILE *fopenOrError(const char *filename) { | |
164 | int32_t needLen; | |
165 | FILE *f; | |
166 | char fnbuf[FILENAME_BUFFER]; | |
167 | const char* directory= ctest_dataSrcDir(); | |
168 | needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; | |
169 | if(needLen > FILENAME_BUFFER) { | |
170 | log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", | |
171 | filename, needLen, FILENAME_BUFFER); | |
172 | return NULL; | |
173 | } | |
174 | ||
175 | strcpy(fnbuf, directory); | |
176 | strcat(fnbuf, TDSRCPATH); | |
177 | strcat(fnbuf, filename); | |
178 | ||
179 | f = fopen(fnbuf, "rb"); | |
180 | ||
181 | if(f == NULL) { | |
182 | log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); | |
183 | } | |
184 | return f; | |
185 | } | |
186 | ||
/*
 * The test text held in memory as a sequence of NUL-terminated strings,
 * together with the iteration state used by text_nextString().
 */
typedef struct TestText {
    char *text;        /* start of the text buffer */
    char *textLimit;   /* end of the text, where the final NUL is stored */
    char *limit;       /* end of the string delivered last; equals text before the first one */
    int32_t number;    /* zero-based index of the string delivered last */
} TestText;
192 | ||
193 | static void | |
194 | text_reset(TestText *tt) { | |
195 | tt->limit = tt->text; | |
196 | tt->number = 0; | |
197 | } | |
198 | ||
199 | static char * | |
200 | text_nextString(TestText *tt, int32_t *pLength) { | |
201 | char *s = tt->limit; | |
202 | if (s == tt->textLimit) { | |
203 | /* we already delivered the last string */ | |
204 | return NULL; | |
205 | } else if (s == tt->text) { | |
206 | /* first string */ | |
207 | if ((tt->textLimit - tt->text) >= 3 && | |
208 | s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf | |
209 | ) { | |
210 | s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ | |
211 | } | |
212 | } else { | |
213 | /* skip the string terminator */ | |
214 | ++s; | |
215 | ++tt->number; | |
216 | } | |
217 | ||
218 | /* find the end of this string */ | |
219 | tt->limit = uprv_strchr(s, 0); | |
220 | *pLength = (int32_t)(tt->limit - s); | |
221 | return s; | |
222 | } | |
223 | ||
224 | static UBool | |
225 | text_open(TestText *tt) { | |
226 | FILE *f; | |
227 | char *s; | |
228 | int32_t length; | |
229 | uprv_memset(tt, 0, sizeof(TestText)); | |
230 | f = fopenOrError("ConverterSelectorTestUTF8.txt"); | |
231 | if(!f) { | |
232 | return FALSE; | |
233 | } | |
234 | fseek(f, 0, SEEK_END); | |
235 | length = (int32_t)ftell(f); | |
236 | fseek(f, 0, SEEK_SET); | |
237 | tt->text = (char *)uprv_malloc(length + 1); | |
238 | if (tt->text == NULL) { | |
239 | fclose(f); | |
240 | return FALSE; | |
241 | } | |
242 | if (length != fread(tt->text, 1, length, f)) { | |
243 | log_err("error reading %ld bytes from test text file\n", (long)length); | |
244 | length = 0; | |
245 | uprv_free(tt->text); | |
246 | } | |
247 | fclose(f); | |
248 | tt->textLimit = tt->text + length; | |
249 | *tt->textLimit = 0; | |
250 | /* replace all Unicode '#' (U+0023) with NUL */ | |
251 | for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} | |
252 | text_reset(tt); | |
253 | return TRUE; | |
254 | } | |
255 | ||
256 | static void | |
257 | text_close(TestText *tt) { | |
258 | uprv_free(tt->text); | |
259 | } | |
260 | ||
261 | static int32_t findIndex(const char* converterName) { | |
262 | int32_t i; | |
263 | for (i = 0 ; i < gCountAvailable; i++) { | |
264 | if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { | |
265 | return i; | |
266 | } | |
267 | } | |
268 | return -1; | |
269 | } | |
270 | ||
271 | static UBool * | |
272 | getResultsManually(const char** encodings, int32_t num_encodings, | |
273 | const char *utf8, int32_t length, | |
274 | const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { | |
275 | UBool* resultsManually; | |
276 | int32_t i; | |
277 | ||
278 | resultsManually = (UBool*) uprv_malloc(gCountAvailable); | |
279 | uprv_memset(resultsManually, 0, gCountAvailable); | |
280 | ||
281 | for(i = 0 ; i < num_encodings ; i++) { | |
282 | UErrorCode status = U_ZERO_ERROR; | |
283 | /* get unicode set for that converter */ | |
284 | USet* set; | |
285 | UConverter* test_converter; | |
286 | UChar32 cp; | |
287 | int32_t encIndex, offset; | |
288 | ||
289 | set = uset_openEmpty(); | |
290 | test_converter = ucnv_open(encodings[i], &status); | |
291 | ucnv_getUnicodeSet(test_converter, set, | |
292 | whichSet, &status); | |
293 | if (excludedCodePoints != NULL) { | |
294 | uset_addAll(set, excludedCodePoints); | |
295 | } | |
296 | uset_freeze(set); | |
297 | offset = 0; | |
298 | cp = 0; | |
299 | ||
300 | encIndex = findIndex(encodings[i]); | |
301 | /* | |
302 | * The following is almost, but not entirely, the same as | |
303 | * resultsManually[encIndex] = | |
304 | * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); | |
305 | * They might be different if the set contains strings, | |
306 | * or if the utf8 string contains an illegal sequence. | |
307 | * | |
308 | * The UConverterSelector does not currently handle strings that can be | |
309 | * converted, and it treats an illegal sequence as convertible | |
310 | * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. | |
311 | */ | |
312 | resultsManually[encIndex] = TRUE; | |
313 | while(offset<length) { | |
314 | U8_NEXT(utf8, offset, length, cp); | |
315 | if (cp >= 0 && !uset_contains(set, cp)) { | |
316 | resultsManually[encIndex] = FALSE; | |
317 | break; | |
318 | } | |
319 | } | |
320 | uset_close(set); | |
321 | ucnv_close(test_converter); | |
322 | } | |
323 | return resultsManually; | |
324 | } | |
325 | ||
326 | /* closes res but does not free resultsManually */ | |
327 | static void verifyResult(UEnumeration* res, const UBool *resultsManually) { | |
328 | UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); | |
329 | const char* name; | |
330 | UErrorCode status = U_ZERO_ERROR; | |
331 | int32_t i; | |
332 | ||
333 | /* fill the bool for the selector results! */ | |
334 | uprv_memset(resultsFromSystem, 0, gCountAvailable); | |
335 | while ((name = uenum_next(res,NULL, &status)) != NULL) { | |
336 | resultsFromSystem[findIndex(name)] = TRUE; | |
337 | } | |
338 | for(i = 0 ; i < gCountAvailable; i++) { | |
339 | if(resultsManually[i] != resultsFromSystem[i]) { | |
340 | log_err("failure in converter selector\n" | |
341 | "converter %s had conflicting results -- manual: %d, system %d\n", | |
342 | gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); | |
343 | } | |
344 | } | |
345 | uprv_free(resultsFromSystem); | |
346 | uenum_close(res); | |
347 | } | |
348 | ||
349 | static UConverterSelector * | |
350 | serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { | |
351 | char *new_buffer; | |
352 | int32_t ser_len, ser_len2; | |
353 | /* preflight */ | |
354 | ser_len = ucnvsel_serialize(sel, NULL, 0, status); | |
355 | if (*status != U_BUFFER_OVERFLOW_ERROR) { | |
356 | log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); | |
357 | return sel; | |
358 | } | |
359 | new_buffer = (char *)uprv_malloc(ser_len); | |
360 | *status = U_ZERO_ERROR; | |
361 | ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); | |
362 | if (U_FAILURE(*status) || ser_len != ser_len2) { | |
363 | log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); | |
364 | uprv_free(new_buffer); | |
365 | return sel; | |
366 | } | |
367 | ucnvsel_close(sel); | |
368 | uprv_free(*buffer); | |
369 | *buffer = new_buffer; | |
370 | sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); | |
371 | if (U_FAILURE(*status)) { | |
372 | log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); | |
373 | return NULL; | |
374 | } | |
375 | return sel; | |
376 | } | |
377 | ||
378 | static void TestSelector() | |
379 | { | |
380 | TestText text; | |
381 | USet* excluded_sets[3] = { NULL }; | |
382 | int32_t i, testCaseIdx; | |
383 | ||
384 | if (!getAvailableNames()) { | |
385 | return; | |
386 | } | |
387 | if (!text_open(&text)) { | |
388 | releaseAvailableNames();; | |
389 | } | |
390 | ||
391 | excluded_sets[0] = uset_openEmpty(); | |
392 | for(i = 1 ; i < 3 ; i++) { | |
393 | excluded_sets[i] = uset_open(i*30, i*30+500); | |
394 | } | |
395 | ||
396 | for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) | |
397 | { | |
398 | int32_t excluded_set_id; | |
399 | int32_t num_encodings; | |
400 | const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); | |
401 | if (getTestOption(QUICK_OPTION) && num_encodings > 25) { | |
402 | uprv_free((void *)encodings); | |
403 | continue; | |
404 | } | |
405 | ||
406 | /* | |
407 | * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) | |
408 | * | |
409 | * This loop was replaced by the following statement because | |
410 | * the loop made the test run longer without adding to the code coverage. | |
411 | * The handling of the exclusion set is independent of the | |
412 | * set of encodings, so there is no need to test every combination. | |
413 | */ | |
414 | excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); | |
415 | { | |
416 | UConverterSelector *sel_rt, *sel_fb; | |
417 | char *buffer_fb = NULL; | |
418 | UErrorCode status = U_ZERO_ERROR; | |
419 | sel_rt = ucnvsel_open(encodings, num_encodings, | |
420 | excluded_sets[excluded_set_id], | |
421 | UCNV_ROUNDTRIP_SET, &status); | |
422 | if (num_encodings == gCountAvailable) { | |
423 | /* test the special "all converters" parameter values */ | |
424 | sel_fb = ucnvsel_open(NULL, 0, | |
425 | excluded_sets[excluded_set_id], | |
426 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
427 | } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { | |
428 | /* test that a NULL set gives the same results as an empty set */ | |
429 | sel_fb = ucnvsel_open(encodings, num_encodings, | |
430 | NULL, | |
431 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
432 | } else { | |
433 | sel_fb = ucnvsel_open(encodings, num_encodings, | |
434 | excluded_sets[excluded_set_id], | |
435 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
436 | } | |
437 | if (U_FAILURE(status)) { | |
438 | log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); | |
439 | ucnvsel_close(sel_rt); | |
440 | uprv_free((void *)encodings); | |
441 | continue; | |
442 | } | |
443 | ||
444 | text_reset(&text); | |
445 | for (;;) { | |
446 | UBool *manual_rt, *manual_fb; | |
447 | static UChar utf16[10000]; | |
448 | char *s; | |
449 | int32_t length8, length16; | |
450 | ||
451 | s = text_nextString(&text, &length8); | |
452 | if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { | |
453 | break; | |
454 | } | |
455 | ||
456 | manual_rt = getResultsManually(encodings, num_encodings, | |
457 | s, length8, | |
458 | excluded_sets[excluded_set_id], | |
459 | UCNV_ROUNDTRIP_SET); | |
460 | manual_fb = getResultsManually(encodings, num_encodings, | |
461 | s, length8, | |
462 | excluded_sets[excluded_set_id], | |
463 | UCNV_ROUNDTRIP_AND_FALLBACK_SET); | |
464 | /* UTF-8 with length */ | |
465 | status = U_ZERO_ERROR; | |
466 | verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); | |
467 | verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); | |
468 | /* UTF-8 NUL-terminated */ | |
469 | verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); | |
470 | verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); | |
471 | ||
472 | u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); | |
473 | if (U_FAILURE(status)) { | |
474 | log_err("error converting the test text (string %ld) to UTF-16 - %s\n", | |
475 | (long)text.number, u_errorName(status)); | |
476 | } else { | |
477 | if (text.number == 0) { | |
478 | sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); | |
479 | } | |
480 | if (U_SUCCESS(status)) { | |
481 | /* UTF-16 with length */ | |
482 | verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); | |
483 | verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); | |
484 | /* UTF-16 NUL-terminated */ | |
485 | verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); | |
486 | verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); | |
487 | } | |
488 | } | |
489 | ||
490 | uprv_free(manual_rt); | |
491 | uprv_free(manual_fb); | |
492 | } | |
493 | ucnvsel_close(sel_rt); | |
494 | ucnvsel_close(sel_fb); | |
495 | uprv_free(buffer_fb); | |
496 | } | |
497 | uprv_free((void *)encodings); | |
498 | } | |
499 | ||
500 | releaseAvailableNames(); | |
501 | text_close(&text); | |
502 | for(i = 0 ; i < 3 ; i++) { | |
503 | uset_close(excluded_sets[i]); | |
504 | } | |
505 | } | |
506 | ||
507 | /* Improve code coverage of UPropsVectors */ | |
508 | static void TestUPropsVector() { | |
509 | uint32_t value; | |
510 | UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
511 | UPropsVectors *pv = upvec_open(100, &errorCode); | |
512 | if (pv != NULL) { | |
513 | log_err("Should have returned NULL if UErrorCode is an error."); | |
514 | return; | |
515 | } | |
516 | errorCode = U_ZERO_ERROR; | |
517 | pv = upvec_open(-1, &errorCode); | |
518 | if (pv != NULL || U_SUCCESS(errorCode)) { | |
519 | log_err("Should have returned NULL if column is less than 0.\n"); | |
520 | return; | |
521 | } | |
522 | errorCode = U_ZERO_ERROR; | |
523 | pv = upvec_open(100, &errorCode); | |
524 | if (pv == NULL || U_FAILURE(errorCode)) { | |
525 | log_err("Unable to open UPropsVectors.\n"); | |
526 | return; | |
527 | } | |
528 | ||
529 | if (upvec_getValue(pv, 0, 1) != 0) { | |
530 | log_err("upvec_getValue should return 0.\n"); | |
531 | } | |
532 | if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { | |
533 | log_err("upvec_getRow should not return NULL.\n"); | |
534 | } | |
535 | if (upvec_getArray(pv, NULL, NULL) != NULL) { | |
536 | log_err("upvec_getArray should return NULL.\n"); | |
537 | } | |
538 | ||
539 | upvec_close(pv); | |
540 | } |