]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 | 3 | /******************************************************************** |
b331163b | 4 | * Copyright (c) 1997-2014, International Business Machines |
729e4ab9 A |
5 | * Corporation and others. All Rights Reserved. |
6 | ******************************************************************** | |
7 | * | |
8 | * File UCNVSELTST.C | |
9 | * | |
10 | * Modification History: | |
11 | * Name Description | |
12 | * MOHAMED ELDAWY Creation | |
13 | ******************************************************************** | |
14 | */ | |
15 | ||
16 | /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ | |
17 | ||
18 | #include "ucnvseltst.h" | |
19 | ||
20 | #include <stdio.h> | |
21 | ||
22 | #include "unicode/utypes.h" | |
23 | #include "unicode/ucnvsel.h" | |
24 | #include "unicode/ustring.h" | |
0f5d89e8 | 25 | #include "unicode/utf8.h" |
729e4ab9 A |
26 | #include "cmemory.h" |
27 | #include "cstring.h" | |
28 | #include "propsvec.h" | |
29 | ||
729e4ab9 A |
30 | #define FILENAME_BUFFER 1024 |
31 | ||
32 | #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING | |
33 | ||
34 | static void TestSelector(void); | |
35 | static void TestUPropsVector(void); | |
36 | void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ | |
37 | ||
38 | void addCnvSelTest(TestNode** root) | |
39 | { | |
40 | addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); | |
41 | addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); | |
42 | } | |
43 | ||
/* Lazily filled array of the available converter names; NULL while not loaded. */
static const char **gAvailableNames = NULL;
/* Number of entries in gAvailableNames. */
static int32_t gCountAvailable = 0;
46 | ||
47 | static UBool | |
48 | getAvailableNames() { | |
49 | int32_t i; | |
50 | if (gAvailableNames != NULL) { | |
51 | return TRUE; | |
52 | } | |
53 | gCountAvailable = ucnv_countAvailable(); | |
54 | if (gCountAvailable == 0) { | |
55 | log_data_err("No converters available.\n"); | |
56 | return FALSE; | |
57 | } | |
58 | gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); | |
59 | if (gAvailableNames == NULL) { | |
60 | log_err("unable to allocate memory for %ld available converter names\n", | |
61 | (long)gCountAvailable); | |
62 | return FALSE; | |
63 | } | |
64 | for (i = 0; i < gCountAvailable; ++i) { | |
65 | gAvailableNames[i] = ucnv_getAvailableName(i); | |
66 | } | |
67 | return TRUE; | |
68 | } | |
69 | ||
70 | static void | |
71 | releaseAvailableNames() { | |
72 | uprv_free((void *)gAvailableNames); | |
73 | gAvailableNames = NULL; | |
74 | gCountAvailable = 0; | |
75 | } | |
76 | ||
77 | static const char ** | |
78 | getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { | |
79 | const char **names; | |
80 | int32_t i; | |
81 | ||
82 | *pCount = 0; | |
83 | if (count <= 0) { | |
84 | return NULL; | |
85 | } | |
86 | names = (const char **)uprv_malloc(count * sizeof(char *)); | |
87 | if (names == NULL) { | |
88 | log_err("memory allocation error for %ld pointers\n", (long)count); | |
89 | return NULL; | |
90 | } | |
91 | if (step == 0 && count > 0) { | |
92 | step = 1; | |
93 | } | |
94 | for (i = 0; i < count; ++i) { | |
95 | if (0 <= start && start < gCountAvailable) { | |
96 | names[i] = gAvailableNames[start]; | |
97 | start += step; | |
98 | ++*pCount; | |
99 | } | |
100 | } | |
101 | return names; | |
102 | } | |
103 | ||
#if 0
/*
 * Disabled: ucnvsel_open() cannot express "no encodings".
 * When given 0 encodings it opens a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
    const char **none = NULL;

    *pCount = 0;
    return none;
}
#endif
115 | ||
/* Selects a single encoding: the available converter at index 1. */
static const char **
getOneEncoding(int32_t *pCount) {
    const int32_t start = 1;
    const int32_t step = 0;   /* getEncodings() treats 0 as 1 */
    const int32_t count = 1;

    return getEncodings(start, step, count, pCount);
}
120 | ||
/* Selects up to 25 encodings at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
    const int32_t start = 0;
    const int32_t step = 2;
    const int32_t count = 25;

    return getEncodings(start, step, count, pCount);
}
125 | ||
126 | static const char ** | |
127 | getMiddleEncodings(int32_t *pCount) { | |
128 | return getEncodings(gCountAvailable - 12, 1, 22, pCount); | |
129 | } | |
130 | ||
131 | static const char ** | |
132 | getLastEncodings(int32_t *pCount) { | |
133 | return getEncodings(gCountAvailable - 1, -1, 25, pCount); | |
134 | } | |
135 | ||
136 | static const char ** | |
137 | getSomeEncodings(int32_t *pCount) { | |
138 | /* 20 evenly distributed */ | |
139 | return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); | |
140 | } | |
141 | ||
142 | static const char ** | |
143 | getEveryThirdEncoding(int32_t *pCount) { | |
144 | return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); | |
145 | } | |
146 | ||
147 | static const char ** | |
148 | getAllEncodings(int32_t *pCount) { | |
149 | return getEncodings(0, 1, gCountAvailable, pCount); | |
150 | } | |
151 | ||
152 | typedef const char **GetEncodingsFn(int32_t *); | |
153 | ||
154 | static GetEncodingsFn *const getEncodingsFns[] = { | |
155 | getOneEncoding, | |
156 | getFirstEvenEncodings, | |
157 | getMiddleEncodings, | |
158 | getLastEncodings, | |
159 | getSomeEncodings, | |
160 | getEveryThirdEncoding, | |
161 | getAllEncodings | |
162 | }; | |
163 | ||
164 | static FILE *fopenOrError(const char *filename) { | |
165 | int32_t needLen; | |
166 | FILE *f; | |
167 | char fnbuf[FILENAME_BUFFER]; | |
3d1f044b A |
168 | const char* directory = ctest_dataSrcDir(); |
169 | needLen = (int32_t)(uprv_strlen(directory) + uprv_strlen(TDSRCPATH) + uprv_strlen(filename) + 1); | |
729e4ab9 A |
170 | if(needLen > FILENAME_BUFFER) { |
171 | log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", | |
172 | filename, needLen, FILENAME_BUFFER); | |
173 | return NULL; | |
174 | } | |
175 | ||
176 | strcpy(fnbuf, directory); | |
177 | strcat(fnbuf, TDSRCPATH); | |
178 | strcat(fnbuf, filename); | |
179 | ||
180 | f = fopen(fnbuf, "rb"); | |
181 | ||
182 | if(f == NULL) { | |
183 | log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); | |
184 | } | |
185 | return f; | |
186 | } | |
187 | ||
/* The test text: one heap buffer holding a sequence of NUL-terminated strings. */
typedef struct TestText {
    char *text;       /* start of the buffer */
    char *textLimit;  /* position of the final terminating NUL (text + length) */
    char *limit;      /* terminator of the most recently delivered string */
    int32_t number;   /* zero-based index of the most recently delivered string */
} TestText;
193 | ||
194 | static void | |
195 | text_reset(TestText *tt) { | |
196 | tt->limit = tt->text; | |
197 | tt->number = 0; | |
198 | } | |
199 | ||
200 | static char * | |
201 | text_nextString(TestText *tt, int32_t *pLength) { | |
202 | char *s = tt->limit; | |
203 | if (s == tt->textLimit) { | |
204 | /* we already delivered the last string */ | |
205 | return NULL; | |
206 | } else if (s == tt->text) { | |
207 | /* first string */ | |
208 | if ((tt->textLimit - tt->text) >= 3 && | |
209 | s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf | |
210 | ) { | |
211 | s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ | |
212 | } | |
213 | } else { | |
214 | /* skip the string terminator */ | |
215 | ++s; | |
216 | ++tt->number; | |
217 | } | |
218 | ||
219 | /* find the end of this string */ | |
220 | tt->limit = uprv_strchr(s, 0); | |
221 | *pLength = (int32_t)(tt->limit - s); | |
222 | return s; | |
223 | } | |
224 | ||
225 | static UBool | |
226 | text_open(TestText *tt) { | |
227 | FILE *f; | |
228 | char *s; | |
229 | int32_t length; | |
230 | uprv_memset(tt, 0, sizeof(TestText)); | |
231 | f = fopenOrError("ConverterSelectorTestUTF8.txt"); | |
232 | if(!f) { | |
233 | return FALSE; | |
234 | } | |
235 | fseek(f, 0, SEEK_END); | |
236 | length = (int32_t)ftell(f); | |
237 | fseek(f, 0, SEEK_SET); | |
238 | tt->text = (char *)uprv_malloc(length + 1); | |
239 | if (tt->text == NULL) { | |
240 | fclose(f); | |
241 | return FALSE; | |
242 | } | |
340931cb | 243 | if (length != (int32_t)fread(tt->text, 1, length, f)) { |
729e4ab9 A |
244 | log_err("error reading %ld bytes from test text file\n", (long)length); |
245 | length = 0; | |
246 | uprv_free(tt->text); | |
247 | } | |
248 | fclose(f); | |
249 | tt->textLimit = tt->text + length; | |
250 | *tt->textLimit = 0; | |
251 | /* replace all Unicode '#' (U+0023) with NUL */ | |
252 | for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} | |
253 | text_reset(tt); | |
254 | return TRUE; | |
255 | } | |
256 | ||
257 | static void | |
258 | text_close(TestText *tt) { | |
259 | uprv_free(tt->text); | |
260 | } | |
261 | ||
262 | static int32_t findIndex(const char* converterName) { | |
263 | int32_t i; | |
264 | for (i = 0 ; i < gCountAvailable; i++) { | |
265 | if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { | |
266 | return i; | |
267 | } | |
268 | } | |
269 | return -1; | |
270 | } | |
271 | ||
272 | static UBool * | |
273 | getResultsManually(const char** encodings, int32_t num_encodings, | |
274 | const char *utf8, int32_t length, | |
275 | const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { | |
276 | UBool* resultsManually; | |
277 | int32_t i; | |
278 | ||
279 | resultsManually = (UBool*) uprv_malloc(gCountAvailable); | |
280 | uprv_memset(resultsManually, 0, gCountAvailable); | |
281 | ||
282 | for(i = 0 ; i < num_encodings ; i++) { | |
283 | UErrorCode status = U_ZERO_ERROR; | |
284 | /* get unicode set for that converter */ | |
285 | USet* set; | |
286 | UConverter* test_converter; | |
287 | UChar32 cp; | |
288 | int32_t encIndex, offset; | |
289 | ||
290 | set = uset_openEmpty(); | |
291 | test_converter = ucnv_open(encodings[i], &status); | |
292 | ucnv_getUnicodeSet(test_converter, set, | |
293 | whichSet, &status); | |
294 | if (excludedCodePoints != NULL) { | |
295 | uset_addAll(set, excludedCodePoints); | |
296 | } | |
297 | uset_freeze(set); | |
298 | offset = 0; | |
299 | cp = 0; | |
300 | ||
301 | encIndex = findIndex(encodings[i]); | |
302 | /* | |
303 | * The following is almost, but not entirely, the same as | |
304 | * resultsManually[encIndex] = | |
305 | * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); | |
306 | * They might be different if the set contains strings, | |
307 | * or if the utf8 string contains an illegal sequence. | |
308 | * | |
309 | * The UConverterSelector does not currently handle strings that can be | |
310 | * converted, and it treats an illegal sequence as convertible | |
311 | * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. | |
312 | */ | |
313 | resultsManually[encIndex] = TRUE; | |
314 | while(offset<length) { | |
315 | U8_NEXT(utf8, offset, length, cp); | |
316 | if (cp >= 0 && !uset_contains(set, cp)) { | |
317 | resultsManually[encIndex] = FALSE; | |
318 | break; | |
319 | } | |
320 | } | |
321 | uset_close(set); | |
322 | ucnv_close(test_converter); | |
323 | } | |
324 | return resultsManually; | |
325 | } | |
326 | ||
327 | /* closes res but does not free resultsManually */ | |
328 | static void verifyResult(UEnumeration* res, const UBool *resultsManually) { | |
329 | UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); | |
330 | const char* name; | |
331 | UErrorCode status = U_ZERO_ERROR; | |
332 | int32_t i; | |
333 | ||
334 | /* fill the bool for the selector results! */ | |
335 | uprv_memset(resultsFromSystem, 0, gCountAvailable); | |
336 | while ((name = uenum_next(res,NULL, &status)) != NULL) { | |
337 | resultsFromSystem[findIndex(name)] = TRUE; | |
338 | } | |
339 | for(i = 0 ; i < gCountAvailable; i++) { | |
340 | if(resultsManually[i] != resultsFromSystem[i]) { | |
341 | log_err("failure in converter selector\n" | |
342 | "converter %s had conflicting results -- manual: %d, system %d\n", | |
343 | gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); | |
344 | } | |
345 | } | |
346 | uprv_free(resultsFromSystem); | |
347 | uenum_close(res); | |
348 | } | |
349 | ||
350 | static UConverterSelector * | |
351 | serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { | |
352 | char *new_buffer; | |
353 | int32_t ser_len, ser_len2; | |
354 | /* preflight */ | |
355 | ser_len = ucnvsel_serialize(sel, NULL, 0, status); | |
356 | if (*status != U_BUFFER_OVERFLOW_ERROR) { | |
357 | log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); | |
358 | return sel; | |
359 | } | |
360 | new_buffer = (char *)uprv_malloc(ser_len); | |
361 | *status = U_ZERO_ERROR; | |
362 | ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); | |
363 | if (U_FAILURE(*status) || ser_len != ser_len2) { | |
364 | log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); | |
365 | uprv_free(new_buffer); | |
366 | return sel; | |
367 | } | |
368 | ucnvsel_close(sel); | |
369 | uprv_free(*buffer); | |
370 | *buffer = new_buffer; | |
371 | sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); | |
372 | if (U_FAILURE(*status)) { | |
373 | log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); | |
374 | return NULL; | |
375 | } | |
376 | return sel; | |
377 | } | |
378 | ||
379 | static void TestSelector() | |
380 | { | |
381 | TestText text; | |
382 | USet* excluded_sets[3] = { NULL }; | |
383 | int32_t i, testCaseIdx; | |
384 | ||
385 | if (!getAvailableNames()) { | |
386 | return; | |
387 | } | |
388 | if (!text_open(&text)) { | |
340931cb | 389 | releaseAvailableNames(); |
729e4ab9 A |
390 | } |
391 | ||
392 | excluded_sets[0] = uset_openEmpty(); | |
393 | for(i = 1 ; i < 3 ; i++) { | |
394 | excluded_sets[i] = uset_open(i*30, i*30+500); | |
395 | } | |
396 | ||
b331163b | 397 | for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++) |
729e4ab9 A |
398 | { |
399 | int32_t excluded_set_id; | |
400 | int32_t num_encodings; | |
401 | const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); | |
402 | if (getTestOption(QUICK_OPTION) && num_encodings > 25) { | |
403 | uprv_free((void *)encodings); | |
404 | continue; | |
405 | } | |
406 | ||
407 | /* | |
408 | * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) | |
409 | * | |
410 | * This loop was replaced by the following statement because | |
411 | * the loop made the test run longer without adding to the code coverage. | |
412 | * The handling of the exclusion set is independent of the | |
413 | * set of encodings, so there is no need to test every combination. | |
414 | */ | |
b331163b | 415 | excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets); |
729e4ab9 A |
416 | { |
417 | UConverterSelector *sel_rt, *sel_fb; | |
418 | char *buffer_fb = NULL; | |
419 | UErrorCode status = U_ZERO_ERROR; | |
420 | sel_rt = ucnvsel_open(encodings, num_encodings, | |
421 | excluded_sets[excluded_set_id], | |
422 | UCNV_ROUNDTRIP_SET, &status); | |
423 | if (num_encodings == gCountAvailable) { | |
424 | /* test the special "all converters" parameter values */ | |
425 | sel_fb = ucnvsel_open(NULL, 0, | |
426 | excluded_sets[excluded_set_id], | |
427 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
428 | } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { | |
429 | /* test that a NULL set gives the same results as an empty set */ | |
430 | sel_fb = ucnvsel_open(encodings, num_encodings, | |
431 | NULL, | |
432 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
433 | } else { | |
434 | sel_fb = ucnvsel_open(encodings, num_encodings, | |
435 | excluded_sets[excluded_set_id], | |
436 | UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
437 | } | |
438 | if (U_FAILURE(status)) { | |
439 | log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); | |
440 | ucnvsel_close(sel_rt); | |
441 | uprv_free((void *)encodings); | |
442 | continue; | |
443 | } | |
444 | ||
445 | text_reset(&text); | |
446 | for (;;) { | |
447 | UBool *manual_rt, *manual_fb; | |
448 | static UChar utf16[10000]; | |
449 | char *s; | |
450 | int32_t length8, length16; | |
451 | ||
452 | s = text_nextString(&text, &length8); | |
453 | if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { | |
454 | break; | |
455 | } | |
456 | ||
457 | manual_rt = getResultsManually(encodings, num_encodings, | |
458 | s, length8, | |
459 | excluded_sets[excluded_set_id], | |
460 | UCNV_ROUNDTRIP_SET); | |
461 | manual_fb = getResultsManually(encodings, num_encodings, | |
462 | s, length8, | |
463 | excluded_sets[excluded_set_id], | |
464 | UCNV_ROUNDTRIP_AND_FALLBACK_SET); | |
465 | /* UTF-8 with length */ | |
466 | status = U_ZERO_ERROR; | |
467 | verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); | |
468 | verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); | |
469 | /* UTF-8 NUL-terminated */ | |
470 | verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); | |
471 | verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); | |
472 | ||
b331163b | 473 | u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status); |
729e4ab9 A |
474 | if (U_FAILURE(status)) { |
475 | log_err("error converting the test text (string %ld) to UTF-16 - %s\n", | |
476 | (long)text.number, u_errorName(status)); | |
477 | } else { | |
478 | if (text.number == 0) { | |
479 | sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); | |
480 | } | |
481 | if (U_SUCCESS(status)) { | |
482 | /* UTF-16 with length */ | |
483 | verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); | |
484 | verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); | |
485 | /* UTF-16 NUL-terminated */ | |
486 | verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); | |
487 | verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); | |
488 | } | |
489 | } | |
490 | ||
491 | uprv_free(manual_rt); | |
492 | uprv_free(manual_fb); | |
493 | } | |
494 | ucnvsel_close(sel_rt); | |
495 | ucnvsel_close(sel_fb); | |
496 | uprv_free(buffer_fb); | |
497 | } | |
498 | uprv_free((void *)encodings); | |
499 | } | |
500 | ||
501 | releaseAvailableNames(); | |
502 | text_close(&text); | |
503 | for(i = 0 ; i < 3 ; i++) { | |
504 | uset_close(excluded_sets[i]); | |
505 | } | |
506 | } | |
507 | ||
508 | /* Improve code coverage of UPropsVectors */ | |
509 | static void TestUPropsVector() { | |
729e4ab9 A |
510 | UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
511 | UPropsVectors *pv = upvec_open(100, &errorCode); | |
512 | if (pv != NULL) { | |
513 | log_err("Should have returned NULL if UErrorCode is an error."); | |
514 | return; | |
515 | } | |
516 | errorCode = U_ZERO_ERROR; | |
517 | pv = upvec_open(-1, &errorCode); | |
518 | if (pv != NULL || U_SUCCESS(errorCode)) { | |
519 | log_err("Should have returned NULL if column is less than 0.\n"); | |
520 | return; | |
521 | } | |
522 | errorCode = U_ZERO_ERROR; | |
523 | pv = upvec_open(100, &errorCode); | |
524 | if (pv == NULL || U_FAILURE(errorCode)) { | |
525 | log_err("Unable to open UPropsVectors.\n"); | |
526 | return; | |
527 | } | |
528 | ||
529 | if (upvec_getValue(pv, 0, 1) != 0) { | |
530 | log_err("upvec_getValue should return 0.\n"); | |
531 | } | |
532 | if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { | |
533 | log_err("upvec_getRow should not return NULL.\n"); | |
534 | } | |
535 | if (upvec_getArray(pv, NULL, NULL) != NULL) { | |
536 | log_err("upvec_getArray should return NULL.\n"); | |
537 | } | |
538 | ||
539 | upvec_close(pv); | |
540 | } |