1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
10 * Modification History:
12 * MOHAMED ELDAWY Creation
13 ********************************************************************
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
18 #include "ucnvseltst.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ucnvsel.h"
24 #include "unicode/ustring.h"
25 #include "unicode/utf8.h"
30 #define FILENAME_BUFFER 1024
32 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
/* Forward declarations of the two test entry points registered below. */
34 static void TestSelector(void);
35 static void TestUPropsVector(void);
36 void addCnvSelTest(TestNode
** root
); /* Declaration required to suppress compiler warnings. */
/*
 * Registers this file's tests on the test tree passed in as root:
 * TestSelector and TestUPropsVector, both under the path prefix
 * "tsconv/ucnvseltst/".
 */
38 void addCnvSelTest(TestNode
** root
)
40 addTest(root
, &TestSelector
, "tsconv/ucnvseltst/TestSelector");
41 addTest(root
, &TestUPropsVector
, "tsconv/ucnvseltst/TestUPropsVector");
/*
 * File-scope table of converter name pointers. It starts out NULL, is filled
 * from ucnv_getAvailableName() further down, and is reset to NULL by
 * releaseAvailableNames().
 */
44 static const char **gAvailableNames
= NULL
;
/* Number of entries in gAvailableNames; assigned from ucnv_countAvailable(). */
45 static int32_t gCountAvailable
= 0;
/*
 * Fills the gAvailableNames table.
 * NOTE(review): the function header and the return statements are not part
 * of this view; TestSelector calls getAvailableNames() and tests the result,
 * so this is presumably that function's body -- confirm against the full file.
 *
 * Visible steps:
 *  - checks whether gAvailableNames is already non-NULL (presumably an early
 *    exit when the table was built before);
 *  - reads the converter count; logs a data error when it is 0;
 *  - allocates one pointer per converter; logs an error when that fails;
 *  - stores ucnv_getAvailableName(i) for every i in [0, gCountAvailable).
 */
50 if (gAvailableNames
!= NULL
) {
53 gCountAvailable
= ucnv_countAvailable();
54 if (gCountAvailable
== 0) {
55 log_data_err("No converters available.\n");
58 gAvailableNames
= (const char **)uprv_malloc(gCountAvailable
* sizeof(const char *));
59 if (gAvailableNames
== NULL
) {
60 log_err("unable to allocate memory for %ld available converter names\n",
61 (long)gCountAvailable
);
/* Only the table is allocated here; each slot stores the pointer returned by the converter API. */
64 for (i
= 0; i
< gCountAvailable
; ++i
) {
65 gAvailableNames
[i
] = ucnv_getAvailableName(i
);
/*
 * Frees the gAvailableNames table and resets the pointer to NULL.
 * gCountAvailable is not modified by the visible code.
 */
71 releaseAvailableNames() {
/* The cast drops the const qualifier so the table can be handed to uprv_free(). */
72 uprv_free((void *)gAvailableNames
);
73 gAvailableNames
= NULL
;
/*
 * Builds an array of converter-name pointers picked from gAvailableNames.
 *
 * start   index into gAvailableNames used for the first slot
 * step    compared against 0 below; the line that applies it to start is
 *         not visible in this view
 * count   number of pointer slots allocated and looped over
 * pCount  output parameter; its assignment is not visible in this view
 *
 * A slot is filled from gAvailableNames[start] only when start lies in
 * [0, gCountAvailable).
 * NOTE(review): the return statement, the handling of out-of-range start
 * values, and the body of the "step == 0 && count > 0" branch are missing
 * from this view -- confirm against the full file. The callers free the
 * returned pointer with uprv_free().
 */
78 getEncodings(int32_t start
, int32_t step
, int32_t count
, int32_t *pCount
) {
86 names
= (const char **)uprv_malloc(count
* sizeof(char *));
88 log_err("memory allocation error for %ld pointers\n", (long)count
);
91 if (step
== 0 && count
> 0) {
94 for (i
= 0; i
< count
; ++i
) {
95 if (0 <= start
&& start
< gCountAvailable
) {
96 names
[i
] = gAvailableNames
[start
];
106 * ucnvsel_open() does not support "no encodings":
107 * Given 0 encodings it will open a selector for all available ones.
/*
 * Encoding-list provider for the "no encodings" case.
 * NOTE(review): the body is not visible in this view.
 */
110 getNoEncodings(int32_t *pCount
) {
/* Encoding-list provider: asks getEncodings() for one slot with start 1 and step 0. */
117 getOneEncoding(int32_t *pCount
) {
118 return getEncodings(1, 0, 1, pCount
);
/* Encoding-list provider: asks getEncodings() for 25 slots with start 0 and step 2. */
122 getFirstEvenEncodings(int32_t *pCount
) {
123 return getEncodings(0, 2, 25, pCount
);
/*
 * Encoding-list provider: asks getEncodings() for 22 slots with step 1,
 * starting 12 entries before the end of the table (gCountAvailable - 12).
 */
127 getMiddleEncodings(int32_t *pCount
) {
128 return getEncodings(gCountAvailable
- 12, 1, 22, pCount
);
/*
 * Encoding-list provider: asks getEncodings() for 25 slots with step -1,
 * starting at the last table index (gCountAvailable - 1).
 */
132 getLastEncodings(int32_t *pCount
) {
133 return getEncodings(gCountAvailable
- 1, -1, 25, pCount
);
/*
 * Encoding-list provider: asks getEncodings() for 20 slots starting at
 * index 5, with a step of gCountAvailable / 20 rounded up.
 */
137 getSomeEncodings(int32_t *pCount
) {
138 /* 20 evenly distributed */
139 return getEncodings(5, (gCountAvailable
+ 19)/ 20, 20, pCount
);
/*
 * Encoding-list provider: asks getEncodings() for gCountAvailable / 3
 * (rounded up) slots, starting at index 2 with step 3.
 */
143 getEveryThirdEncoding(int32_t *pCount
) {
144 return getEncodings(2, 3, (gCountAvailable
+ 2 )/ 3, pCount
);
/* Encoding-list provider: asks getEncodings() for gCountAvailable slots, starting at index 0 with step 1. */
148 getAllEncodings(int32_t *pCount
) {
149 return getEncodings(0, 1, gCountAvailable
, pCount
);
/* Signature shared by the encoding-list providers: takes an int32_t * and returns an array of name pointers. */
152 typedef const char **GetEncodingsFn(int32_t *);
/*
 * Table of encoding-list providers; TestSelector runs one test case per entry.
 * NOTE(review): only two entries are visible in this view; the remaining
 * initializers are not shown.
 */
154 static GetEncodingsFn
*const getEncodingsFns
[] = {
156 getFirstEvenEncodings
,
160 getEveryThirdEncoding
,
/*
 * Opens a test data file for binary reading.
 *
 * The path is built as ctest_dataSrcDir() + TDSRCPATH + filename inside a
 * fixed buffer of FILENAME_BUFFER bytes. The required length, including the
 * terminating NUL, is computed first, and a "buffer overflow" failure is
 * logged when it exceeds the buffer size. A data error is logged when the
 * file cannot be loaded.
 * NOTE(review): the return statements and the condition guarding the final
 * log_data_err() are not visible in this view; text_open() uses the result
 * as a FILE *.
 */
164 static FILE *fopenOrError(const char *filename
) {
167 char fnbuf
[FILENAME_BUFFER
];
168 const char* directory
= ctest_dataSrcDir();
/* The "+ 1" accounts for the terminating NUL. */
169 needLen
= (int32_t)(uprv_strlen(directory
) + uprv_strlen(TDSRCPATH
) + uprv_strlen(filename
) + 1);
170 if(needLen
> FILENAME_BUFFER
) {
171 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
172 filename
, needLen
, FILENAME_BUFFER
);
/* These unbounded copies rely on the length check above. */
176 strcpy(fnbuf
, directory
);
177 strcat(fnbuf
, TDSRCPATH
);
178 strcat(fnbuf
, filename
);
180 f
= fopen(fnbuf
, "rb");
183 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf
, filename
);
/*
 * State for iterating over the strings of the test text file.
 * text is the start of the loaded buffer and textLimit is text + length,
 * as set by text_open().
 * NOTE(review): other code in this file also accesses members named limit
 * and number; their declarations are not visible in this view.
 */
188 typedef struct TestText
{
189 char *text
, *textLimit
;
/*
 * Rewinds the iteration by moving tt->limit back to the start of the buffer.
 * NOTE(review): any further resets (e.g. of the string counter) are not
 * visible in this view.
 */
195 text_reset(TestText
*tt
) {
196 tt
->limit
= tt
->text
;
/*
 * Advances to the next NUL-terminated string in the test text.
 *
 * tt       iteration state
 * pLength  receives the byte length of the string that was found
 *
 * Three positions of the cursor s are distinguished: at textLimit (all
 * strings were already delivered), at the very start of the buffer (a
 * leading UTF-8 signature EF BB BF is skipped when at least 3 bytes are
 * present), and anywhere else (the previous string's terminator is skipped).
 * The end of the string is then located with uprv_strchr(s, 0) and stored in
 * tt->limit.
 * NOTE(review): the initialization of s and the return statements are not
 * visible in this view; TestSelector treats a NULL result as end of input.
 */
201 text_nextString(TestText
*tt
, int32_t *pLength
) {
203 if (s
== tt
->textLimit
) {
204 /* we already delivered the last string */
206 } else if (s
== tt
->text
) {
208 if ((tt
->textLimit
- tt
->text
) >= 3 &&
209 s
[0] == (char)0xef && s
[1] == (char)0xbb && s
[2] == (char)0xbf
211 s
+= 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */
214 /* skip the string terminator */
219 /* find the end of this string */
220 tt
->limit
= uprv_strchr(s
, 0);
221 *pLength
= (int32_t)(tt
->limit
- s
);
/*
 * Loads "ConverterSelectorTestUTF8.txt" into memory and prepares tt for
 * iteration.
 *
 * The structure is zeroed first. The file size is found by seeking to the
 * end, a buffer of length + 1 bytes is allocated, the contents are read in
 * one fread() call, and every '#' byte is overwritten with NUL so that the
 * text becomes a sequence of NUL-terminated strings.
 * NOTE(review): the return statements, the fclose() calls, the failure
 * handling and the write of the final terminator are not visible in this
 * view; TestSelector tests the result as a boolean.
 */
226 text_open(TestText
*tt
) {
230 uprv_memset(tt
, 0, sizeof(TestText
));
231 f
= fopenOrError("ConverterSelectorTestUTF8.txt");
/* Find the file size: seek to the end, read the position, rewind. */
235 fseek(f
, 0, SEEK_END
);
236 length
= (int32_t)ftell(f
);
237 fseek(f
, 0, SEEK_SET
);
/* One extra byte is allocated beyond the file contents. */
238 tt
->text
= (char *)uprv_malloc(length
+ 1);
239 if (tt
->text
== NULL
) {
243 if (length
!= fread(tt
->text
, 1, length
, f
)) {
244 log_err("error reading %ld bytes from test text file\n", (long)length
);
249 tt
->textLimit
= tt
->text
+ length
;
251 /* replace all Unicode '#' (U+0023) with NUL */
252 for(s
= tt
->text
; (s
= uprv_strchr(s
, 0x23)) != NULL
; *s
++ = 0) {}
/*
 * Counterpart of text_open().
 * NOTE(review): the body is not visible in this view; presumably it
 * releases tt->text -- confirm against the full file.
 */
258 text_close(TestText
*tt
) {
/*
 * Looks up converterName in gAvailableNames by scanning the table linearly
 * and comparing with ucnv_compareNames(), where a result of 0 means a match.
 * NOTE(review): the return statements are not visible in this view, so the
 * value returned for "not found" cannot be confirmed here. Callers use the
 * result directly as an array index.
 */
262 static int32_t findIndex(const char* converterName
) {
264 for (i
= 0 ; i
< gCountAvailable
; i
++) {
265 if(ucnv_compareNames(gAvailableNames
[i
], converterName
) == 0) {
/*
 * Computes the expected selector answer without using UConverterSelector.
 *
 * encodings, num_encodings  converter names to evaluate
 * utf8, length              text to test, as UTF-8 with an explicit length
 * excludedCodePoints        optional set (may be NULL) that is added to each
 *                           converter's set before the text is checked
 * whichSet                  set kind; its use is on a line not visible here
 *
 * Returns a newly allocated array of gCountAvailable flags, indexed like
 * gAvailableNames. For each requested encoding the flag starts as TRUE and
 * becomes FALSE when the text contains a code point (cp >= 0) that is not in
 * the set. Entries for encodings that were not requested stay 0. The caller
 * releases the array with uprv_free().
 */
273 getResultsManually(const char** encodings
, int32_t num_encodings
,
274 const char *utf8
, int32_t length
,
275 const USet
* excludedCodePoints
, const UConverterUnicodeSet whichSet
) {
276 UBool
* resultsManually
;
/* One flag per available converter, initially all 0. */
279 resultsManually
= (UBool
*) uprv_malloc(gCountAvailable
);
280 uprv_memset(resultsManually
, 0, gCountAvailable
);
282 for(i
= 0 ; i
< num_encodings
; i
++) {
283 UErrorCode status
= U_ZERO_ERROR
;
284 /* get unicode set for that converter */
286 UConverter
* test_converter
;
288 int32_t encIndex
, offset
;
290 set
= uset_openEmpty();
291 test_converter
= ucnv_open(encodings
[i
], &status
);
292 ucnv_getUnicodeSet(test_converter
, set
,
294 if (excludedCodePoints
!= NULL
) {
295 uset_addAll(set
, excludedCodePoints
);
301 encIndex
= findIndex(encodings
[i
]);
303 * The following is almost, but not entirely, the same as
304 * resultsManually[encIndex] =
305 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
306 * They might be different if the set contains strings,
307 * or if the utf8 string contains an illegal sequence.
309 * The UConverterSelector does not currently handle strings that can be
310 * converted, and it treats an illegal sequence as convertible
311 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
313 resultsManually
[encIndex
] = TRUE
;
314 while(offset
<length
) {
315 U8_NEXT(utf8
, offset
, length
, cp
);
316 if (cp
>= 0 && !uset_contains(set
, cp
)) {
317 resultsManually
[encIndex
] = FALSE
;
322 ucnv_close(test_converter
);
324 return resultsManually
;
/*
 * Compares the selector's answer with the manually computed one.
 *
 * res              enumeration of converter names returned by a
 *                  ucnvsel_selectFor...() call
 * resultsManually  expected flags, one per entry of gAvailableNames
 *
 * A temporary flag array is built from the names in res; every index where
 * it differs from resultsManually is reported with log_err(). The temporary
 * array is freed here and resultsManually is left to the caller.
 * NOTE(review): the call that closes res is not visible in this view, and
 * the index returned by findIndex(name) is used without a range check.
 */
327 /* closes res but does not free resultsManually */
328 static void verifyResult(UEnumeration
* res
, const UBool
*resultsManually
) {
329 UBool
* resultsFromSystem
= (UBool
*) uprv_malloc(gCountAvailable
* sizeof(UBool
));
331 UErrorCode status
= U_ZERO_ERROR
;
334 /* fill the bool for the selector results! */
335 uprv_memset(resultsFromSystem
, 0, gCountAvailable
);
336 while ((name
= uenum_next(res
,NULL
, &status
)) != NULL
) {
337 resultsFromSystem
[findIndex(name
)] = TRUE
;
/* Report every converter on which the two answers disagree. */
339 for(i
= 0 ; i
< gCountAvailable
; i
++) {
340 if(resultsManually
[i
] != resultsFromSystem
[i
]) {
341 log_err("failure in converter selector\n"
342 "converter %s had conflicting results -- manual: %d, system %d\n",
343 gAvailableNames
[i
], resultsManually
[i
], resultsFromSystem
[i
]);
346 uprv_free(resultsFromSystem
);
/*
 * Round-trips a selector through its serialized form.
 *
 * sel     selector to serialize
 * buffer  receives the newly allocated serialization buffer; TestSelector
 *         frees it with uprv_free() after closing the selector
 * status  in/out ICU error code
 *
 * Steps: preflight ucnvsel_serialize() with a NULL buffer to learn the
 * required size (U_BUFFER_OVERFLOW_ERROR is the expected outcome), allocate
 * that many bytes, serialize for real and check that the length matches,
 * then reopen with ucnvsel_openFromSerialized().
 * NOTE(review): the return statements, the check of the allocation result
 * and the handling of the original selector are not visible in this view.
 */
350 static UConverterSelector
*
351 serializeAndUnserialize(UConverterSelector
*sel
, char **buffer
, UErrorCode
*status
) {
353 int32_t ser_len
, ser_len2
;
/* Preflight: a NULL buffer with capacity 0 only asks for the required size. */
355 ser_len
= ucnvsel_serialize(sel
, NULL
, 0, status
);
356 if (*status
!= U_BUFFER_OVERFLOW_ERROR
) {
357 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status
));
360 new_buffer
= (char *)uprv_malloc(ser_len
);
/* Clear the expected overflow status before the real serialization. */
361 *status
= U_ZERO_ERROR
;
362 ser_len2
= ucnvsel_serialize(sel
, new_buffer
, ser_len
, status
);
363 if (U_FAILURE(*status
) || ser_len
!= ser_len2
) {
364 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status
));
365 uprv_free(new_buffer
);
370 *buffer
= new_buffer
;
371 sel
= ucnvsel_openFromSerialized(new_buffer
, ser_len
, status
);
372 if (U_FAILURE(*status
)) {
373 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status
));
/*
 * Main converter-selector test.
 *
 * For each entry of getEncodingsFns it builds a list of encodings, opens two
 * selectors (round-trip set and round-trip-plus-fallback set) with one of
 * three exclusion sets, and then, for each string of the test text, compares
 * the selector output against getResultsManually() for UTF-8 and UTF-16
 * input, both with an explicit length and NUL-terminated. On the string
 * whose text.number is 0 the fallback selector is additionally round-tripped
 * through serializeAndUnserialize().
 *
 * Fixes relative to the previous revision: the failure message for
 * ucnvsel_open() now casts testCaseIdx to long to match its %ld conversion
 * (as every other %ld call in this file does), and a stray empty statement
 * after releaseAvailableNames() was removed.
 */
379 static void TestSelector()
382 USet
* excluded_sets
[3] = { NULL
};
383 int32_t i
, testCaseIdx
;
385 if (!getAvailableNames()) {
388 if (!text_open(&text
)) {
389 releaseAvailableNames();
/* Exclusion sets: [0] is empty, [1] and [2] cover the ranges i*30 .. i*30+500. */
392 excluded_sets
[0] = uset_openEmpty();
393 for(i
= 1 ; i
< 3 ; i
++) {
394 excluded_sets
[i
] = uset_open(i
*30, i
*30+500);
397 for(testCaseIdx
= 0; testCaseIdx
< UPRV_LENGTHOF(getEncodingsFns
); testCaseIdx
++)
399 int32_t excluded_set_id
;
400 int32_t num_encodings
;
401 const char **encodings
= getEncodingsFns
[testCaseIdx
](&num_encodings
);
402 if (getTestOption(QUICK_OPTION
) && num_encodings
> 25) {
403 uprv_free((void *)encodings
);
408 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
410 * This loop was replaced by the following statement because
411 * the loop made the test run longer without adding to the code coverage.
412 * The handling of the exclusion set is independent of the
413 * set of encodings, so there is no need to test every combination.
415 excluded_set_id
= testCaseIdx
% UPRV_LENGTHOF(excluded_sets
);
417 UConverterSelector
*sel_rt
, *sel_fb
;
418 char *buffer_fb
= NULL
;
419 UErrorCode status
= U_ZERO_ERROR
;
420 sel_rt
= ucnvsel_open(encodings
, num_encodings
,
421 excluded_sets
[excluded_set_id
],
422 UCNV_ROUNDTRIP_SET
, &status
);
423 if (num_encodings
== gCountAvailable
) {
424 /* test the special "all converters" parameter values */
425 sel_fb
= ucnvsel_open(NULL
, 0,
426 excluded_sets
[excluded_set_id
],
427 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
428 } else if (uset_isEmpty(excluded_sets
[excluded_set_id
])) {
429 /* test that a NULL set gives the same results as an empty set */
430 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
432 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
434 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
435 excluded_sets
[excluded_set_id
],
436 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
438 if (U_FAILURE(status
)) {
439 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", (long)testCaseIdx
, u_errorName(status
));
440 ucnvsel_close(sel_rt
);
441 uprv_free((void *)encodings
);
447 UBool
*manual_rt
, *manual_fb
;
448 static UChar utf16
[10000];
450 int32_t length8
, length16
;
452 s
= text_nextString(&text
, &length8
);
453 if (s
== NULL
|| (getTestOption(QUICK_OPTION
) && text
.number
> 3)) {
457 manual_rt
= getResultsManually(encodings
, num_encodings
,
459 excluded_sets
[excluded_set_id
],
461 manual_fb
= getResultsManually(encodings
, num_encodings
,
463 excluded_sets
[excluded_set_id
],
464 UCNV_ROUNDTRIP_AND_FALLBACK_SET
);
465 /* UTF-8 with length */
466 status
= U_ZERO_ERROR
;
467 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, length8
, &status
), manual_rt
);
468 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, length8
, &status
), manual_fb
);
469 /* UTF-8 NUL-terminated */
470 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, -1, &status
), manual_rt
);
471 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, -1, &status
), manual_fb
);
473 u_strFromUTF8(utf16
, UPRV_LENGTHOF(utf16
), &length16
, s
, length8
, &status
);
474 if (U_FAILURE(status
)) {
475 log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
476 (long)text
.number
, u_errorName(status
));
/* On the string numbered 0, swap the fallback selector for one rebuilt from its serialized form. */
478 if (text
.number
== 0) {
479 sel_fb
= serializeAndUnserialize(sel_fb
, &buffer_fb
, &status
);
481 if (U_SUCCESS(status
)) {
482 /* UTF-16 with length */
483 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, length16
, &status
), manual_rt
);
484 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, length16
, &status
), manual_fb
);
485 /* UTF-16 NUL-terminated */
486 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, -1, &status
), manual_rt
);
487 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, -1, &status
), manual_fb
);
/* verifyResult() does not free the expected-result arrays, so they are released here. */
491 uprv_free(manual_rt
);
492 uprv_free(manual_fb
);
/* The serialization buffer is freed only after the selector opened from it has been closed. */
494 ucnvsel_close(sel_rt
);
495 ucnvsel_close(sel_fb
);
496 uprv_free(buffer_fb
);
498 uprv_free((void *)encodings
);
501 releaseAvailableNames();
503 for(i
= 0 ; i
< 3 ; i
++) {
504 uset_close(excluded_sets
[i
]);
/*
 * Exercises argument checking and basic accessors of UPropsVectors:
 *  - upvec_open() called with an error code that is already a failure;
 *  - upvec_open() called with -1, which must yield NULL and a failure code;
 *  - a regular upvec_open(100, ...), followed by checks that
 *    upvec_getValue(pv, 0, 1) is 0, that upvec_getRow(pv, 0, NULL, NULL) is
 *    non-NULL and that upvec_getArray(pv, NULL, NULL) is NULL.
 * NOTE(review): the condition guarding the first log_err() and the end of
 * the function are not visible in this view.
 *
 * Fix relative to the previous revision: the first failure message now ends
 * with a newline, like every other message in this file.
 */
508 /* Improve code coverage of UPropsVectors */
509 static void TestUPropsVector() {
510 UErrorCode errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
511 UPropsVectors
*pv
= upvec_open(100, &errorCode
);
513 log_err("Should have returned NULL if UErrorCode is an error.\n");
516 errorCode
= U_ZERO_ERROR
;
517 pv
= upvec_open(-1, &errorCode
);
518 if (pv
!= NULL
|| U_SUCCESS(errorCode
)) {
519 log_err("Should have returned NULL if column is less than 0.\n");
522 errorCode
= U_ZERO_ERROR
;
523 pv
= upvec_open(100, &errorCode
);
524 if (pv
== NULL
|| U_FAILURE(errorCode
)) {
525 log_err("Unable to open UPropsVectors.\n");
529 if (upvec_getValue(pv
, 0, 1) != 0) {
530 log_err("upvec_getValue should return 0.\n");
532 if (upvec_getRow(pv
, 0, NULL
, NULL
) == NULL
) {
533 log_err("upvec_getRow should not return NULL.\n");
535 if (upvec_getArray(pv
, NULL
, NULL
) != NULL
) {
536 log_err("upvec_getArray should return NULL.\n");