1 /********************************************************************
2 * Copyright (c) 1997-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
8 * Modification History:
10 * MOHAMED ELDAWY Creation
11 ********************************************************************
14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
16 #include "ucnvseltst.h"
20 #include "unicode/utypes.h"
21 #include "unicode/ucnvsel.h"
22 #include "unicode/ustring.h"
27 #define FILENAME_BUFFER 1024
29 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
/* Forward declarations of the two test entry points registered below. */
31 static void TestSelector(void);
32 static void TestUPropsVector(void);
33 void addCnvSelTest(TestNode
** root
); /* Declaration required to suppress compiler warnings. */
/*
 * Registers this file's tests in the test tree rooted at root:
 * TestSelector and TestUPropsVector, both under "tsconv/ucnvseltst/".
 */
35 void addCnvSelTest(TestNode
** root
)
37 addTest(root
, &TestSelector
, "tsconv/ucnvseltst/TestSelector");
38 addTest(root
, &TestUPropsVector
, "tsconv/ucnvseltst/TestUPropsVector");
/*
 * File-scope cache of the available converter names.
 * gAvailableNames is an array allocated with uprv_malloc() and filled from
 * ucnv_getAvailableName(); gCountAvailable is the number of entries, taken
 * from ucnv_countAvailable(). NULL / 0 mean "not loaded".
 */
41 static const char **gAvailableNames
= NULL
;
42 static int32_t gCountAvailable
= 0;
/*
 * Populates gAvailableNames / gCountAvailable on first use.
 * A non-NULL gAvailableNames means the cache has already been filled.
 * NOTE(review): the caller tests the result with '!', so the failure paths
 * below presumably yield FALSE and the success paths TRUE -- confirm.
 */
47 if (gAvailableNames
!= NULL
) {
/* Ask the converter API how many converters exist; zero is reported as a data error. */
50 gCountAvailable
= ucnv_countAvailable();
51 if (gCountAvailable
== 0) {
52 log_data_err("No converters available.\n");
/* One pointer per converter; the names themselves are not copied. */
55 gAvailableNames
= (const char **)uprv_malloc(gCountAvailable
* sizeof(const char *));
56 if (gAvailableNames
== NULL
) {
57 log_err("unable to allocate memory for %ld available converter names\n",
58 (long)gCountAvailable
);
/* Store the name pointer returned for each converter index. */
61 for (i
= 0; i
< gCountAvailable
; ++i
) {
62 gAvailableNames
[i
] = ucnv_getAvailableName(i
);
/*
 * Frees the array behind gAvailableNames and resets the pointer to NULL,
 * so that a later getAvailableNames() sees the cache as empty again.
 * Only the pointer array is freed, not the strings it points to.
 */
68 releaseAvailableNames() {
69 uprv_free((void *)gAvailableNames
);
70 gAvailableNames
= NULL
;
/*
 * Builds a newly allocated array of converter names picked from
 * gAvailableNames.
 *   start  - index into gAvailableNames of the first candidate
 *   step   - index increment between picks (step 0 with count > 0 is
 *            special-cased below)
 *   count  - number of slots allocated and number of loop iterations
 *   pCount - output parameter; NOTE(review): presumably receives the number
 *            of names actually stored -- confirm
 * The array is allocated with uprv_malloc(); callers release it with
 * uprv_free().
 */
75 getEncodings(int32_t start
, int32_t step
, int32_t count
, int32_t *pCount
) {
83 names
= (const char **)uprv_malloc(count
* sizeof(char *));
85 log_err("memory allocation error for %ld pointers\n", (long)count
);
88 if (step
== 0 && count
> 0) {
/* Only indexes inside [0, gCountAvailable) are read from the name cache. */
91 for (i
= 0; i
< count
; ++i
) {
92 if (0 <= start
&& start
< gCountAvailable
) {
93 names
[i
] = gAvailableNames
[start
];
/*
 * Encoding-list provider for the "zero encodings" case described in the
 * note below. NOTE(review): the function body is not visible here.
 */
103 * ucnvsel_open() does not support "no encodings":
104 * Given 0 encodings it will open a selector for all available ones.
107 getNoEncodings(int32_t *pCount
) {
/* Encoding list with a single slot: start index 1, step 0, count 1. */
114 getOneEncoding(int32_t *pCount
) {
115 return getEncodings(1, 0, 1, pCount
);
/* Up to 25 names taken from the even indexes: start 0, step 2, count 25. */
119 getFirstEvenEncodings(int32_t *pCount
) {
120 return getEncodings(0, 2, 25, pCount
);
/*
 * 22 consecutive slots starting 12 entries before the end of the list
 * (start gCountAvailable - 12, step 1). The request deliberately runs past
 * the end; getEncodings() range-checks each index before reading.
 */
124 getMiddleEncodings(int32_t *pCount
) {
125 return getEncodings(gCountAvailable
- 12, 1, 22, pCount
);
/* Walks backwards from the last available name: start gCountAvailable - 1, step -1, count 25. */
129 getLastEncodings(int32_t *pCount
) {
130 return getEncodings(gCountAvailable
- 1, -1, 25, pCount
);
/*
 * Samples the list starting at index 5 with a stride of
 * ceil(gCountAvailable / 20), requesting 20 slots.
 */
134 getSomeEncodings(int32_t *pCount
) {
135 /* 20 evenly distributed */
136 return getEncodings(5, (gCountAvailable
+ 19)/ 20, 20, pCount
);
/* Every third name starting at index 2; the slot count is ceil(gCountAvailable / 3). */
140 getEveryThirdEncoding(int32_t *pCount
) {
141 return getEncodings(2, 3, (gCountAvailable
+ 2 )/ 3, pCount
);
/* The complete list: start 0, step 1, count gCountAvailable. */
145 getAllEncodings(int32_t *pCount
) {
146 return getEncodings(0, 1, gCountAvailable
, pCount
);
/*
 * Signature shared by the encoding-list providers above: takes an int32_t
 * output pointer and returns an array of converter names.
 */
149 typedef const char **GetEncodingsFn(int32_t *);
/*
 * Table of providers; TestSelector() runs one test case per entry and
 * indexes this table with its test case number.
 */
151 static GetEncodingsFn
*const getEncodingsFns
[] = {
153 getFirstEvenEncodings
,
157 getEveryThirdEncoding
,
/*
 * Opens a test data file for binary reading.
 * The path is ctest_dataSrcDir() + TDSRCPATH + filename, assembled in a
 * fixed buffer of FILENAME_BUFFER bytes.
 *   filename - file name relative to the test data directory
 * Logs an error if the path does not fit into the buffer, and a data error
 * if the file cannot be opened.
 */
161 static FILE *fopenOrError(const char *filename
) {
164 char fnbuf
[FILENAME_BUFFER
];
165 const char* directory
= ctest_dataSrcDir();
/* Required size including the terminating NUL (the trailing +1). */
166 needLen
= uprv_strlen(directory
)+uprv_strlen(TDSRCPATH
)+uprv_strlen(filename
)+1;
167 if(needLen
> FILENAME_BUFFER
) {
168 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
169 filename
, needLen
, FILENAME_BUFFER
);
/* The unbounded strcpy/strcat calls rely on the length check above. */
173 strcpy(fnbuf
, directory
);
174 strcat(fnbuf
, TDSRCPATH
);
175 strcat(fnbuf
, filename
);
177 f
= fopen(fnbuf
, "rb");
180 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf
, filename
);
/*
 * In-memory copy of the test text file, iterated one string at a time.
 * text points at the start of the buffer and textLimit just past the last
 * byte that was read. Other code in this file also uses members named
 * limit and number, whose declarations are not visible here.
 */
185 typedef struct TestText
{
186 char *text
, *textLimit
;
/* Rewinds the iteration: moves tt->limit back to the start of the text buffer. */
192 text_reset(TestText
*tt
) {
193 tt
->limit
= tt
->text
;
/*
 * Advances to the next NUL-terminated string in the test text.
 *   tt      - text iterator state
 *   pLength - receives the byte length of the string that was found
 * On the very first string a leading UTF-8 signature (EF BB BF) is skipped.
 * NOTE(review): TestSelector() treats a NULL result as "no more strings";
 * the return statements themselves are not visible here.
 */
198 text_nextString(TestText
*tt
, int32_t *pLength
) {
200 if (s
== tt
->textLimit
) {
201 /* we already delivered the last string */
203 } else if (s
== tt
->text
) {
/* At the very beginning: check that at least 3 bytes exist before testing for the signature. */
205 if ((tt
->textLimit
- tt
->text
) >= 3 &&
206 s
[0] == (char)0xef && s
[1] == (char)0xbb && s
[2] == (char)0xbf
208 s
+= 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */
211 /* skip the string terminator */
216 /* find the end of this string */
/* uprv_strchr(s, 0) locates the terminating NUL, which becomes the new limit. */
217 tt
->limit
= uprv_strchr(s
, 0);
218 *pLength
= (int32_t)(tt
->limit
- s
);
/*
 * Loads "ConverterSelectorTestUTF8.txt" into a freshly allocated buffer and
 * splits it into NUL-terminated strings by overwriting every '#' with NUL.
 *   tt - iterator state; zeroed first, then text/textLimit are set
 * NOTE(review): TestSelector() tests the result with '!', so a false value
 * presumably signals failure -- confirm.
 */
223 text_open(TestText
*tt
) {
227 uprv_memset(tt
, 0, sizeof(TestText
));
228 f
= fopenOrError("ConverterSelectorTestUTF8.txt");
/* Determine the file size by seeking to the end, then rewind. */
/* NOTE(review): the ftell() result is used as a size without a check for -1. */
232 fseek(f
, 0, SEEK_END
);
233 length
= (int32_t)ftell(f
);
234 fseek(f
, 0, SEEK_SET
);
/* One extra byte beyond the file contents is allocated. */
235 tt
->text
= (char *)uprv_malloc(length
+ 1);
236 if (tt
->text
== NULL
) {
/* NOTE(review): compares length (apparently int32_t, given the cast above) with the size_t returned by fread(). */
240 if (length
!= fread(tt
->text
, 1, length
, f
)) {
241 log_err("error reading %ld bytes from test text file\n", (long)length
);
246 tt
->textLimit
= tt
->text
+ length
;
248 /* replace all Unicode '#' (U+0023) with NUL */
249 for(s
= tt
->text
; (s
= uprv_strchr(s
, 0x23)) != NULL
; *s
++ = 0) {}
/*
 * Counterpart of text_open().
 * NOTE(review): the body is not visible here; presumably it releases
 * tt->text -- confirm.
 */
255 text_close(TestText
*tt
) {
/*
 * Looks up converterName in gAvailableNames using ucnv_compareNames().
 *   converterName - name to search for
 * NOTE(review): presumably returns the index of the first match and a
 * negative value when nothing matches; the return statements are not
 * visible here -- confirm before relying on the result as an array index.
 */
259 static int32_t findIndex(const char* converterName
) {
261 for (i
= 0 ; i
< gCountAvailable
; i
++) {
262 if(ucnv_compareNames(gAvailableNames
[i
], converterName
) == 0) {
/*
 * Computes the expected selector result without using the selector: for
 * each requested encoding, decides whether every code point of the UTF-8
 * text is in that converter's Unicode set.
 *   encodings, num_encodings - converter names to evaluate
 *   utf8, length             - text to check, iterated with U8_NEXT
 *   excludedCodePoints       - optional set (may be NULL); its members are
 *                              added to the converter's set, so they never
 *                              cause a FALSE result
 *   whichSet                 - which Unicode set of the converter to query
 * Returns an array of gCountAvailable flags indexed like gAvailableNames,
 * allocated with uprv_malloc(); the caller frees it with uprv_free().
 */
270 getResultsManually(const char** encodings
, int32_t num_encodings
,
271 const char *utf8
, int32_t length
,
272 const USet
* excludedCodePoints
, const UConverterUnicodeSet whichSet
) {
273 UBool
* resultsManually
;
/* NOTE(review): the size is gCountAvailable bytes, without sizeof(UBool), and the result is passed to uprv_memset() without a NULL check. */
276 resultsManually
= (UBool
*) uprv_malloc(gCountAvailable
);
277 uprv_memset(resultsManually
, 0, gCountAvailable
);
279 for(i
= 0 ; i
< num_encodings
; i
++) {
280 UErrorCode status
= U_ZERO_ERROR
;
281 /* get unicode set for that converter */
283 UConverter
* test_converter
;
285 int32_t encIndex
, offset
;
287 set
= uset_openEmpty();
288 test_converter
= ucnv_open(encodings
[i
], &status
);
289 ucnv_getUnicodeSet(test_converter
, set
,
291 if (excludedCodePoints
!= NULL
) {
292 uset_addAll(set
, excludedCodePoints
);
/* NOTE(review): encIndex is used as an array subscript below without a range check. */
298 encIndex
= findIndex(encodings
[i
]);
300 * The following is almost, but not entirely, the same as
301 * resultsManually[encIndex] =
302 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
303 * They might be different if the set contains strings,
304 * or if the utf8 string contains an illegal sequence.
306 * The UConverterSelector does not currently handle strings that can be
307 * converted, and it treats an illegal sequence as convertible
308 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
/* Start from "convertible" and clear the flag on a code point that is not in the set. */
310 resultsManually
[encIndex
] = TRUE
;
311 while(offset
<length
) {
312 U8_NEXT(utf8
, offset
, length
, cp
);
/* A negative cp does not clear the flag. */
313 if (cp
>= 0 && !uset_contains(set
, cp
)) {
314 resultsManually
[encIndex
] = FALSE
;
319 ucnv_close(test_converter
);
321 return resultsManually
;
324 /* closes res but does not free resultsManually */
325 static void verifyResult(UEnumeration
* res
, const UBool
*resultsManually
) {
326 UBool
* resultsFromSystem
= (UBool
*) uprv_malloc(gCountAvailable
* sizeof(UBool
));
328 UErrorCode status
= U_ZERO_ERROR
;
331 /* fill the bool for the selector results! */
332 uprv_memset(resultsFromSystem
, 0, gCountAvailable
);
333 while ((name
= uenum_next(res
,NULL
, &status
)) != NULL
) {
334 resultsFromSystem
[findIndex(name
)] = TRUE
;
336 for(i
= 0 ; i
< gCountAvailable
; i
++) {
337 if(resultsManually
[i
] != resultsFromSystem
[i
]) {
338 log_err("failure in converter selector\n"
339 "converter %s had conflicting results -- manual: %d, system %d\n",
340 gAvailableNames
[i
], resultsManually
[i
], resultsFromSystem
[i
]);
343 uprv_free(resultsFromSystem
);
/*
 * Round-trips a selector through its serialized form.
 *   sel    - selector to serialize
 *   buffer - receives the newly allocated serialization buffer, which the
 *            re-opened selector is created from; the caller releases it
 *            with uprv_free()
 *   status - in/out ICU error code
 * Preflights ucnvsel_serialize() to get the required size, serializes into
 * a buffer of that size, then opens a selector from the buffer with
 * ucnvsel_openFromSerialized().
 * NOTE(review): the return statements are not visible here; the caller
 * assigns the result back to its selector variable.
 */
347 static UConverterSelector
*
348 serializeAndUnserialize(UConverterSelector
*sel
, char **buffer
, UErrorCode
*status
) {
350 int32_t ser_len
, ser_len2
;
/* Preflight with a NULL buffer: U_BUFFER_OVERFLOW_ERROR is the expected outcome. */
352 ser_len
= ucnvsel_serialize(sel
, NULL
, 0, status
);
353 if (*status
!= U_BUFFER_OVERFLOW_ERROR
) {
354 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status
));
/* NOTE(review): no NULL check on this allocation is visible before it is handed to ucnvsel_serialize(). */
357 new_buffer
= (char *)uprv_malloc(ser_len
);
/* Reset the expected overflow error before the real serialization. */
358 *status
= U_ZERO_ERROR
;
359 ser_len2
= ucnvsel_serialize(sel
, new_buffer
, ser_len
, status
);
/* The second call must succeed and report the same length as the preflight. */
360 if (U_FAILURE(*status
) || ser_len
!= ser_len2
) {
361 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status
));
362 uprv_free(new_buffer
);
367 *buffer
= new_buffer
;
368 sel
= ucnvsel_openFromSerialized(new_buffer
, ser_len
, status
);
369 if (U_FAILURE(*status
)) {
370 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status
));
/*
 * Main functional test for UConverterSelector.
 * For each provider in getEncodingsFns it opens a round-trip selector and a
 * round-trip-and-fallback selector, then, for each string of the test text,
 * compares the selector output with getResultsManually() for UTF-8 and
 * UTF-16 input, both with explicit length and NUL-terminated. On the first
 * string (text.number == 0) the fallback selector is replaced by one
 * re-opened from its serialized form.
 */
376 static void TestSelector()
379 USet
* excluded_sets
[3] = { NULL
};
380 int32_t i
, testCaseIdx
;
/* Setup: converter name cache and test text; both must succeed. */
382 if (!getAvailableNames()) {
385 if (!text_open(&text
)) {
/* NOTE(review): stray second semicolon (empty statement). */
386 releaseAvailableNames();;
/* Three exclusion sets: one empty, and two ranges [i*30, i*30+500] for i = 1, 2. */
389 excluded_sets
[0] = uset_openEmpty();
390 for(i
= 1 ; i
< 3 ; i
++) {
391 excluded_sets
[i
] = uset_open(i
*30, i
*30+500);
/* One test case per encoding-list provider. */
394 for(testCaseIdx
= 0; testCaseIdx
< UPRV_LENGTHOF(getEncodingsFns
); testCaseIdx
++)
396 int32_t excluded_set_id
;
397 int32_t num_encodings
;
398 const char **encodings
= getEncodingsFns
[testCaseIdx
](&num_encodings
);
/* In quick mode, lists with more than 25 encodings are released here without further use. */
399 if (getTestOption(QUICK_OPTION
) && num_encodings
> 25) {
400 uprv_free((void *)encodings
);
405 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
407 * This loop was replaced by the following statement because
408 * the loop made the test run longer without adding to the code coverage.
409 * The handling of the exclusion set is independent of the
410 * set of encodings, so there is no need to test every combination.
412 excluded_set_id
= testCaseIdx
% UPRV_LENGTHOF(excluded_sets
);
414 UConverterSelector
*sel_rt
, *sel_fb
;
415 char *buffer_fb
= NULL
;
416 UErrorCode status
= U_ZERO_ERROR
;
/* sel_rt: round-trip mappings only. */
417 sel_rt
= ucnvsel_open(encodings
, num_encodings
,
418 excluded_sets
[excluded_set_id
],
419 UCNV_ROUNDTRIP_SET
, &status
);
/* sel_fb: round-trip and fallback mappings, opened one of three ways to cover the parameter variants. */
420 if (num_encodings
== gCountAvailable
) {
421 /* test the special "all converters" parameter values */
422 sel_fb
= ucnvsel_open(NULL
, 0,
423 excluded_sets
[excluded_set_id
],
424 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
425 } else if (uset_isEmpty(excluded_sets
[excluded_set_id
])) {
426 /* test that a NULL set gives the same results as an empty set */
427 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
429 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
431 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
432 excluded_sets
[excluded_set_id
],
433 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
434
435 if (U_FAILURE(status
)) {
/* NOTE(review): "%ld" is paired with testCaseIdx, an int32_t passed without the (long) cast used by the other log calls in this file. */
436 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx
, u_errorName(status
));
437 ucnvsel_close(sel_rt
);
438 uprv_free((void *)encodings
);
444 UBool
*manual_rt
, *manual_fb
;
445 static UChar utf16
[10000];
447 int32_t length8
, length16
;
/* Fetch the next test string; stop at the end of the text, or in quick mode once text.number exceeds 3. */
449 s
= text_nextString(&text
, &length8
);
450 if (s
== NULL
|| (getTestOption(QUICK_OPTION
) && text
.number
> 3)) {
/* Expected results, computed independently of the selector. */
454 manual_rt
= getResultsManually(encodings
, num_encodings
,
456 excluded_sets
[excluded_set_id
],
458 manual_fb
= getResultsManually(encodings
, num_encodings
,
460 excluded_sets
[excluded_set_id
],
461 UCNV_ROUNDTRIP_AND_FALLBACK_SET
);
462 /* UTF-8 with length */
463 status
= U_ZERO_ERROR
;
464 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, length8
, &status
), manual_rt
);
465 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, length8
, &status
), manual_fb
);
466 /* UTF-8 NUL-terminated */
467 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, -1, &status
), manual_rt
);
468 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, -1, &status
), manual_fb
);
/* Convert the same string to UTF-16 for the UChar-based selection API. */
470 u_strFromUTF8(utf16
, UPRV_LENGTHOF(utf16
), &length16
, s
, length8
, &status
);
471 if (U_FAILURE(status
)) {
472 log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
473 (long)text
.number
, u_errorName(status
));
/* On the first string only, replace sel_fb with a selector re-opened from its serialized form. */
475 if (text
.number
== 0) {
476 sel_fb
= serializeAndUnserialize(sel_fb
, &buffer_fb
, &status
);
478 if (U_SUCCESS(status
)) {
479 /* UTF-16 with length */
480 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, length16
, &status
), manual_rt
);
481 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, length16
, &status
), manual_fb
);
482 /* UTF-16 NUL-terminated */
483 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, -1, &status
), manual_rt
);
484 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, -1, &status
), manual_fb
);
/* The expected-result arrays belong to this function; verifyResult() does not free them. */
488 uprv_free(manual_rt
);
489 uprv_free(manual_fb
);
/* Per-test-case cleanup: both selectors, the serialization buffer and the name list. */
491 ucnvsel_close(sel_rt
);
492 ucnvsel_close(sel_fb
);
493 uprv_free(buffer_fb
);
495 uprv_free((void *)encodings
);
/* Global cleanup: name cache and the three exclusion sets. */
498 releaseAvailableNames();
500 for(i
= 0 ; i
< 3 ; i
++) {
501 uset_close(excluded_sets
[i
]);
505 /* Improve code coverage of UPropsVectors */
506 static void TestUPropsVector() {
507 UErrorCode errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
508 UPropsVectors
*pv
= upvec_open(100, &errorCode
);
510 log_err("Should have returned NULL if UErrorCode is an error.");
513 errorCode
= U_ZERO_ERROR
;
514 pv
= upvec_open(-1, &errorCode
);
515 if (pv
!= NULL
|| U_SUCCESS(errorCode
)) {
516 log_err("Should have returned NULL if column is less than 0.\n");
519 errorCode
= U_ZERO_ERROR
;
520 pv
= upvec_open(100, &errorCode
);
521 if (pv
== NULL
|| U_FAILURE(errorCode
)) {
522 log_err("Unable to open UPropsVectors.\n");
526 if (upvec_getValue(pv
, 0, 1) != 0) {
527 log_err("upvec_getValue should return 0.\n");
529 if (upvec_getRow(pv
, 0, NULL
, NULL
) == NULL
) {
530 log_err("upvec_getRow should not return NULL.\n");
532 if (upvec_getArray(pv
, NULL
, NULL
) != NULL
) {
533 log_err("upvec_getArray should return NULL.\n");