1 /********************************************************************
2 * Copyright (c) 1997-2011, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
8 * Modification History:
10 * MOHAMED ELDAWY Creation
11 ********************************************************************
14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
16 #include "ucnvseltst.h"
20 #include "unicode/utypes.h"
21 #include "unicode/ucnvsel.h"
22 #include "unicode/ustring.h"
27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
29 #define FILENAME_BUFFER 1024
31 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
/* Forward declarations of the two tests that addCnvSelTest() registers. */
33 static void TestSelector(void);
34 static void TestUPropsVector(void);
/* Prototype for the registration entry point defined directly below. */
35 void addCnvSelTest(TestNode
** root
); /* Declaration required to suppress compiler warnings. */
/*
 * Registers this file's tests under root via addTest():
 * TestSelector and TestUPropsVector, both below the "tsconv/ucnvseltst/" path.
 */
37 void addCnvSelTest(TestNode
** root
)
39 addTest(root
, &TestSelector
, "tsconv/ucnvseltst/TestSelector");
40 addTest(root
, &TestUPropsVector
, "tsconv/ucnvseltst/TestUPropsVector");
/* Array of available converter names; NULL while the array is not allocated. */
43 static const char **gAvailableNames
= NULL
;
/* Number of available converters, set from ucnv_countAvailable(). */
44 static int32_t gCountAvailable
= 0;
/*
 * Fills the converter-name cache (gAvailableNames and gCountAvailable).
 * NOTE(review): the function header is not visible in this listing; the
 * getAvailableNames() call in TestSelector suggests this is its body - confirm.
 */
/* Cache already allocated; presumably an early exit (branch body not visible). */
49 if (gAvailableNames
!= NULL
) {
/* Ask the converter API how many converters exist. */
52 gCountAvailable
= ucnv_countAvailable();
53 if (gCountAvailable
== 0) {
54 log_data_err("No converters available.\n");
/* One const char * slot per available converter. */
57 gAvailableNames
= (const char **)uprv_malloc(gCountAvailable
* sizeof(const char *));
58 if (gAvailableNames
== NULL
) {
59 log_err("unable to allocate memory for %ld available converter names\n",
60 (long)gCountAvailable
);
/* Store the name pointer returned for every converter index. */
63 for (i
= 0; i
< gCountAvailable
; ++i
) {
64 gAvailableNames
[i
] = ucnv_getAvailableName(i
);
/*
 * Frees the name array and resets gAvailableNames to NULL.
 * NOTE(review): gCountAvailable is not reset in the visible lines.
 */
70 releaseAvailableNames() {
71 uprv_free((void *)gAvailableNames
);
72 gAvailableNames
= NULL
;
/*
 * Builds an array of converter names taken from gAvailableNames.
 *
 * start  - index into gAvailableNames used for the first entry
 * step   - stride parameter; step == 0 together with count > 0 is special-cased
 * count  - number of pointer slots allocated for the result
 * pCount - output parameter (the assignment to it is not visible in this listing)
 *
 * NOTE(review): the lines that advance start and set *pCount are missing from
 * this listing; verify against the full source.
 */
77 getEncodings(int32_t start
, int32_t step
, int32_t count
, int32_t *pCount
) {
/* Allocate one pointer slot per requested name. */
85 names
= (const char **)uprv_malloc(count
* sizeof(char *));
87 log_err("memory allocation error for %ld pointers\n", (long)count
);
/* A zero stride with a positive count gets separate handling (body not visible). */
90 if (step
== 0 && count
> 0) {
93 for (i
= 0; i
< count
; ++i
) {
/* Only indexes inside [0, gCountAvailable) are read from the name array. */
94 if (0 <= start
&& start
< gCountAvailable
) {
95 names
[i
] = gAvailableNames
[start
];
/* NOTE(review): only the signature of getNoEncodings() is visible; its body is not shown here. */
105 * ucnvsel_open() does not support "no encodings":
106 * Given 0 encodings it will open a selector for all available ones.
109 getNoEncodings(int32_t *pCount
) {
/* Requests a single name: getEncodings() with start 1, step 0, count 1. */
116 getOneEncoding(int32_t *pCount
) {
117 return getEncodings(1, 0, 1, pCount
);
/* Requests 25 names: getEncodings() with start 0 and step 2. */
121 getFirstEvenEncodings(int32_t *pCount
) {
122 return getEncodings(0, 2, 25, pCount
);
/* Requests 22 names: getEncodings() with start gCountAvailable - 12 and step 1. */
126 getMiddleEncodings(int32_t *pCount
) {
127 return getEncodings(gCountAvailable
- 12, 1, 22, pCount
);
/* Requests 25 names: getEncodings() with start gCountAvailable - 1 and step -1. */
131 getLastEncodings(int32_t *pCount
) {
132 return getEncodings(gCountAvailable
- 1, -1, 25, pCount
);
/* Requests 20 names: getEncodings() with start 5 and step (gCountAvailable + 19) / 20. */
136 getSomeEncodings(int32_t *pCount
) {
137 /* 20 evenly distributed */
138 return getEncodings(5, (gCountAvailable
+ 19)/ 20, 20, pCount
);
/* Requests (gCountAvailable + 2) / 3 names: getEncodings() with start 2 and step 3. */
142 getEveryThirdEncoding(int32_t *pCount
) {
143 return getEncodings(2, 3, (gCountAvailable
+ 2 )/ 3, pCount
);
/* Requests gCountAvailable names: getEncodings() with start 0 and step 1. */
147 getAllEncodings(int32_t *pCount
) {
148 return getEncodings(0, 1, gCountAvailable
, pCount
);
/* Signature shared by the encoding-list generators: takes an int32_t * and returns const char **. */
151 typedef const char **GetEncodingsFn(int32_t *);
/*
 * Table of generators; TestSelector calls each entry by index.
 * NOTE(review): only some of the table entries are visible in this listing.
 */
153 static GetEncodingsFn
*const getEncodingsFns
[] = {
155 getFirstEvenEncodings
,
159 getEveryThirdEncoding
,
/*
 * Opens a test data file in "rb" mode.
 * The path is ctest_dataSrcDir() + TDSRCPATH + filename, assembled in a local
 * buffer of FILENAME_BUFFER bytes. An error is logged when the required length
 * exceeds the buffer, and a data error is logged with the "Could not load" message.
 * NOTE(review): the return statements are not visible in this listing.
 */
163 static FILE *fopenOrError(const char *filename
) {
166 char fnbuf
[FILENAME_BUFFER
];
167 const char* directory
= ctest_dataSrcDir();
/* Total length of the three path pieces plus one extra byte. */
168 needLen
= uprv_strlen(directory
)+uprv_strlen(TDSRCPATH
)+uprv_strlen(filename
)+1;
/* NOTE(review): %d is paired with needLen, whose declaration is not visible - confirm the type matches. */
169 if(needLen
> FILENAME_BUFFER
) {
170 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
171 filename
, needLen
, FILENAME_BUFFER
);
/* Concatenate directory, TDSRCPATH and filename into fnbuf. */
175 strcpy(fnbuf
, directory
);
176 strcat(fnbuf
, TDSRCPATH
);
177 strcat(fnbuf
, filename
);
179 f
= fopen(fnbuf
, "rb");
182 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf
, filename
);
/*
 * State for iterating over the test text.
 * text is the start of the buffer; text_open() sets textLimit to text + length.
 * NOTE(review): other members used elsewhere in this file (limit, number)
 * are declared on lines not visible in this listing.
 */
187 typedef struct TestText
{
188 char *text
, *textLimit
;
/*
 * Rewinds the iteration: sets tt->limit back to the start of the text.
 * NOTE(review): any further resets are on lines not visible in this listing.
 */
194 text_reset(TestText
*tt
) {
195 tt
->limit
= tt
->text
;
/*
 * Delivers the next NUL-terminated string from the test text.
 * On return *pLength holds the string's length in bytes, computed as the
 * distance from the string start to the terminator found with uprv_strchr().
 * NOTE(review): the initialization of s and the return statements are on lines
 * not visible in this listing.
 */
200 text_nextString(TestText
*tt
, int32_t *pLength
) {
/* Case 1: s has reached the end of the whole text. */
202 if (s
== tt
->textLimit
) {
203 /* we already delivered the last string */
/* Case 2: s is at the very beginning of the text. */
205 } else if (s
== tt
->text
) {
/* With at least 3 bytes of text, test for the bytes EF BB BF at the start. */
207 if ((tt
->textLimit
- tt
->text
) >= 3 &&
208 s
[0] == (char)0xef && s
[1] == (char)0xbb && s
[2] == (char)0xbf
210 s
+= 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */
213 /* skip the string terminator */
218 /* find the end of this string */
219 tt
->limit
= uprv_strchr(s
, 0);
220 *pLength
= (int32_t)(tt
->limit
- s
);
/*
 * Loads "ConverterSelectorTestUTF8.txt" into memory and prepares tt.
 * tt is zeroed first; the file size is measured with fseek()/ftell(); a buffer
 * of length + 1 bytes is allocated and filled with fread(); textLimit is set to
 * text + length; finally every '#' byte is overwritten with NUL.
 * NOTE(review): the failure paths, fclose() and return values are on lines not
 * visible in this listing.
 */
225 text_open(TestText
*tt
) {
229 uprv_memset(tt
, 0, sizeof(TestText
));
230 f
= fopenOrError("ConverterSelectorTestUTF8.txt");
/* Measure the file: seek to the end, read the offset, seek back to the start. */
234 fseek(f
, 0, SEEK_END
);
235 length
= (int32_t)ftell(f
);
236 fseek(f
, 0, SEEK_SET
);
/* One byte more than the file length is allocated. */
237 tt
->text
= (char *)uprv_malloc(length
+ 1);
238 if (tt
->text
== NULL
) {
/* NOTE(review): length is compared against the fread() result; the declaration of length is not visible - check for a signed/unsigned comparison. */
242 if (length
!= fread(tt
->text
, 1, length
, f
)) {
243 log_err("error reading %ld bytes from test text file\n", (long)length
);
248 tt
->textLimit
= tt
->text
+ length
;
/* NOTE(review): uprv_strchr() below needs a terminated buffer; the terminating store is presumably on a line not visible here - confirm. */
250 /* replace all Unicode '#' (U+0023) with NUL */
251 for(s
= tt
->text
; (s
= uprv_strchr(s
, 0x23)) != NULL
; *s
++ = 0) {}
/* NOTE(review): only the signature of text_close() is visible; its body is not shown in this listing. */
257 text_close(TestText
*tt
) {
/*
 * Linear search of gAvailableNames for converterName.
 * Names are matched with ucnv_compareNames(), where a result of 0 is a match.
 * NOTE(review): the return statements, including the value for "not found",
 * are not visible in this listing.
 */
261 static int32_t findIndex(const char* converterName
) {
263 for (i
= 0 ; i
< gCountAvailable
; i
++) {
264 if(ucnv_compareNames(gAvailableNames
[i
], converterName
) == 0) {
/*
 * Computes the expected selector result by brute force.
 *
 * Allocates gCountAvailable UBool flags, all initially 0. For each of the
 * num_encodings names it opens the converter, fetches its Unicode set, adds
 * excludedCodePoints when that is non-NULL, and walks the UTF-8 text code point
 * by code point. The flag at the converter's findIndex() position starts as
 * TRUE and becomes FALSE when a code point >= 0 is not in the set.
 *
 * Returns the flag array.
 * NOTE(review): who frees the array is not shown in this block; TestSelector
 * frees its results with uprv_free().
 * NOTE(review): no NULL check after uprv_malloc() is visible before uprv_memset().
 * NOTE(review): encIndex is used as an array index without a visible range check.
 */
272 getResultsManually(const char** encodings
, int32_t num_encodings
,
273 const char *utf8
, int32_t length
,
274 const USet
* excludedCodePoints
, const UConverterUnicodeSet whichSet
) {
275 UBool
* resultsManually
;
/* One flag per available converter, cleared to 0. */
278 resultsManually
= (UBool
*) uprv_malloc(gCountAvailable
);
279 uprv_memset(resultsManually
, 0, gCountAvailable
);
281 for(i
= 0 ; i
< num_encodings
; i
++) {
282 UErrorCode status
= U_ZERO_ERROR
;
283 /* get unicode set for that converter */
285 UConverter
* test_converter
;
287 int32_t encIndex
, offset
;
289 set
= uset_openEmpty();
290 test_converter
= ucnv_open(encodings
[i
], &status
);
291 ucnv_getUnicodeSet(test_converter
, set
,
293 if (excludedCodePoints
!= NULL
) {
294 uset_addAll(set
, excludedCodePoints
);
/* Map the encoding name to its slot in the per-converter flag array. */
300 encIndex
= findIndex(encodings
[i
]);
302 * The following is almost, but not entirely, the same as
303 * resultsManually[encIndex] =
304 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
305 * They might be different if the set contains strings,
306 * or if the utf8 string contains an illegal sequence.
308 * The UConverterSelector does not currently handle strings that can be
309 * converted, and it treats an illegal sequence as convertible
310 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
/* Start from TRUE; a code point missing from the set sets the flag to FALSE. */
312 resultsManually
[encIndex
] = TRUE
;
313 while(offset
<length
) {
314 U8_NEXT(utf8
, offset
, length
, cp
);
/* Negative cp values never clear the flag. */
315 if (cp
>= 0 && !uset_contains(set
, cp
)) {
316 resultsManually
[encIndex
] = FALSE
;
321 ucnv_close(test_converter
);
323 return resultsManually
;
/*
 * Compares the selector's enumeration against the manually computed flags.
 * Every name returned by uenum_next() sets the flag at its findIndex()
 * position; the two arrays are then compared entry by entry, and each mismatch
 * is logged with the converter name and both values.
 * NOTE(review): the findIndex() result is used as an index without a visible
 * range check - confirm it cannot signal "not found" here.
 */
326 /* closes res but does not free resultsManually */
327 static void verifyResult(UEnumeration
* res
, const UBool
*resultsManually
) {
328 UBool
* resultsFromSystem
= (UBool
*) uprv_malloc(gCountAvailable
* sizeof(UBool
));
330 UErrorCode status
= U_ZERO_ERROR
;
333 /* fill the bool for the selector results! */
334 uprv_memset(resultsFromSystem
, 0, gCountAvailable
);
/* Mark every converter that the enumeration reports. */
335 while ((name
= uenum_next(res
,NULL
, &status
)) != NULL
) {
336 resultsFromSystem
[findIndex(name
)] = TRUE
;
/* Report each converter whose two flags disagree. */
338 for(i
= 0 ; i
< gCountAvailable
; i
++) {
339 if(resultsManually
[i
] != resultsFromSystem
[i
]) {
340 log_err("failure in converter selector\n"
341 "converter %s had conflicting results -- manual: %d, system %d\n",
342 gAvailableNames
[i
], resultsManually
[i
], resultsFromSystem
[i
]);
345 uprv_free(resultsFromSystem
);
/*
 * Round-trips a selector through its serialized form.
 *
 * sel    - selector to serialize
 * buffer - receives the allocated buffer that holds the serialized bytes
 * status - in/out error code
 *
 * Steps visible here: a preflight call with a NULL buffer that must report
 * U_BUFFER_OVERFLOW_ERROR, allocation of ser_len bytes, a second serialize call
 * whose length must equal the preflight length, then ucnvsel_openFromSerialized()
 * on the new buffer.
 * NOTE(review): the return statements and any closing of the input selector
 * are on lines not visible in this listing.
 */
349 static UConverterSelector
*
350 serializeAndUnserialize(UConverterSelector
*sel
, char **buffer
, UErrorCode
*status
) {
352 int32_t ser_len
, ser_len2
;
/* Preflight: ask for the required size without supplying a buffer. */
354 ser_len
= ucnvsel_serialize(sel
, NULL
, 0, status
);
355 if (*status
!= U_BUFFER_OVERFLOW_ERROR
) {
356 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status
));
/* Allocate the reported size and clear the preflight status before the second call. */
359 new_buffer
= (char *)uprv_malloc(ser_len
);
360 *status
= U_ZERO_ERROR
;
361 ser_len2
= ucnvsel_serialize(sel
, new_buffer
, ser_len
, status
);
362 if (U_FAILURE(*status
) || ser_len
!= ser_len2
) {
363 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status
));
364 uprv_free(new_buffer
);
/* Hand the buffer to the caller and reopen the selector from it. */
369 *buffer
= new_buffer
;
370 sel
= ucnvsel_openFromSerialized(new_buffer
, ser_len
, status
);
371 if (U_FAILURE(*status
)) {
372 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status
));
/*
 * Main converter-selector test.
 *
 * For every generator in getEncodingsFns it opens two selectors (round-trip
 * set, and round-trip-and-fallback set) with one of three exclusion sets. For
 * each string of the test text it compares the selector output with the
 * brute-force result from getResultsManually(), for UTF-8 and UTF-16 input,
 * both with an explicit length and NUL-terminated. When text.number is 0 the
 * fallback selector is also passed through serializeAndUnserialize().
 */
378 static void TestSelector()
381 USet
* excluded_sets
[3] = { NULL
};
382 int32_t i
, testCaseIdx
;
384 if (!getAvailableNames()) {
/* NOTE(review): no early exit is visible after this cleanup; verify the test does not continue with released names. */
387 if (!text_open(&text
)) {
388 releaseAvailableNames();;
/* Exclusion set 0 is empty; sets 1 and 2 are opened as uset_open(i*30, i*30+500). */
391 excluded_sets
[0] = uset_openEmpty();
392 for(i
= 1 ; i
< 3 ; i
++) {
393 excluded_sets
[i
] = uset_open(i
*30, i
*30+500);
/* One test case per entry of the generator table. */
396 for(testCaseIdx
= 0; testCaseIdx
< LENGTHOF(getEncodingsFns
); testCaseIdx
++)
398 int32_t excluded_set_id
;
399 int32_t num_encodings
;
400 const char **encodings
= getEncodingsFns
[testCaseIdx
](&num_encodings
);
/* With the quick option set, lists of more than 25 encodings are freed here. */
401 if (getTestOption(QUICK_OPTION
) && num_encodings
> 25) {
402 uprv_free((void *)encodings
);
407 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
409 * This loop was replaced by the following statement because
410 * the loop made the test run longer without adding to the code coverage.
411 * The handling of the exclusion set is independent of the
412 * set of encodings, so there is no need to test every combination.
414 excluded_set_id
= testCaseIdx
% LENGTHOF(excluded_sets
);
416 UConverterSelector
*sel_rt
, *sel_fb
;
417 char *buffer_fb
= NULL
;
418 UErrorCode status
= U_ZERO_ERROR
;
/* sel_rt always uses the explicit encoding list and the round-trip set. */
419 sel_rt
= ucnvsel_open(encodings
, num_encodings
,
420 excluded_sets
[excluded_set_id
],
421 UCNV_ROUNDTRIP_SET
, &status
);
/* sel_fb is opened in one of three ways, depending on the test case. */
422 if (num_encodings
== gCountAvailable
) {
423 /* test the special "all converters" parameter values */
424 sel_fb
= ucnvsel_open(NULL
, 0,
425 excluded_sets
[excluded_set_id
],
426 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
427 } else if (uset_isEmpty(excluded_sets
[excluded_set_id
])) {
428 /* test that a NULL set gives the same results as an empty set */
429 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
431 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
433 sel_fb
= ucnvsel_open(encodings
, num_encodings
,
434 excluded_sets
[excluded_set_id
],
435 UCNV_ROUNDTRIP_AND_FALLBACK_SET
, &status
);
/* NOTE(review): %ld is paired with testCaseIdx, an int32_t, without the (long) cast used by the other log calls in this file. */
437 if (U_FAILURE(status
)) {
438 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx
, u_errorName(status
));
439 ucnvsel_close(sel_rt
);
440 uprv_free((void *)encodings
);
446 UBool
*manual_rt
, *manual_fb
;
447 static UChar utf16
[10000];
449 int32_t length8
, length16
;
/* Fetch the next test string; no more strings, or quick mode past string 3, is the stop condition. */
451 s
= text_nextString(&text
, &length8
);
452 if (s
== NULL
|| (getTestOption(QUICK_OPTION
) && text
.number
> 3)) {
/* Brute-force expectations, computed once for sel_rt and once for sel_fb. */
456 manual_rt
= getResultsManually(encodings
, num_encodings
,
458 excluded_sets
[excluded_set_id
],
460 manual_fb
= getResultsManually(encodings
, num_encodings
,
462 excluded_sets
[excluded_set_id
],
463 UCNV_ROUNDTRIP_AND_FALLBACK_SET
);
464 /* UTF-8 with length */
465 status
= U_ZERO_ERROR
;
466 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, length8
, &status
), manual_rt
);
467 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, length8
, &status
), manual_fb
);
468 /* UTF-8 NUL-terminated */
469 verifyResult(ucnvsel_selectForUTF8(sel_rt
, s
, -1, &status
), manual_rt
);
470 verifyResult(ucnvsel_selectForUTF8(sel_fb
, s
, -1, &status
), manual_fb
);
/* Convert the same string to UTF-16 for the ucnvsel_selectForString() checks. */
472 u_strFromUTF8(utf16
, LENGTHOF(utf16
), &length16
, s
, length8
, &status
);
473 if (U_FAILURE(status
)) {
474 log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
475 (long)text
.number
, u_errorName(status
));
/* When text.number is 0, sel_fb is replaced by its serialized-and-reopened copy. */
477 if (text
.number
== 0) {
478 sel_fb
= serializeAndUnserialize(sel_fb
, &buffer_fb
, &status
);
480 if (U_SUCCESS(status
)) {
481 /* UTF-16 with length */
482 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, length16
, &status
), manual_rt
);
483 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, length16
, &status
), manual_fb
);
484 /* UTF-16 NUL-terminated */
485 verifyResult(ucnvsel_selectForString(sel_rt
, utf16
, -1, &status
), manual_rt
);
486 verifyResult(ucnvsel_selectForString(sel_fb
, utf16
, -1, &status
), manual_fb
);
/* Free the two expectation arrays for this string. */
490 uprv_free(manual_rt
);
491 uprv_free(manual_fb
);
/* Close both selectors and free the serialization buffer and the encoding list. */
493 ucnvsel_close(sel_rt
);
494 ucnvsel_close(sel_fb
);
495 uprv_free(buffer_fb
);
497 uprv_free((void *)encodings
);
/* Final cleanup: release the name cache and close the three exclusion sets. */
500 releaseAvailableNames();
502 for(i
= 0 ; i
< 3 ; i
++) {
503 uset_close(excluded_sets
[i
]);
/*
 * Exercises upvec_open() and three upvec accessors:
 *  - upvec_open(100) with the error code preset to U_ILLEGAL_ARGUMENT_ERROR
 *    (the log message states that NULL is expected);
 *  - upvec_open(-1), which must return NULL and set a failure code;
 *  - upvec_open(100) with a clean error code, which must succeed;
 *  - on the opened object: upvec_getValue(pv, 0, 1) must be 0,
 *    upvec_getRow(pv, 0, NULL, NULL) must be non-NULL, and
 *    upvec_getArray(pv, NULL, NULL) must be NULL.
 * NOTE(review): the end of the function (including any close call) is not
 * visible in this listing.
 */
507 /* Improve code coverage of UPropsVectors */
508 static void TestUPropsVector() {
509 UErrorCode errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
510 UPropsVectors
*pv
= upvec_open(100, &errorCode
);
/* NOTE(review): unlike the other messages in this function, this one has no trailing "\n". */
512 log_err("Should have returned NULL if UErrorCode is an error.");
/* A negative column count must be rejected. */
515 errorCode
= U_ZERO_ERROR
;
516 pv
= upvec_open(-1, &errorCode
);
517 if (pv
!= NULL
|| U_SUCCESS(errorCode
)) {
518 log_err("Should have returned NULL if column is less than 0.\n");
/* A valid open, used by the accessor checks that follow. */
521 errorCode
= U_ZERO_ERROR
;
522 pv
= upvec_open(100, &errorCode
);
523 if (pv
== NULL
|| U_FAILURE(errorCode
)) {
524 log_err("Unable to open UPropsVectors.\n");
528 if (upvec_getValue(pv
, 0, 1) != 0) {
529 log_err("upvec_getValue should return 0.\n");
531 if (upvec_getRow(pv
, 0, NULL
, NULL
) == NULL
) {
532 log_err("upvec_getRow should not return NULL.\n");
534 if (upvec_getArray(pv
, NULL
, NULL
) != NULL
) {
535 log_err("upvec_getArray should return NULL.\n");