1 /********************************************************************
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
30 /* local case-insensitive string compare, so that "cstring.h" need not be included -- begin */
32 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2)
34 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2)
36 # define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2)
40 T_CString_stricmp(const char *str1
, const char *str2
) {
47 } else if(str2
==NULL
) {
50 /* compare non-NULL strings lexically with lowercase */
54 c1
=(unsigned char)*str1
;
55 c2
=(unsigned char)*str2
;
65 /* compare non-zero characters with lowercase */
66 rc
=(int)(unsigned char)tolower(c1
)-(int)(unsigned char)tolower(c2
);
76 /* local case-insensitive string compare, so that "cstring.h" need not be included -- end */
/* Number of elements in a true array (not a pointer), as an int32_t.
   The whole expansion is parenthesized so it is safe inside any expression. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Number of code pages TestConvert iterates over (dimension of its tables). */
#define NUM_CODEPAGE 1
/* Capacity, in elements, of the conversion buffers allocated by TestConvert.
   Parenthesized so the product stays intact next to operators such as
   % or / (e.g. x % MAX_FILE_LEN). */
#define MAX_FILE_LEN (1024*20)
/* Size of the char buffer that holds the test-data file path. */
#define UCS_FILE_NAME_SIZE 512
84 /*returns an action other than the one provided*/
85 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
);
86 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
);
/*
 * Opens a converter for the tests in this file.
 *
 * name        converter name; a leading '*' marks a converter that lives in
 *             the test-data package.
 * pErrorCode  ICU error code, handed through unchanged to the ICU calls.
 *
 * Returns whatever ucnv_openPackage() / ucnv_open() return.
 */
89 cnv_open(const char *name
, UErrorCode
*pErrorCode
) {
/* "*xyz": open "xyz" from the package located by loadTestData() */
90 if(name
!=NULL
&& name
[0]=='*') {
91 return ucnv_openPackage(loadTestData(pErrorCode
), name
+1, pErrorCode
);
/* any other name (including NULL) goes straight to ucnv_open() */
93 return ucnv_open(name
, pErrorCode
);
/* Forward declarations of the test functions that addTestConvert() registers. */
98 static void ListNames(void);
99 static void TestFlushCache(void);
100 static void TestDuplicateAlias(void);
101 static void TestCCSID(void);
102 static void TestJ932(void);
103 static void TestJ1968(void);
104 static void TestLMBCSMaxChar(void);
106 #if !UCONFIG_NO_LEGACY_CONVERSION
/* declared only when UCONFIG_NO_LEGACY_CONVERSION is 0 */
107 static void TestConvertSafeCloneCallback(void);
110 static void TestEBCDICSwapLFNL(void);
111 static void TestConvertEx(void);
112 static void TestConvertExFromUTF8(void);
113 static void TestConvertExFromUTF8_C5F0(void);
114 static void TestConvertAlgorithmic(void);
/* the next two are non-static because they are implemented in another file */
115 void TestDefaultConverterError(void); /* defined in cctest.c */
116 void TestDefaultConverterSet(void); /* defined in cctest.c */
117 static void TestToUCountPending(void);
118 static void TestFromUCountPending(void);
119 static void TestDefaultName(void);
120 static void TestCompareNames(void);
121 static void TestSubstString(void);
122 static void InvalidArguments(void);
123 static void TestGetName(void);
124 static void TestUTFBOM(void);
/* prototype for the registration function that is defined right below */
126 void addTestConvert(TestNode
** root
);
/*
 * Registers the converter C API tests of this file.
 *
 * root  test tree handle, passed unchanged to every addTest() call.
 *
 * Each test is added under the "tsconv/ccapitst/" path prefix, with the
 * last path component equal to the function name. Some registrations are
 * guarded by UCONFIG_NO_LEGACY_CONVERSION / UCONFIG_NO_FILE_IO checks.
 */
128 void addTestConvert(TestNode
** root
)
130 addTest(root
, &ListNames
, "tsconv/ccapitst/ListNames");
131 addTest(root
, &TestConvert
, "tsconv/ccapitst/TestConvert");
132 addTest(root
, &TestFlushCache
, "tsconv/ccapitst/TestFlushCache");
133 addTest(root
, &TestAlias
, "tsconv/ccapitst/TestAlias");
134 addTest(root
, &TestDuplicateAlias
, "tsconv/ccapitst/TestDuplicateAlias");
135 addTest(root
, &TestConvertSafeClone
, "tsconv/ccapitst/TestConvertSafeClone");
/* registered only when legacy conversion is compiled in */
136 #if !UCONFIG_NO_LEGACY_CONVERSION
137 addTest(root
, &TestConvertSafeCloneCallback
,"tsconv/ccapitst/TestConvertSafeCloneCallback");
139 addTest(root
, &TestCCSID
, "tsconv/ccapitst/TestCCSID");
140 addTest(root
, &TestJ932
, "tsconv/ccapitst/TestJ932");
141 addTest(root
, &TestJ1968
, "tsconv/ccapitst/TestJ1968");
/* registered only when both file I/O and legacy conversion are compiled in */
142 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
143 addTest(root
, &TestLMBCSMaxChar
, "tsconv/ccapitst/TestLMBCSMaxChar");
145 addTest(root
, &TestEBCDICSwapLFNL
, "tsconv/ccapitst/TestEBCDICSwapLFNL");
146 addTest(root
, &TestConvertEx
, "tsconv/ccapitst/TestConvertEx");
147 addTest(root
, &TestConvertExFromUTF8
, "tsconv/ccapitst/TestConvertExFromUTF8");
148 addTest(root
, &TestConvertExFromUTF8_C5F0
, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
149 addTest(root
, &TestConvertAlgorithmic
, "tsconv/ccapitst/TestConvertAlgorithmic");
150 addTest(root
, &TestDefaultConverterError
, "tsconv/ccapitst/TestDefaultConverterError");
151 addTest(root
, &TestDefaultConverterSet
, "tsconv/ccapitst/TestDefaultConverterSet");
/* registered only when file I/O is compiled in */
152 #if !UCONFIG_NO_FILE_IO
153 addTest(root
, &TestToUCountPending
, "tsconv/ccapitst/TestToUCountPending");
154 addTest(root
, &TestFromUCountPending
, "tsconv/ccapitst/TestFromUCountPending");
156 addTest(root
, &TestDefaultName
, "tsconv/ccapitst/TestDefaultName");
157 addTest(root
, &TestCompareNames
, "tsconv/ccapitst/TestCompareNames");
158 addTest(root
, &TestSubstString
, "tsconv/ccapitst/TestSubstString");
159 addTest(root
, &InvalidArguments
, "tsconv/ccapitst/InvalidArguments");
160 addTest(root
, &TestGetName
, "tsconv/ccapitst/TestGetName");
161 addTest(root
, &TestUTFBOM
, "tsconv/ccapitst/TestUTFBOM");
/*
 * Exercises the converter-name listing APIs:
 *   - ucnv_openAllNames() together with uenum_count(), uenum_next(),
 *     uenum_reset() and uenum_close();
 *   - ucnv_countAvailable() and ucnv_getAvailableName();
 *   - ucnv_countAliases(), ucnv_getAlias() and ucnv_getAliases() for "utf-8".
 * Takes no arguments and returns nothing; problems are reported through
 * log_err() / log_data_err().
 */
164 static void ListNames(void) {
165 UErrorCode err
= U_ZERO_ERROR
;
166 int32_t testLong1
= 0;
167 const char* available_conv
;
168 UEnumeration
*allNamesEnum
= NULL
;
169 int32_t allNamesCount
= 0;
172 log_verbose("Testing ucnv_openAllNames()...");
173 allNamesEnum
= ucnv_openAllNames(&err
);
175 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err
));
178 const char *string
= NULL
;
182 allNamesCount
= uenum_count(allNamesEnum
, &err
);
/* first pass: walk every name the enumeration yields */
183 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
185 log_verbose("read \"%s\", length %i\n", string
, len
);
187 if (U_FAILURE(err
)) {
188 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err
));
/* second pass after a reset: additionally try to open (and close) each name */
191 uenum_reset(allNamesEnum
, &err
);
192 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
194 ucnv_close(ucnv_open(string
, &err
));
195 log_verbose("read \"%s\", length %i (%s)\n", string
, len
, U_SUCCESS(err
) ? "available" : "unavailable");
/* NOTE(review): count1/count2 are presumably the per-pass name counts; their updates are not visible here */
198 if (count1
!= count2
) {
199 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
202 uenum_close(allNamesEnum
);
205 /*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
207 log_verbose("Testing ucnv_countAvailable()...");
209 testLong1
=ucnv_countAvailable();
210 log_info("Number of available codepages: %d/%d\n", testLong1
, allNamesCount
);
212 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
/* index equal to the available count; the result is overwritten below before any check */
214 available_conv
= ucnv_getAvailableName(testLong1
);
215 /*test ucnv_getAvailableName with err condition*/
216 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
217 available_conv
= ucnv_getAvailableName(-1);
218 if(available_conv
!= NULL
){
219 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
222 /* Test ucnv_countAliases() etc. */
223 count
= ucnv_countAliases("utf-8", &err
);
225 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err
));
226 } else if(count
<= 0) {
227 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count
);
229 /* try to get the aliases individually */
/* alias 0 of "utf-8" is expected to be exactly "UTF-8" */
231 alias
= ucnv_getAlias("utf-8", 0, &err
);
233 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err
));
234 } else if(strcmp("UTF-8", alias
) != 0) {
235 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias
);
238 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
239 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
241 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
/* sanity check: an alias longer than 20 characters is reported as suspicious */
242 } else if(strlen(alias
) > 20) {
244 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum
, alias
);
246 log_verbose("alias %d for utf-8: %s\n", aliasNum
, alias
);
250 /* try to fill an array with all aliases */
251 const char **aliases
;
/* one pointer slot per alias; released with free() at the end of this section */
252 aliases
=(const char **)malloc(count
* sizeof(const char *));
254 ucnv_getAliases("utf-8", aliases
, &err
);
256 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err
));
258 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
259 /* compare the pointers with the ones returned individually */
260 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
262 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
263 } else if(aliases
[aliasNum
] != alias
) {
264 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum
, aliasNum
);
268 free((char **)aliases
);
276 static void TestConvert()
278 #if !UCONFIG_NO_LEGACY_CONVERSION
281 int32_t testLong1
= 0;
285 FILE* ucs_file_in
= NULL
;
287 UChar myUChar
= 0x0000;
288 char* mytarget
; /* [MAX_FILE_LEN] */
291 UChar
* consumedUni
= NULL
;
292 char* consumed
= NULL
;
293 char* output_cp_buffer
; /* [MAX_FILE_LEN] */
294 UChar
* ucs_file_buffer
; /* [MAX_FILE_LEN] */
295 UChar
* ucs_file_buffer_use
;
296 UChar
* my_ucs_file_buffer
; /* [MAX_FILE_LEN] */
297 UChar
* my_ucs_file_buffer_1
;
300 uint16_t codepage_index
= 0;
302 UErrorCode err
= U_ZERO_ERROR
;
303 char ucs_file_name
[UCS_FILE_NAME_SIZE
];
304 UConverterFromUCallback MIA1
, MIA1_2
;
305 UConverterToUCallback MIA2
, MIA2_2
;
306 const void *MIA1Context
, *MIA1Context2
, *MIA2Context
, *MIA2Context2
;
307 UConverter
* someConverters
[5];
308 UConverter
* myConverter
= 0;
309 UChar
* displayname
= 0;
316 int32_t targetcapacity2
;
317 int32_t targetcapacity
;
321 const UChar
* tmp_ucs_buf
;
322 const UChar
* tmp_consumedUni
=NULL
;
323 const char* tmp_mytarget_use
;
324 const char* tmp_consumed
;
326 /******************************************************************
327 Checking Unicode -> ksc
328 ******************************************************************/
330 const char* CodePagesToTest
[NUM_CODEPAGE
] =
336 const uint16_t CodePageNumberToTest
[NUM_CODEPAGE
] =
342 const int8_t CodePagesMinChars
[NUM_CODEPAGE
] =
348 const int8_t CodePagesMaxChars
[NUM_CODEPAGE
] =
354 const uint16_t CodePagesSubstitutionChars
[NUM_CODEPAGE
] =
359 const char* CodePagesTestFiles
[NUM_CODEPAGE
] =
365 const UConverterPlatform CodePagesPlatform
[NUM_CODEPAGE
] =
371 const char* CodePagesLocale
[NUM_CODEPAGE
] =
376 UConverterFromUCallback oldFromUAction
= NULL
;
377 UConverterToUCallback oldToUAction
= NULL
;
378 const void* oldFromUContext
= NULL
;
379 const void* oldToUContext
= NULL
;
381 /* Allocate memory */
382 mytarget
= (char*) malloc(MAX_FILE_LEN
* sizeof(mytarget
[0]));
383 output_cp_buffer
= (char*) malloc(MAX_FILE_LEN
* sizeof(output_cp_buffer
[0]));
384 ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(ucs_file_buffer
[0]));
385 my_ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(my_ucs_file_buffer
[0]));
387 ucs_file_buffer_use
= ucs_file_buffer
;
389 mytarget_use
= mytarget
;
390 my_ucs_file_buffer_1
=my_ucs_file_buffer
;
392 /* flush the converter cache to get a consistent state before the flushing is tested */
395 /*Testing ucnv_openU()*/
397 UChar converterName
[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
398 UChar firstSortedName
[]={ 0x0021, 0x0000}; /* ! */
399 UChar lastSortedName
[]={ 0x007E, 0x0000}; /* ~ */
400 const char *illegalNameChars
={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
401 UChar illegalName
[100];
402 UConverter
*converter
=NULL
;
404 converter
=ucnv_openU(converterName
, &err
);
406 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err
));
408 ucnv_close(converter
);
410 converter
=ucnv_openU(NULL
, &err
);
412 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err
));
414 ucnv_close(converter
);
415 /*testing with error value*/
416 err
=U_ILLEGAL_ARGUMENT_ERROR
;
417 converter
=ucnv_openU(converterName
, &err
);
418 if(!(converter
== NULL
)){
419 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
421 ucnv_close(converter
);
423 u_uastrcpy(illegalName
, "");
424 u_uastrcpy(illegalName
, illegalNameChars
);
425 ucnv_openU(illegalName
, &err
);
426 if(!(err
==U_ILLEGAL_ARGUMENT_ERROR
)){
427 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
431 ucnv_openU(firstSortedName
, &err
);
432 if(err
!=U_FILE_ACCESS_ERROR
){
433 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
437 ucnv_openU(lastSortedName
, &err
);
438 if(err
!=U_FILE_ACCESS_ERROR
){
439 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
444 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
446 UConverter
*cnv
=NULL
;
448 cnv
=ucnv_open("ibm-949,Madhu", &err
);
450 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err
));
455 /*Testing ucnv_convert()*/
457 int32_t targetLimit
=0, sourceLimit
=0, i
=0, targetCapacity
=0;
458 const uint8_t source
[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
459 const uint8_t expectedTarget
[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
461 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
465 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", NULL
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
466 if(err
== U_BUFFER_OVERFLOW_ERROR
){
468 targetLimit
=targetCapacity
+1;
469 target
=(char*)malloc(sizeof(char) * targetLimit
);
470 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
473 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err
));
476 for(i
=0; i
<targetCapacity
; i
++){
477 if(target
[i
] != expectedTarget
[i
]){
478 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i
, (UChar
)expectedTarget
[i
], (uint8_t)target
[i
]);
482 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
+1, -1, &err
);
483 if(U_FAILURE(err
) || i
!=7){
484 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
485 u_errorName(err
), i
);
488 /*Test error conditions*/
490 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, 0, &err
);
492 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
495 err
=U_ILLEGAL_ARGUMENT_ERROR
;
496 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
497 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
499 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
503 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
505 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
506 if(!(U_FAILURE(err
) && err
==U_BUFFER_OVERFLOW_ERROR
)){
507 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
514 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
515 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
516 err
=U_ILLEGAL_ARGUMENT_ERROR
;
517 if(ucnv_open(NULL
, &err
) != NULL
){
518 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
520 if(ucnv_openCCSID(1051, UCNV_IBM
, &err
) != NULL
){
521 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
525 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
526 log_verbose("\n---Testing ucnv_open default...\n");
527 someConverters
[0] = ucnv_open(NULL
,&err
);
528 someConverters
[1] = ucnv_open(NULL
,&err
);
529 someConverters
[2] = ucnv_open("utf8", &err
);
530 someConverters
[3] = ucnv_openCCSID(949,UCNV_IBM
,&err
);
531 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM
, &err
)); /* test for j350; ucnv_close(NULL) is safe */
532 if (U_FAILURE(err
)){ log_data_err("FAILURE! %s\n", myErrorName(err
));}
534 /* Testing ucnv_getName()*/
535 /*default code page */
536 ucnv_getName(someConverters
[0], &err
);
538 log_data_err("getName[0] failed\n");
540 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters
[0], &err
));
542 ucnv_getName(someConverters
[1], &err
);
544 log_data_err("getName[1] failed\n");
546 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters
[1], &err
));
549 ucnv_close(someConverters
[0]);
550 ucnv_close(someConverters
[1]);
551 ucnv_close(someConverters
[2]);
552 ucnv_close(someConverters
[3]);
555 for (codepage_index
=0; codepage_index
< NUM_CODEPAGE
; ++codepage_index
)
561 strcpy(ucs_file_name
, U_TOPSRCDIR U_FILE_SEP_STRING
"test"U_FILE_SEP_STRING
"testdata"U_FILE_SEP_STRING
);
563 strcpy(ucs_file_name
, loadTestData(&err
));
566 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err
));
571 char* index
= strrchr(ucs_file_name
,(char)U_FILE_SEP_CHAR
);
573 if((unsigned int)(index
-ucs_file_name
) != (strlen(ucs_file_name
)-1)){
578 strcat(ucs_file_name
,".."U_FILE_SEP_STRING
);
580 strcat(ucs_file_name
, CodePagesTestFiles
[codepage_index
]);
582 ucs_file_in
= fopen(ucs_file_name
,"rb");
585 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name
);
589 /*Creates the converter under test; the ucnv_openCCSID(code_page, platform, errstatus) variant is commented out below and ucnv_open("ibm-949") is used instead*/
591 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
592 /* ucnv_flushCache(); */
593 myConverter
=ucnv_open( "ibm-949", &err
);
594 if (!myConverter
|| U_FAILURE(err
))
596 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err
));
601 /*testing for ucnv_getName() */
602 log_verbose("Testing ucnv_getName()...\n");
603 ucnv_getName(myConverter
, &err
);
605 log_err("Error in getName\n");
608 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter
, &err
));
610 if (ctest_stricmp(ucnv_getName(myConverter
, &err
), CodePagesToTest
[codepage_index
]))
611 log_err("getName failed\n");
613 log_verbose("getName ok\n");
614 /*Test getName with error condition*/
617 err
=U_ILLEGAL_ARGUMENT_ERROR
;
618 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
619 name
=ucnv_getName(myConverter
, &err
);
621 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
627 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
629 log_verbose("Testing ucnv_getMaxCharSize()...\n");
630 if (ucnv_getMaxCharSize(myConverter
)==CodePagesMaxChars
[codepage_index
])
631 log_verbose("Max byte per character OK\n");
633 log_err("Max byte per character failed\n");
635 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
636 if (ucnv_getMinCharSize(myConverter
)==CodePagesMinChars
[codepage_index
])
637 log_verbose("Min byte per character OK\n");
639 log_err("Min byte per character failed\n");
642 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
643 log_verbose("\n---Testing ucnv_getSubstChars...\n");
645 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
647 log_err("ucnv_getSubstChars returned a negative number %d\n", ii
);
651 rest
= (uint16_t)(((unsigned char)rest
<< 8) + (unsigned char)myptr
[x
]);
652 if (rest
==CodePagesSubstitutionChars
[codepage_index
])
653 log_verbose("Substitution character ok\n");
655 log_err("Substitution character failed.\n");
657 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
658 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
661 log_err("FAILURE! %s\n", myErrorName(err
));
663 ucnv_getSubstChars(myConverter
,save
, &ii
, &err
);
666 log_err("FAILURE! %s\n", myErrorName(err
));
669 if (strncmp(save
, myptr
, ii
))
670 log_err("Saved substitution character failed\n");
672 log_verbose("Saved substitution character ok\n");
674 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
675 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
677 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
678 if(err
!= U_INDEX_OUTOFBOUNDS_ERROR
){
679 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err
));
683 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
684 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
685 ucnv_setSubstChars(myConverter
, myptr
, 0, &err
);
686 if(err
!= U_ILLEGAL_ARGUMENT_ERROR
){
687 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err
));
689 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
690 strcpy(myptr
, "abc");
691 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
693 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
694 if(strncmp(save
, myptr
, ii
) == 0){
695 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
697 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
699 strcpy(myptr
, "abc");
700 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
701 err
=U_ILLEGAL_ARGUMENT_ERROR
;
702 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
703 if(strncmp(save
, myptr
, ii
) == 0){
704 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
709 #ifdef U_ENABLE_GENERIC_ISO_2022
710 /*resetState ucnv_reset()*/
711 log_verbose("\n---Testing ucnv_reset()..\n");
712 ucnv_reset(myConverter
);
715 const uint8_t in
[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
716 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
717 UConverter
*cnv
=ucnv_open("ISO_2022", &err
);
719 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
721 c
=ucnv_getNextUChar(cnv
, &source
, limit
, &err
);
722 if((U_FAILURE(err
) || c
!= (UChar32
)0x0031)) {
723 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err
));
732 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
733 locale
=CodePagesLocale
[codepage_index
];
736 disnamelen
= ucnv_getDisplayName(myConverter
, locale
, displayname
, len
, &err
);
737 if(err
==U_BUFFER_OVERFLOW_ERROR
) {
739 displayname
=(UChar
*)malloc((disnamelen
+1) * sizeof(UChar
));
740 ucnv_getDisplayName(myConverter
,locale
,displayname
,disnamelen
+1, &err
);
742 log_err("getDisplayName failed. The error is %s\n", myErrorName(err
));
745 log_verbose(" getDisplayName o.k.\n");
751 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err
));
753 /*test ucnv_getDisplayName with a preset error code (err != U_ZERO_ERROR)*/
754 err
= U_ILLEGAL_ARGUMENT_ERROR
;
755 len
=ucnv_getDisplayName(myConverter
,locale
,NULL
,0, &err
);
757 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
759 /*test ucnv_getDisplayName with a NULL converter*/
761 len
=ucnv_getDisplayName(NULL
,locale
,NULL
,0, &err
);
762 if( len
!=0 || U_SUCCESS(err
)){
763 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
767 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
768 ucnv_getFromUCallBack(myConverter
, &MIA1
, &MIA1Context
);
770 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
771 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
772 if (U_FAILURE(err
) || oldFromUAction
!= MIA1
|| oldFromUContext
!= MIA1Context
)
774 log_err("FAILURE! %s\n", myErrorName(err
));
777 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
778 if (MIA1_2
!= otherUnicodeAction(MIA1
) || MIA1Context2
!= &BOM
)
779 log_err("get From UCallBack failed\n");
781 log_verbose("get From UCallBack ok\n");
783 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
784 ucnv_setFromUCallBack(myConverter
,MIA1
, MIA1Context
, &oldFromUAction
, &oldFromUContext
, &err
);
785 if (U_FAILURE(err
) || oldFromUAction
!= otherUnicodeAction(MIA1
) || oldFromUContext
!= &BOM
)
787 log_err("FAILURE! %s\n", myErrorName(err
));
790 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
791 if (MIA1_2
!= MIA1
|| MIA1Context2
!= MIA1Context
)
792 log_err("get From UCallBack action failed\n");
794 log_verbose("get From UCallBack action ok\n");
796 /*testing ucnv_setFromUCallBack with error conditions*/
797 err
=U_ILLEGAL_ARGUMENT_ERROR
;
798 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
799 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
800 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
801 if(MIA1_2
== otherUnicodeAction(MIA1
) || MIA1Context2
== &BOM
){
802 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
807 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
808 ucnv_getToUCallBack(myConverter
, &MIA2
, &MIA2Context
);
810 log_verbose("\n---Testing setTo UCallBack...\n");
811 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), &BOM
, &oldToUAction
, &oldToUContext
, &err
);
812 if (U_FAILURE(err
) || oldToUAction
!= MIA2
|| oldToUContext
!= MIA2Context
)
814 log_err("FAILURE! %s\n", myErrorName(err
));
817 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
818 if (MIA2_2
!= otherCharAction(MIA2
) || MIA2Context2
!= &BOM
)
819 log_err("To UCallBack failed\n");
821 log_verbose("To UCallBack ok\n");
823 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
824 ucnv_setToUCallBack(myConverter
,MIA2
, MIA2Context
, &oldToUAction
, &oldToUContext
, &err
);
825 if (U_FAILURE(err
) || oldToUAction
!= otherCharAction(MIA2
) || oldToUContext
!= &BOM
)
826 { log_err("FAILURE! %s\n", myErrorName(err
)); }
828 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
829 if (MIA2_2
!= MIA2
|| MIA2Context2
!= MIA2Context
)
830 log_err("To UCallBack failed\n");
832 log_verbose("To UCallBack ok\n");
834 /*testing ucnv_setToUCallBack with error conditions*/
835 err
=U_ILLEGAL_ARGUMENT_ERROR
;
836 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
837 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), NULL
, &oldToUAction
, &oldToUContext
, &err
);
838 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
839 if (MIA2_2
== otherCharAction(MIA2
) || MIA2Context2
== &BOM
){
840 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
845 /*getcodepageid testing ucnv_getCCSID() */
846 log_verbose("\n----Testing getCCSID....\n");
847 cp
= ucnv_getCCSID(myConverter
,&err
);
850 log_err("FAILURE!..... %s\n", myErrorName(err
));
852 if (cp
!= CodePageNumberToTest
[codepage_index
])
853 log_err("Codepage number test failed\n");
855 log_verbose("Codepage number test OK\n");
857 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
858 err
=U_ILLEGAL_ARGUMENT_ERROR
;
859 if( ucnv_getCCSID(myConverter
,&err
) != -1){
860 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
864 /*getCodepagePlatform testing ucnv_getPlatform()*/
865 log_verbose("\n---Testing getCodepagePlatform ..\n");
866 if (CodePagesPlatform
[codepage_index
]!=ucnv_getPlatform(myConverter
, &err
))
867 log_err("Platform codepage test failed\n");
869 log_verbose("Platform codepage test ok\n");
873 log_err("FAILURE! %s\n", myErrorName(err
));
875 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
876 err
= U_ILLEGAL_ARGUMENT_ERROR
;
877 if(ucnv_getPlatform(myConverter
, &err
) != UCNV_UNKNOWN
){
878 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
884 fread(&BOM
, sizeof(UChar
), 1, ucs_file_in
);
885 if (BOM
!=0xFEFF && BOM
!=0xFFFE)
887 log_err("File Missing BOM...Bailing!\n");
892 /*Reads in the file*/
893 while(!feof(ucs_file_in
)&&(i
+=fread(ucs_file_buffer
+i
, sizeof(UChar
), 1, ucs_file_in
)))
895 myUChar
= ucs_file_buffer
[i
-1];
897 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN*/
900 myUChar
= ucs_file_buffer
[i
-1];
901 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN Corner Case*/
904 /*testing ucnv_fromUChars() and ucnv_toUChars() */
905 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
907 uchar1
=(UChar
*)malloc(sizeof(UChar
) * (i
+1));
908 u_uastrcpy(uchar1
,"");
909 u_strncpy(uchar1
,ucs_file_buffer
,i
);
912 uchar3
=(UChar
*)malloc(sizeof(UChar
)*(i
+1));
913 u_uastrcpy(uchar3
,"");
914 u_strncpy(uchar3
,ucs_file_buffer
,i
);
917 /*Calls the Conversion Routine */
918 testLong1
= MAX_FILE_LEN
;
919 log_verbose("\n---Testing ucnv_fromUChars()\n");
920 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
923 log_err("\nFAILURE...%s\n", myErrorName(err
));
926 log_verbose(" ucnv_fromUChars() o.k.\n");
928 /*test the conversion routine */
929 log_verbose("\n---Testing ucnv_toUChars()\n");
930 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
932 targetsize
= ucnv_toUChars(myConverter
,
936 strlen(output_cp_buffer
),
938 /*if there is a buffer overflow then trap the values and pass them and make the actual call*/
940 if(err
==U_BUFFER_OVERFLOW_ERROR
)
943 uchar2
=(UChar
*)malloc((targetsize
+1) * sizeof(UChar
));
944 targetsize
= ucnv_toUChars(myConverter
,
948 strlen(output_cp_buffer
),
952 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err
));
954 log_verbose(" ucnv_toUChars() o.k.\n");
956 if(u_strcmp(uchar1
,uchar2
)!=0)
957 log_err("equality test failed with conversion routine\n");
961 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
963 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
964 err
=U_ILLEGAL_ARGUMENT_ERROR
;
965 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
966 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
967 if (targetcapacity
!=0) {
968 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
971 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
972 targetcapacity
= ucnv_fromUChars(NULL
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
973 if (targetcapacity
!=0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
974 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
977 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
978 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, 0, &err
);
979 if (targetcapacity
!=0) {
980 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
982 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
983 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, 0, uchar1
, -1, &err
);
984 if (err
!= U_BUFFER_OVERFLOW_ERROR
) {
985 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
987 /*toUChars with error conditions*/
988 targetsize
= ucnv_toUChars(myConverter
, uchar2
, targetsize
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
990 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
993 targetsize
= ucnv_toUChars(myConverter
, uchar2
, -1, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
994 if(targetsize
!= 0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
){
995 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
998 targetsize
= ucnv_toUChars(myConverter
, uchar2
, 0, output_cp_buffer
, 0, &err
);
999 if (targetsize
!=0) {
1000 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
1003 targetsize
= ucnv_toUChars(myConverter
, NULL
, targetcapacity2
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
1004 if (err
!= U_STRING_NOT_TERMINATED_WARNING
) {
1005 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
1012 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
1013 /*Clean up re-usable vars*/
1015 log_verbose("Testing ucnv_fromUnicode().....\n");
1016 tmp_ucs_buf
=ucs_file_buffer_use
;
1017 ucnv_fromUnicode(myConverter
, &mytarget_1
,
1018 mytarget
+ MAX_FILE_LEN
,
1020 ucs_file_buffer_use
+i
,
1024 consumedUni
= (UChar
*)tmp_consumedUni
;
1028 log_err("FAILURE! %s\n", myErrorName(err
));
1031 log_verbose("ucnv_fromUnicode() o.k.\n");
1033 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
1034 log_verbose("Testing ucnv_toUnicode().....\n");
1035 tmp_mytarget_use
=mytarget_use
;
1036 tmp_consumed
= consumed
;
1037 ucnv_toUnicode(myConverter
, &my_ucs_file_buffer_1
,
1038 my_ucs_file_buffer
+ MAX_FILE_LEN
,
1040 mytarget_use
+ (mytarget_1
- mytarget
),
1044 consumed
= (char*)tmp_consumed
;
1047 log_err("FAILURE! %s\n", myErrorName(err
));
1050 log_verbose("ucnv_toUnicode() o.k.\n");
1053 log_verbose("\n---Testing RoundTrip ...\n");
1056 u_strncpy(uchar3
, my_ucs_file_buffer
,i
);
1059 if(u_strcmp(uchar1
,uchar3
)==0)
1060 log_verbose("Equality test o.k.\n");
1062 log_err("Equality test failed\n");
1067 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__
);
1071 if(u_strcmp(uchar2
, uchar3
)==0)
1072 log_verbose("Equality test o.k.\n");
1074 log_err("Equality test failed\n");
1077 fclose(ucs_file_in
);
1078 ucnv_close(myConverter
);
1079 if (uchar1
!= 0) free(uchar1
);
1080 if (uchar2
!= 0) free(uchar2
);
1081 if (uchar3
!= 0) free(uchar3
);
1084 free((void*)mytarget
);
1085 free((void*)output_cp_buffer
);
1086 free((void*)ucs_file_buffer
);
1087 free((void*)my_ucs_file_buffer
);
1091 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
)
1093 return (MIA
==(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
)?(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_SUBSTITUTE
:(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
;
1097 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
)
1099 return (MIA
==(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
)?(UConverterToUCallback
)UCNV_TO_U_CALLBACK_SUBSTITUTE
:(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
;
/*
 * Checks the value returned by ucnv_flushCache() while converters are open
 * and after they are closed in stages. Five handles are opened: three on
 * "ibm-1047", one on "gb18030" and one on "ibm-954". The flush counts
 * expected at the four checkpoints are 0, 0, 2 and 1.
 */
1102 static void TestFlushCache(void) {
1103 #if !UCONFIG_NO_LEGACY_CONVERSION
1104 UErrorCode err
= U_ZERO_ERROR
;
1105 UConverter
* someConverters
[5];
1108 /* flush the converter cache to get a consistent state before the flushing is tested */
1111 /*Testing ucnv_open()*/
1112 /* Note: These converters have been chosen because they do NOT
1113 encode the Latin characters (U+0041, ...), and therefore are
1114 highly unlikely to be chosen as system default codepages */
1116 someConverters
[0] = ucnv_open("ibm-1047", &err
);
1117 if (U_FAILURE(err
)) {
1118 log_data_err("FAILURE! %s\n", myErrorName(err
));
1121 someConverters
[1] = ucnv_open("ibm-1047", &err
);
1122 if (U_FAILURE(err
)) {
1123 log_data_err("FAILURE! %s\n", myErrorName(err
));
1126 someConverters
[2] = ucnv_open("ibm-1047", &err
);
1127 if (U_FAILURE(err
)) {
1128 log_data_err("FAILURE! %s\n", myErrorName(err
));
1131 someConverters
[3] = ucnv_open("gb18030", &err
);
1132 if (U_FAILURE(err
)) {
1133 log_data_err("FAILURE! %s\n", myErrorName(err
));
1136 someConverters
[4] = ucnv_open("ibm-954", &err
);
1137 if (U_FAILURE(err
)) {
1138 log_data_err("FAILURE! %s\n", myErrorName(err
));
1142 /* Testing ucnv_flushCache() */
1143 log_verbose("\n---Testing ucnv_flushCache...\n");
/* checkpoint 1: all five handles are still open, 0 is expected */
1144 if ((flushCount
=ucnv_flushCache())==0)
1145 log_verbose("Flush cache ok\n");
1147 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1149 /*testing ucnv_close() and ucnv_flushCache() */
1150 ucnv_close(someConverters
[0]);
1151 ucnv_close(someConverters
[1]);
/* checkpoint 2: [0] and [1] are closed, but [2] was opened with the same "ibm-1047" name, so 0 is still expected */
1153 if ((flushCount
=ucnv_flushCache())==0)
1154 log_verbose("Flush cache ok\n");
1156 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1158 ucnv_close(someConverters
[2]);
1159 ucnv_close(someConverters
[3]);
/* checkpoint 3: the last "ibm-1047" handle and the "gb18030" handle are now closed, 2 is expected */
1161 if ((flushCount
=ucnv_flushCache())==2)
1162 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1164 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1168 ucnv_close(someConverters
[4]);
/* checkpoint 4: only the "ibm-954" handle was closed since the previous flush, 1 is expected */
1169 if ( (flushCount
=ucnv_flushCache())==1)
1170 log_verbose("Flush cache ok\n");
1172 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__
, flushCount
);
1177 * Test the converter alias API, specifically the fuzzy matching of
1178 * alias names and the alias table integrity. Make sure each
1179 * converter has at least one alias (itself), and that its listed
1180 * aliases map back to itself. Check some hard-coded UTF-8 and
1181 * ISO_2022 aliases to make sure they work.
/*
 * Part 1 walks every name returned by ucnv_getAvailableName(): the name must
 * have at least one alias, must be openable, and every alias j >= 1 must map
 * back to alias 0. A mismatch is tolerated only when the lookup reported
 * U_AMBIGUOUS_ALIAS_WARNING and the other converter lists the alias too.
 * Part 2 checks three hard-coded tables: in ISO_2022_NAMES and UTF8_NAMES
 * entry 0 is the expected result for entries 1..n-1 (both loops start at 1);
 * CONVERTERS_NAMES holds pairs that are looked up by .alias and compared
 * with .name.
 */
1183 static void TestAlias() {
1185 UErrorCode status
= U_ZERO_ERROR
;
1187 /* Predetermined aliases that we expect to map back to ISO_2022
1188 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1189 const char* ISO_2022_NAMES
[] =
1190 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1191 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1192 int32_t ISO_2022_NAMES_LENGTH
=
1193 sizeof(ISO_2022_NAMES
) / sizeof(ISO_2022_NAMES
[0]);
1194 const char *UTF8_NAMES
[] =
1195 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1196 "utf_8", "ibm1208", "cp1208" };
1197 int32_t UTF8_NAMES_LENGTH
=
1198 sizeof(UTF8_NAMES
) / sizeof(UTF8_NAMES
[0]);
1203 } CONVERTERS_NAMES
[] = {
1204 { "UTF-32BE", "UTF32_BigEndian" },
1205 { "UTF-32LE", "UTF32_LittleEndian" },
1206 { "UTF-32", "ISO-10646-UCS-4" },
1207 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1208 { "UTF-32", "ucs-4" }
1210 int32_t CONVERTERS_NAMES_LENGTH
= sizeof(CONVERTERS_NAMES
) / sizeof(*CONVERTERS_NAMES
);
1212 /* When there are bugs in gencnval or in ucnv_io, converters can
1213 appear to have no aliases. */
1214 ncnv
= ucnv_countAvailable();
1215 log_verbose("%d converters\n", ncnv
);
1216 for (i
=0; i
<ncnv
; ++i
) {
1217 const char *name
= ucnv_getAvailableName(i
);
1219 uint16_t na
= ucnv_countAliases(name
, &status
);
1224 log_err("FAIL: Converter \"%s\" (i=%d)"
1225 " has no aliases; expect at least one\n",
1229 cnv
= ucnv_open(name
, &status
);
1230 if (U_FAILURE(status
)) {
1231 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1232 " can't be opened.\n",
/* names containing "PlatformEndian" or "OppositeEndian" are exempt from the getName() == name check */
1236 if (strcmp(ucnv_getName(cnv
, &status
), name
) != 0
1237 && (strstr(name
, "PlatformEndian") == 0 && strstr(name
, "OppositeEndian") == 0)) {
1238 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1239 "The should be the same\n",
1240 name
, ucnv_getName(cnv
, &status
));
1245 status
= U_ZERO_ERROR
;
1246 alias0
= ucnv_getAlias(name
, 0, &status
);
1247 for (j
=1; j
<na
; ++j
) {
1249 /* Make sure each alias maps back to the same list of
1250 aliases. Assume that if alias 0 is the same, the whole
1251 list is the same (this should always be true). */
1252 const char *mapBack
;
1254 status
= U_ZERO_ERROR
;
1255 alias
= ucnv_getAlias(name
, j
, &status
);
1256 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1257 log_err("FAIL: Converter \"%s\"is ambiguous\n", name
);
1260 if (alias
== NULL
) {
1261 log_err("FAIL: Converter \"%s\" -> "
1267 mapBack
= ucnv_getAlias(alias
, 0, &status
);
1269 if (mapBack
== NULL
) {
1270 log_err("FAIL: Converter \"%s\" -> "
1271 "alias[%d]=\"%s\" -> "
1272 "alias[0]=NULL, exp. \"%s\"\n",
1273 name
, j
, alias
, alias0
);
1277 if (0 != strcmp(alias0
, mapBack
)) {
1279 UBool foundAlias
= FALSE
;
1280 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1281 /* Make sure that we only get this mismapping when there is
1282 an ambiguous alias, and the other converter has this alias too. */
1283 for (idx
= 0; idx
< ucnv_countAliases(mapBack
, &status
); idx
++) {
1284 if (strcmp(ucnv_getAlias(mapBack
, (uint16_t)idx
, &status
), alias
) == 0) {
1290 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1293 log_err("FAIL: Converter \"%s\" -> "
1294 "alias[%d]=\"%s\" -> "
1295 "alias[0]=\"%s\", exp. \"%s\"\n",
1296 name
, j
, alias
, mapBack
, alias0
);
1303 /* Check a list of predetermined aliases that we expect to map
1304 * back to ISO_2022 and UTF-8. */
1305 for (i
=1; i
<ISO_2022_NAMES_LENGTH
; ++i
) {
1306 const char* mapBack
= ucnv_getAlias(ISO_2022_NAMES
[i
], 0, &status
);
1308 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES
[i
]);
1311 if (0 != strcmp(mapBack
, ISO_2022_NAMES
[0])) {
1312 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1313 ISO_2022_NAMES
[i
], mapBack
);
1318 for (i
=1; i
<UTF8_NAMES_LENGTH
; ++i
) {
1319 const char* mapBack
= ucnv_getAlias(UTF8_NAMES
[i
], 0, &status
);
1321 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES
[i
]);
1324 if (mapBack
&& 0 != strcmp(mapBack
, UTF8_NAMES
[0])) {
1325 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1326 UTF8_NAMES
[i
], mapBack
);
1331 * Check a list of predetermined aliases that we expect to map
1332 * back to predermined converter names.
1335 for (i
= 0; i
< CONVERTERS_NAMES_LENGTH
; ++i
) {
1336 const char* mapBack
= ucnv_getAlias(CONVERTERS_NAMES
[i
].alias
, 0, &status
);
/* NOTE(review): the lookup above used .alias, but the message below prints .name - confirm which one is intended */
1338 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES
[i
].name
);
1341 if (0 != strcmp(mapBack
, CONVERTERS_NAMES
[i
].name
)) {
1342 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1343 CONVERTERS_NAMES
[i
].alias
, mapBack
, CONVERTERS_NAMES
[i
].name
);
/*
 * Checks ucnv_getStandardName() for a name that is shared between standards:
 *   "Shift_JIS" under "IBM"          -> "ibm-943"   with U_AMBIGUOUS_ALIAS_WARNING
 *   "ibm-943" under "IANA"           -> "Shift_JIS" with U_AMBIGUOUS_ALIAS_WARNING
 *   "ibm-943_P130-2000" under "IANA" -> NULL, without that warning
 * NOTE(review): in the first two checks alias may be NULL when it is passed
 * to the "%s" of the failure message.
 */
1349 static void TestDuplicateAlias(void) {
1351 UErrorCode status
= U_ZERO_ERROR
;
1353 status
= U_ZERO_ERROR
;
1354 alias
= ucnv_getStandardName("Shift_JIS", "IBM", &status
);
1355 if (alias
== NULL
|| strcmp(alias
, "ibm-943") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1356 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias
);
1358 status
= U_ZERO_ERROR
;
1359 alias
= ucnv_getStandardName("ibm-943", "IANA", &status
);
1360 if (alias
== NULL
|| strcmp(alias
, "Shift_JIS") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1361 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias
);
1363 status
= U_ZERO_ERROR
;
1364 alias
= ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status
);
1365 if (alias
!= NULL
|| status
== U_AMBIGUOUS_ALIAS_WARNING
) {
/* NOTE(review): the message says "ibm-943" although the name queried above is "ibm-943_P130-2000" */
1366 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias
);
1371 /* Test safe clone callback */
/*
 * Source of the serial numbers stored in TSCCContext.serial. The counter is
 * a function-local static initialised to 1, so it persists across calls.
 */
1373 static uint32_t TSCC_nextSerial()
1375 static uint32_t n
= 1;
/* State carried by the context object that is handed to the TSCC_fromU / TSCC_toU test callbacks. */
1382 uint32_t magic
; /* 0xC0FFEE to identify that the object is OK */
1383 uint32_t serial
; /* minted from nextSerial, above */
1384 UBool wasClosed
; /* close happened on the object */
/*
 * Creates the context for a cloned converter: malloc()s a new TSCCContext,
 * gives it a fresh serial from TSCC_nextSerial(), clears wasClosed, stamps
 * the magic value and logs the old and new pointer/serial pairs. No field
 * value is copied over from ctx; ctx is only read for the log line.
 * NOTE(review): no NULL check on the malloc() result is visible before the
 * fields are written.
 */
1387 static TSCCContext
*TSCC_clone(TSCCContext
*ctx
)
1389 TSCCContext
*newCtx
= (TSCCContext
*)malloc(sizeof(TSCCContext
));
1391 newCtx
->serial
= TSCC_nextSerial();
1392 newCtx
->wasClosed
= 0;
1393 newCtx
->magic
= 0xC0FFEE;
1395 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx
, ctx
->serial
, newCtx
, newCtx
->serial
);
/*
 * From-Unicode callback used by the safe-clone callback test. context is
 * treated as a TSCCContext; its magic value is verified on every call.
 * Two reasons are acted upon:
 *   UCNV_CLONE - a new context is created with TSCC_clone() and installed on
 *                fromUArgs->converter together with the callback function
 *                that is currently set there;
 *   UCNV_CLOSE - ctx->wasClosed is set to TRUE.
 */
1400 static void TSCC_fromU(const void *context
,
1401 UConverterFromUnicodeArgs
*fromUArgs
,
1402 const UChar
* codeUnits
,
1405 UConverterCallbackReason reason
,
1408 TSCCContext
*ctx
= (TSCCContext
*)context
;
1409 UConverterFromUCallback junkFrom
;
1411 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, fromUArgs
->converter
);
1413 if(ctx
->magic
!= 0xC0FFEE) {
1414 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1418 if(reason
== UCNV_CLONE
) {
1419 UErrorCode subErr
= U_ZERO_ERROR
;
1420 TSCCContext
*newCtx
;
1421 TSCCContext
*junkCtx
;
1422 TSCCContext
**pjunkCtx
= &junkCtx
;
1425 log_verbose("TSCC_fromU: cloning..\n");
1426 newCtx
= TSCC_clone(ctx
);
1428 if(newCtx
== NULL
) {
1429 log_err("TSCC_fromU: internal clone failed on %p\n", ctx
);
/* read back the callback function currently installed (its context goes to junkCtx) and re-install it with the new context */
1433 ucnv_getFromUCallBack(fromUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1434 ucnv_setFromUCallBack(fromUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1436 if(U_FAILURE(subErr
)) {
1441 if(reason
== UCNV_CLOSE
) {
1442 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1443 ctx
->wasClosed
= TRUE
;
/*
 * To-Unicode counterpart of TSCC_fromU. context is treated as a TSCCContext;
 * its magic value is verified on every call. Two reasons are acted upon:
 *   UCNV_CLONE - a new context is created with TSCC_clone() and installed on
 *                toUArgs->converter together with the callback function that
 *                is currently set there;
 *   UCNV_CLOSE - ctx->wasClosed is set to TRUE.
 */
1448 static void TSCC_toU(const void *context
,
1449 UConverterToUnicodeArgs
*toUArgs
,
1450 const char* codeUnits
,
1452 UConverterCallbackReason reason
,
1455 TSCCContext
*ctx
= (TSCCContext
*)context
;
/* despite its name, junkFrom holds a to-Unicode callback here */
1456 UConverterToUCallback junkFrom
;
1458 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, toUArgs
->converter
);
1460 if(ctx
->magic
!= 0xC0FFEE) {
1461 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1465 if(reason
== UCNV_CLONE
) {
1466 UErrorCode subErr
= U_ZERO_ERROR
;
1467 TSCCContext
*newCtx
;
1468 TSCCContext
*junkCtx
;
1469 TSCCContext
**pjunkCtx
= &junkCtx
;
1472 log_verbose("TSCC_toU: cloning..\n");
1473 newCtx
= TSCC_clone(ctx
);
1475 if(newCtx
== NULL
) {
1476 log_err("TSCC_toU: internal clone failed on %p\n", ctx
);
/* read back the callback function currently installed (its context goes to junkCtx) and re-install it with the new context */
1480 ucnv_getToUCallBack(toUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1481 ucnv_setToUCallBack(toUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1483 if(U_FAILURE(subErr
)) {
1488 if(reason
== UCNV_CLOSE
) {
1489 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1490 ctx
->wasClosed
= TRUE
;
/* Prepares a caller-owned TSCCContext for use: stamps the magic value and assigns a fresh serial from TSCC_nextSerial(). */
1494 static void TSCC_init(TSCCContext
*q
)
1496 q
->magic
= 0xC0FFEE;
1497 q
->serial
= TSCC_nextSerial();
/*
 * Logs the state of context q under the label name: pointer, serial, magic
 * and whether it is marked CLOSED or open. A magic value other than 0xC0FFEE
 * is reported with log_err(); there is a separate message for a NULL q.
 */
1501 static void TSCC_print_log(TSCCContext
*q
, const char *name
)
1504 log_verbose("TSCContext: %s is NULL!!\n", name
);
1506 if(q
->magic
!= 0xC0FFEE) {
1507 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1508 q
,q
->serial
, q
->magic
);
1510 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1511 q
, q
->serial
, name
, q
->magic
, q
->wasClosed
?"CLOSED":"open");
1515 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Verifies that callback contexts follow their converter through clone and
 * close. TSCC_fromU / TSCC_toU are installed on conv1 (opened as
 * "iso-8859-3") with the stack contexts from1 / to1, and conv1 is cloned
 * into conv2 with ucnv_safeClone(). The checks are:
 *   - conv1 still reports &from1 / &to1, conv2 reports different contexts;
 *   - nothing is marked closed right after cloning;
 *   - after the step logged as "ucnv_closed (conv1)" only from1 / to1 are
 *     marked closed;
 *   - after the step logged as "ucnv_closed (conv2)" from2 / to2 are marked
 *     closed as well.
 * The clone's contexts (from2, to2) are released with free() at the end.
 */
1516 static void TestConvertSafeCloneCallback()
1518 UErrorCode err
= U_ZERO_ERROR
;
1519 TSCCContext from1
, to1
;
1520 TSCCContext
*from2
, *from3
, *to2
, *to3
;
1521 TSCCContext
**pfrom2
= &from2
, **pfrom3
= &from3
, **pto2
= &to2
, **pto3
= &to3
;
1523 int32_t hunkSize
= 8192;
1524 UConverterFromUCallback junkFrom
;
1525 UConverterToUCallback junkTo
;
1526 UConverter
*conv1
, *conv2
= NULL
;
1528 conv1
= ucnv_open("iso-8859-3", &err
);
1530 if(U_FAILURE(err
)) {
1531 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err
));
1535 log_verbose("Opened conv1=%p\n", conv1
);
1540 TSCC_print_log(&from1
, "from1");
1541 TSCC_print_log(&to1
, "to1");
1543 ucnv_setFromUCallBack(conv1
, TSCC_fromU
, &from1
, NULL
, NULL
, &err
);
1544 log_verbose("Set from1 on conv1\n");
1545 TSCC_print_log(&from1
, "from1");
1547 ucnv_setToUCallBack(conv1
, TSCC_toU
, &to1
, NULL
, NULL
, &err
);
1548 log_verbose("Set to1 on conv1\n");
1549 TSCC_print_log(&to1
, "to1");
1551 conv2
= ucnv_safeClone(conv1
, hunk
, &hunkSize
, &err
);
1552 if(U_FAILURE(err
)) {
1553 log_err("safeClone failed: %s\n", u_errorName(err
));
1556 log_verbose("Cloned to conv2=%p.\n", conv2
);
1558 /********** from *********************/
/* from2 = context now attached to the clone, from3 = context still attached to the original */
1559 ucnv_getFromUCallBack(conv2
, &junkFrom
, (const void**)pfrom2
);
1560 ucnv_getFromUCallBack(conv1
, &junkFrom
, (const void**)pfrom3
);
1562 TSCC_print_log(from2
, "from2");
1563 TSCC_print_log(from3
, "from3(==from1)");
1566 log_err("FAIL! from2 is null \n");
1571 log_err("FAIL! from3 is null \n");
1575 if(from3
!= (&from1
) ) {
1576 log_err("FAIL! conv1's FROM context changed!\n");
1579 if(from2
== (&from1
) ) {
1580 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1583 if(from1
.wasClosed
) {
1584 log_err("FAIL! from1 is closed \n");
1587 if(from2
->wasClosed
) {
1588 log_err("FAIL! from2 was closed\n");
1591 /********** to *********************/
/* to2 = context now attached to the clone, to3 = context still attached to the original */
1592 ucnv_getToUCallBack(conv2
, &junkTo
, (const void**)pto2
);
1593 ucnv_getToUCallBack(conv1
, &junkTo
, (const void**)pto3
);
1595 TSCC_print_log(to2
, "to2");
1596 TSCC_print_log(to3
, "to3(==to1)");
1599 log_err("FAIL! to2 is null \n");
1604 log_err("FAIL! to3 is null \n");
1608 if(to3
!= (&to1
) ) {
1609 log_err("FAIL! conv1's TO context changed!\n");
1612 if(to2
== (&to1
) ) {
1613 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1617 log_err("FAIL! to1 is closed \n");
1620 if(to2
->wasClosed
) {
1621 log_err("FAIL! to2 was closed\n");
1624 /*************************************/
1627 log_verbose("ucnv_closed (conv1)\n");
1628 TSCC_print_log(&from1
, "from1");
1629 TSCC_print_log(from2
, "from2");
1630 TSCC_print_log(&to1
, "to1");
1631 TSCC_print_log(to2
, "to2");
/* at this stage the original's contexts must be marked closed and the clone's must not */
1633 if(from1
.wasClosed
== FALSE
) {
1634 log_err("FAIL! from1 is NOT closed \n");
1637 if(from2
->wasClosed
) {
1638 log_err("FAIL! from2 was closed\n");
1641 if(to1
.wasClosed
== FALSE
) {
1642 log_err("FAIL! to1 is NOT closed \n");
1645 if(to2
->wasClosed
) {
1646 log_err("FAIL! to2 was closed\n");
1650 log_verbose("ucnv_closed (conv2)\n");
1652 TSCC_print_log(&from1
, "from1");
1653 TSCC_print_log(from2
, "from2");
/* at this stage all four contexts must be marked closed */
1655 if(from1
.wasClosed
== FALSE
) {
1656 log_err("FAIL! from1 is NOT closed \n");
1659 if(from2
->wasClosed
== FALSE
) {
1660 log_err("FAIL! from2 was NOT closed\n");
1663 TSCC_print_log(&to1
, "to1");
1664 TSCC_print_log(to2
, "to2");
1666 if(to1
.wasClosed
== FALSE
) {
1667 log_err("FAIL! to1 is NOT closed \n");
1670 if(to2
->wasClosed
== FALSE
) {
1671 log_err("FAIL! to2 was NOT closed\n");
1675 free(to2
); /* to1 is stack based */
1677 if(from2
!= (&from1
)) {
1678 free(from2
); /* from1 is stack based */
/*
 * Helper for the safe-clone buffer checks. Callers pass a region of a buffer
 * that was memset() to 0xaa together with b = 0xaa and treat a true result
 * as "bytes were overwritten", so this presumably reports whether any of the
 * length bytes starting at p differs from b - TODO confirm against the body.
 */
1684 containsAnyOtherByte(uint8_t *p
, int32_t length
, uint8_t b
) {
/*
 * Exercises ucnv_safeClone() in two parts.
 * Part 1 (converter names[0]) covers argument and status handling: NULL
 * status, incoming error status, NULL bufferSize pointer, size preflighting,
 * cloning with the preflighted size, a too-small size, a NULL buffer and a
 * NULL converter.
 * Part 2 loops over every entry of bufferSizes[] and every name in names[]:
 * the converter is cloned into buffer[1] (all three rows of buffer are
 * pre-filled with 0xaa so that stray writes can be detected) and the clone
 * is then driven through ucnv_fromUnicode(), ucnv_toUnicode() and
 * ucnv_getNextUChar(). The largest preflighted clone size is tracked in
 * maxBufferSize / maxName and finally compared with
 * U_CNV_SAFECLONE_BUFFERSIZE.
 */
1695 static void TestConvertSafeClone()
1697 /* one 'regular' & all the 'private stateful' converters */
1698 static const char *const names
[] = {
1699 #if !UCONFIG_NO_LEGACY_CONVERSION
1701 "ISO_2022,locale=zh,version=1",
1704 #if !UCONFIG_NO_LEGACY_CONVERSION
1708 "ISO_2022,locale=kr,version=1",
1709 "ISO_2022,locale=jp,version=2",
1713 #if !UCONFIG_NO_LEGACY_CONVERSION
1714 "IMAP-mailbox-name",
1721 /* store the actual sizes of each converter */
1722 int32_t actualSizes
[LENGTHOF(names
)];
1724 static const int32_t bufferSizes
[] = {
1725 U_CNV_SAFECLONE_BUFFERSIZE
,
1726 (int32_t)(3*sizeof(UConverter
))/2, /* 1.5*sizeof(UConverter) */
1727 (int32_t)sizeof(UConverter
)/2 /* 0.5*sizeof(UConverter) */
1730 char charBuffer
[21]; /* Leave at an odd number for alignment testing */
1731 uint8_t buffer
[3] [U_CNV_SAFECLONE_BUFFERSIZE
];
1732 int32_t bufferSize
, maxBufferSize
;
1733 const char *maxName
;
1734 UConverter
* cnv
, *cnv2
;
1738 const char *pConstCharBuffer
;
1739 const char *charBufferLimit
= charBuffer
+ sizeof(charBuffer
)/sizeof(*charBuffer
);
1740 UChar uniBuffer
[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1741 UChar uniCharBuffer
[20];
1742 char charSourceBuffer
[] = { 0x1b, 0x24, 0x42 };
1743 const char *pCharSource
= charSourceBuffer
;
1744 const char *pCharSourceLimit
= charSourceBuffer
+ sizeof(charSourceBuffer
);
1745 UChar
*pUCharTarget
= uniCharBuffer
;
1746 UChar
*pUCharTargetLimit
= uniCharBuffer
+ sizeof(uniCharBuffer
)/sizeof(*uniCharBuffer
);
1747 const UChar
* pUniBuffer
;
1748 const UChar
*uniBufferLimit
= uniBuffer
+ sizeof(uniBuffer
)/sizeof(*uniBuffer
);
1752 cnv
= ucnv_open(names
[0], &err
);
1753 if(U_SUCCESS(err
)) {
1754 /* Check the various error & informational states: */
1756 /* Null status - just returns NULL */
1757 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1758 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, 0))
1760 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1762 /* error status - should return 0 & keep error the same */
1763 err
= U_MEMORY_ALLOCATION_ERROR
;
1764 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
1766 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1770 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
1771 if (0 != ucnv_safeClone(cnv
, buffer
[0], 0, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1773 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1777 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1779 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || U_FAILURE(err
) || bufferSize
<= 0)
1781 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1783 /* Verify our define is large enough */
1784 if (U_CNV_SAFECLONE_BUFFERSIZE
< bufferSize
)
1786 log_err("FAIL: Pre-calculated buffer size is too small\n");
1788 /* Verify we can use this run-time calculated size */
1789 if (0 == (cnv2
= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
1791 log_err("FAIL: Converter can't be cloned with run-time size\n");
1797 /* size one byte too small - should allocate & let us know */
1799 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1801 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1808 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1810 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1811 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1813 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1821 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1822 if (0 != ucnv_safeClone(0, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1824 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1833 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1835 for(j
= 0; j
< LENGTHOF(bufferSizes
); ++j
) {
1836 for (index
= 0; index
< LENGTHOF(names
); index
++)
1839 cnv
= ucnv_open(names
[index
], &err
);
1840 if(U_FAILURE(err
)) {
1841 log_data_err("ucnv_open(\"%s\") failed - %s\n", names
[index
], u_errorName(err
));
1846 /* preflight to get maxBufferSize */
1847 actualSizes
[index
] = 0;
1848 ucnv_safeClone(cnv
, NULL
, &actualSizes
[index
], &err
);
1849 if(actualSizes
[index
] > maxBufferSize
) {
1850 maxBufferSize
= actualSizes
[index
];
1851 maxName
= names
[index
];
/* poison all three rows of buffer so that writes outside the clone area can be detected below */
1855 memset(buffer
, 0xaa, sizeof(buffer
));
1857 bufferSize
= bufferSizes
[j
];
1858 cnv2
= ucnv_safeClone(cnv
, buffer
[1], &bufferSize
, &err
);
1860 /* close the original immediately to make sure that the clone works by itself */
1863 if( actualSizes
[index
] <= (bufferSizes
[j
] - (int32_t)sizeof(UAlignedMemory
)) &&
1864 err
== U_SAFECLONE_ALLOCATED_WARNING
1866 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names
[index
]);
1869 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1870 if(bufferSize
<= bufferSizes
[j
]) {
1871 /* used the stack buffer */
/* row 0 and everything from buffer[1]+bufferSize to the end of buffer must still hold 0xaa */
1872 if( containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
[0]), 0xaa) ||
1873 containsAnyOtherByte(buffer
[1]+bufferSize
, (int32_t)(sizeof(buffer
)-(sizeof(buffer
[0])+bufferSize
)), 0xaa)
1875 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1876 names
[index
], bufferSize
, bufferSizes
[j
]);
1879 /* heap-allocated the clone */
1880 if(containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
), 0xaa)) {
1881 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1882 names
[index
], bufferSize
, bufferSizes
[j
]);
1886 pCharBuffer
= charBuffer
;
1887 pUniBuffer
= uniBuffer
;
1889 ucnv_fromUnicode(cnv2
,
1898 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err
));
1900 ucnv_toUnicode(cnv2
,
1911 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err
));
1914 pConstCharBuffer
= charBuffer
;
1915 if (uniBuffer
[0] != ucnv_getNextUChar(cnv2
, &pConstCharBuffer
, pCharBuffer
, &err
))
1917 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err
));
1923 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1924 sizeof(UConverter
), maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1925 if(maxBufferSize
> U_CNV_SAFECLONE_BUFFERSIZE
) {
1926 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1927 maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
/*
 * For each number in ccsids[], opens a converter with
 * ucnv_openCCSID(ccsid, UCNV_IBM, ...) and checks that ucnv_getCCSID()
 * returns the same number and that ucnv_getPlatform() returns UCNV_IBM.
 * The platform check is skipped for ccsid 1392.
 */
1931 static void TestCCSID() {
1932 #if !UCONFIG_NO_LEGACY_CONVERSION
1934 UErrorCode errorCode
;
1935 int32_t ccsids
[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1938 for(i
=0; i
<(int32_t)(sizeof(ccsids
)/sizeof(int32_t)); ++i
) {
1941 errorCode
=U_ZERO_ERROR
;
1942 cnv
=ucnv_openCCSID(ccsid
, UCNV_IBM
, &errorCode
);
1943 if(U_FAILURE(errorCode
)) {
1944 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid
, u_errorName(errorCode
));
1948 if(ccsid
!=ucnv_getCCSID(cnv
, &errorCode
)) {
1949 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid
, ucnv_getCCSID(cnv
, &errorCode
));
1952 /* skip gb18030(ccsid 1392) */
1953 if(ccsid
!= 1392 && UCNV_IBM
!=ucnv_getPlatform(cnv
, &errorCode
)) {
1954 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid
, ucnv_getPlatform(cnv
, &errorCode
));
1962 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1964 /* CHUNK_SIZE defined in common\ucnv.c: */
/* CHUNK_SIZE also sizes the local buffers of the tests below (char_in, char_out, pivotBuffer, targetBuffer) */
1965 #define CHUNK_SIZE 1024
/* the three j932 regression cases, run one after another below */
1967 static void bug1(void);
1968 static void bug2(void);
1969 static void bug3(void);
1974 bug1(); /* Unicode intermediate buffer straddle bug */
1975 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1976 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1980 * jitterbug 932: test chunking boundary conditions in
1982 int32_t ucnv_convert(const char *toConverterName,
1983 const char *fromConverterName,
1990 * See discussions on the icu mailing list in
1991 * 2001-April with the subject "converter 'flush' question".
1993 * Bug report and test code provided by Edward J. Batutis.
/*
 * j932 case 1: for i = test_seq_len down to 0, char_in is filled with 0x61
 * ('a'), the 4-byte GB 18030 sequence test_seq is copied to offset
 * CHUNK_SIZE - i, and ucnv_convert() is run with "us-ascii" as the output
 * converter. Getting U_TRUNCATED_CHAR_FOUND back is reported as a failure.
 */
1997 #if !UCONFIG_NO_LEGACY_CONVERSION
1998 char char_in
[CHUNK_SIZE
+32];
1999 char char_out
[CHUNK_SIZE
*2];
2001 /* GB 18030 equivalent of U+10000 is 90308130 */
2002 static const char test_seq
[]={ (char)0x90u
, 0x30, (char)0x81u
, 0x30 };
2004 UErrorCode err
= U_ZERO_ERROR
;
2005 int32_t i
, test_seq_len
= sizeof(test_seq
);
2008 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
2009 * until the straddle bug appears. I didn't want to hard-code everything so this test could
2010 * be expanded - however this is the only type of straddle bug I can think of at the moment -
2011 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
2012 * other Unicode sequences cause a bug since combining sequences are not supported by the
2016 for (i
= test_seq_len
; i
>= 0; i
--) {
2017 /* put character sequence into input buffer */
2018 memset(char_in
, 0x61, sizeof(char_in
)); /* GB 18030 'a' */
/* i bytes of the sequence land before offset CHUNK_SIZE, the rest at or after it */
2019 memcpy(char_in
+ (CHUNK_SIZE
- i
), test_seq
, test_seq_len
);
2021 /* do the conversion */
2022 ucnv_convert("us-ascii", /* out */
2031 if (err
== U_TRUNCATED_CHAR_FOUND
) {
2032 /* this happens when surrogate pair straddles the intermediate buffer in
2033 * T_UConverter_fromCodepageToCodepage */
2034 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
2040 /* bug2: pre-flighting loop bug: simple overflow causes bug */
/*
 * Runs three ucnv_convert() calls whose output does not fit the 5-byte
 * target[]. Per the failure messages, the size returned must be the full
 * output length (10, 32 and 12 respectively), not the target capacity.
 */
2043 /* US-ASCII "1234567890" */
2044 static const char source
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2045 static const char sourceUTF8
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2046 static const char sourceUTF32
[]={ 0x00, 0x00, 0x00, 0x30,
2047 0x00, 0x00, 0x00, 0x31,
2048 0x00, 0x00, 0x00, 0x32,
2049 0x00, 0x00, 0x00, 0x33,
2050 0x00, 0x00, 0x00, 0x34,
2051 0x00, 0x00, 0x00, 0x35,
2052 0x00, 0x00, 0x00, 0x36,
2053 0x00, 0x00, 0x00, 0x37,
2054 0x00, 0x00, 0x00, 0x38,
2055 0x00, 0x00, (char)0xf0, 0x00};
2056 static char target
[5];
2058 UErrorCode err
= U_ZERO_ERROR
;
2061 /* do the conversion */
2062 size
= ucnv_convert("iso-8859-1", /* out */
2063 "us-ascii", /* in */
2071 /* bug2: size is 5, should be 10 */
2072 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size
);
2076 /* do the conversion */
2077 size
= ucnv_convert("UTF-32BE", /* out */
2086 /* bug2: size is 5, should be 32 */
2087 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size
);
2091 /* do the conversion */
2092 size
= ucnv_convert("UTF-8", /* out */
2093 "UTF-32BE", /* in */
2097 sizeof(sourceUTF32
),
2101 /* bug2: size is 5, should be 12 */
2102 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size
);
2107 * bug3: when the characters expand going from source to target codepage
2108 * you get bug3 in addition to bug2
/*
 * j932 case 3: converts the CHUNK_SIZE*4 bytes of char_in from "us-ascii" to
 * "lmbcs" twice. With the buffer filled with 0x61 the returned size must
 * equal sizeof(char_in); with the buffer filled with 0x08 it must be
 * sizeof(char_in) * 2.
 * NOTE(review): both failure messages pass a sizeof expression (size_t) to a
 * "%04x" conversion.
 */
2112 #if !UCONFIG_NO_LEGACY_CONVERSION
2113 char char_in
[CHUNK_SIZE
*4];
2115 UErrorCode err
= U_ZERO_ERROR
;
2119 * first get the buggy size from bug2 then
2120 * compare it to buggy size with an expansion
2122 memset(char_in
, 0x61, sizeof(char_in
)); /* US-ASCII 'a' */
2124 /* do the conversion */
2125 size
= ucnv_convert("lmbcs", /* out */
2126 "us-ascii", /* in */
2133 if ( size
!= sizeof(char_in
) ) {
2135 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2136 * in the converter?), should be CHUNK_SIZE*4
2138 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2140 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in
), size
);
2144 * now do the conversion with expansion
2145 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2147 memset(char_in
, 8, sizeof(char_in
));
2150 /* do the conversion */
2151 size
= ucnv_convert("lmbcs", /* out */
2152 "us-ascii", /* in */
2159 /* expect 2X expansion */
2160 if ( size
!= sizeof(char_in
) * 2 ) {
2163 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2165 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in
) * 2, size
);
/*
 * Converts src[0..srcLength) from srcCnv to targetCnv with ucnv_convertEx(),
 * feeding it piecewise: the source window, the pivot buffer and the target
 * window are each limited to chunkSize units (chunkSize is capped at
 * CHUNK_SIZE). Both converters are reset before the run. When a call ends
 * with U_BUFFER_OVERFLOW_ERROR the error is cleared, the target window is
 * moved forward and conversion continues.
 * At the end the error code, the number of bytes written and the bytes
 * themselves are compared with expectCode, expectTargetLength and
 * expectTarget. U_STRING_NOT_TERMINATED_WARNING is accepted where
 * U_ZERO_ERROR is expected.
 */
2171 convertExStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2172 const char *src
, int32_t srcLength
,
2173 const char *expectTarget
, int32_t expectTargetLength
,
2175 const char *testName
,
2176 UErrorCode expectCode
) {
2177 UChar pivotBuffer
[CHUNK_SIZE
];
2178 UChar
*pivotSource
, *pivotTarget
;
2179 const UChar
*pivotLimit
;
2181 char targetBuffer
[CHUNK_SIZE
];
2183 const char *srcLimit
, *finalSrcLimit
, *targetLimit
;
2185 int32_t targetLength
;
2189 UErrorCode errorCode
;
2192 if(chunkSize
>CHUNK_SIZE
) {
2193 chunkSize
=CHUNK_SIZE
;
/* only the first chunkSize units of the pivot buffer are offered to ucnv_convertEx() */
2196 pivotSource
=pivotTarget
=pivotBuffer
;
2197 pivotLimit
=pivotBuffer
+chunkSize
;
2199 finalSrcLimit
=src
+srcLength
;
2200 target
=targetBuffer
;
2201 targetLimit
=targetBuffer
+chunkSize
;
2203 ucnv_resetToUnicode(srcCnv
);
2204 ucnv_resetFromUnicode(targetCnv
);
2206 errorCode
=U_ZERO_ERROR
;
2209 /* convert, streaming-style (both converters and pivot keep state) */
2211 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2212 if(src
+chunkSize
<=finalSrcLimit
) {
2213 srcLimit
=src
+chunkSize
;
2215 srcLimit
=finalSrcLimit
;
2217 ucnv_convertEx(targetCnv
, srcCnv
,
2218 &target
, targetLimit
,
2220 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
2221 FALSE
, flush
, &errorCode
);
2222 targetLength
=(int32_t)(target
-targetBuffer
);
2223 if(target
>targetLimit
) {
2224 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2225 testName
, chunkSize
, target
, targetLimit
);
2226 break; /* TODO: major problem! */
2228 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2229 /* continue converting another chunk */
2230 errorCode
=U_ZERO_ERROR
;
/* open the target window by another chunkSize bytes, but never past the end of targetBuffer */
2231 if(targetLength
+chunkSize
<=sizeof(targetBuffer
)) {
2232 targetLimit
=target
+chunkSize
;
2234 targetLimit
=targetBuffer
+sizeof(targetBuffer
);
2236 } else if(U_FAILURE(errorCode
)) {
2242 } else if(src
==finalSrcLimit
&& pivotSource
==pivotTarget
) {
2243 /* all consumed, now flush without input (separate from conversion for testing) */
/* compare the outcome with the expectations: error code first, then length, then content */
2248 if(!(errorCode
==expectCode
|| (expectCode
==U_ZERO_ERROR
&& errorCode
==U_STRING_NOT_TERMINATED_WARNING
))) {
2249 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2250 testName
, chunkSize
, u_errorName(errorCode
), u_errorName(expectCode
));
2251 } else if(targetLength
!=expectTargetLength
) {
2252 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2253 testName
, chunkSize
, targetLength
, expectTargetLength
);
2254 } else if(memcmp(targetBuffer
, expectTarget
, targetLength
)!=0) {
2255 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2256 testName
, chunkSize
);
2261 convertExMultiStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2262 const char *src
, int32_t srcLength
,
2263 const char *expectTarget
, int32_t expectTargetLength
,
2264 const char *testName
,
2265 UErrorCode expectCode
) {
2266 convertExStreaming(srcCnv
, targetCnv
,
2268 expectTarget
, expectTargetLength
,
2269 1, testName
, expectCode
);
2270 convertExStreaming(srcCnv
, targetCnv
,
2272 expectTarget
, expectTargetLength
,
2273 3, testName
, expectCode
);
2274 convertExStreaming(srcCnv
, targetCnv
,
2276 expectTarget
, expectTargetLength
,
2277 7, testName
, expectCode
);
2280 static void TestConvertEx() {
2281 #if !UCONFIG_NO_LEGACY_CONVERSION
2282 static const uint8_t
2284 /* 4e00 30a1 ff61 0410 */
2285 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2288 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2292 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2293 * SUB, SUB, 0x40, SUB, SUB, 0x40
2295 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2298 char srcBuffer
[100], targetBuffer
[100];
2303 UChar pivotBuffer
[100];
2304 UChar
*pivotSource
, *pivotTarget
;
2306 UConverter
*cnv1
, *cnv2
;
2307 UErrorCode errorCode
;
2309 errorCode
=U_ZERO_ERROR
;
2310 cnv1
=ucnv_open("UTF-8", &errorCode
);
2311 if(U_FAILURE(errorCode
)) {
2312 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode
));
2316 cnv2
=ucnv_open("Shift-JIS", &errorCode
);
2317 if(U_FAILURE(errorCode
)) {
2318 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2323 /* test ucnv_convertEx() with streaming conversion style */
2324 convertExMultiStreaming(cnv1
, cnv2
,
2325 (const char *)utf8
, sizeof(utf8
), (const char *)shiftJIS
, sizeof(shiftJIS
),
2326 "UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2328 convertExMultiStreaming(cnv2
, cnv1
,
2329 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
),
2330 "Shift-JIS -> UTF-8", U_ZERO_ERROR
);
2332 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2333 convertExMultiStreaming(cnv1
, cnv2
,
2334 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)errorTarget
, sizeof(errorTarget
),
2335 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2337 /* test some simple conversions */
2339 /* NUL-terminated source and target */
2340 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2341 memcpy(srcBuffer
, utf8
, sizeof(utf8
));
2342 srcBuffer
[sizeof(utf8
)]=0;
2344 target
=targetBuffer
;
2345 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2346 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2347 if( errorCode
!=U_ZERO_ERROR
||
2348 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2350 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
2352 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2353 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2356 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2357 errorCode
=U_AMBIGUOUS_ALIAS_WARNING
;
2358 memset(targetBuffer
, 0xff, sizeof(targetBuffer
));
2360 target
=targetBuffer
;
2361 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(shiftJIS
), &src
, NULL
,
2362 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2363 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2364 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2365 *target
!=(char)0xff ||
2366 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
2368 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2369 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2373 errorCode
=U_MESSAGE_PARSE_ERROR
;
2375 target
=targetBuffer
;
2376 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2377 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2378 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2379 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2382 /* pivotLimit==pivotStart */
2383 errorCode
=U_ZERO_ERROR
;
2384 pivotSource
=pivotTarget
=pivotBuffer
;
2385 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2386 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
, TRUE
, TRUE
, &errorCode
);
2387 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2388 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode
));
2391 /* *pivotSource==NULL */
2392 errorCode
=U_ZERO_ERROR
;
2394 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2395 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2396 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2397 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode
));
2401 errorCode
=U_ZERO_ERROR
;
2403 pivotSource
=pivotBuffer
;
2404 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2405 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2406 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2407 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode
));
2410 /* streaming conversion without a pivot buffer */
2411 errorCode
=U_ZERO_ERROR
;
2413 pivotSource
=pivotBuffer
;
2414 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2415 NULL
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, FALSE
, &errorCode
);
2416 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2417 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode
));
2425 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
2426 static const char *const badUTF8
[]={
2430 /* truncated multi-byte sequences */
2467 "\xfc\x80\x80\x80\x80",
2469 /* complete sequences but non-shortest forms or out of range etc. */
2475 "\xf8\x80\x80\x80\x80",
2476 "\xfc\x80\x80\x80\x80\x80",
2481 /* get some character that can be converted and convert it */
2482 static UBool
getTestChar(UConverter
*cnv
, const char *converterName
,
2483 char charUTF8
[4], int32_t *pCharUTF8Length
,
2484 char char0
[8], int32_t *pChar0Length
,
2485 char char1
[8], int32_t *pChar1Length
) {
2486 UChar utf16
[U16_MAX_LENGTH
];
2487 int32_t utf16Length
;
2489 const UChar
*utf16Source
;
2494 UErrorCode errorCode
;
2496 errorCode
=U_ZERO_ERROR
;
2497 set
=uset_open(1, 0);
2498 ucnv_getUnicodeSet(cnv
, set
, UCNV_ROUNDTRIP_SET
, &errorCode
);
2499 c
=uset_charAt(set
, uset_size(set
)/2);
2503 U16_APPEND_UNSAFE(utf16
, utf16Length
, c
);
2505 U8_APPEND_UNSAFE(charUTF8
, *pCharUTF8Length
, c
);
2509 ucnv_fromUnicode(cnv
,
2510 &target
, char0
+sizeof(char0
),
2511 &utf16Source
, utf16
+utf16Length
,
2512 NULL
, FALSE
, &errorCode
);
2513 *pChar0Length
=(int32_t)(target
-char0
);
2517 ucnv_fromUnicode(cnv
,
2518 &target
, char1
+sizeof(char1
),
2519 &utf16Source
, utf16
+utf16Length
,
2520 NULL
, FALSE
, &errorCode
);
2521 *pChar1Length
=(int32_t)(target
-char1
);
2523 if(U_FAILURE(errorCode
)) {
2524 log_err("unable to get test character for %s - %s\n", converterName
, u_errorName(errorCode
));
2530 static void testFromTruncatedUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2531 char charUTF8
[4], int32_t charUTF8Length
,
2532 char char0
[8], int32_t char0Length
,
2533 char char1
[8], int32_t char1Length
) {
2538 int32_t outputLength
;
2540 char invalidChars
[8];
2541 int8_t invalidLength
;
2546 UChar pivotBuffer
[8];
2547 UChar
*pivotSource
, *pivotTarget
;
2549 UErrorCode errorCode
;
2552 /* test truncated sequences */
2553 errorCode
=U_ZERO_ERROR
;
2554 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2556 memcpy(utf8
, charUTF8
, charUTF8Length
);
2558 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2559 /* truncated sequence? */
2560 int32_t length
=strlen(badUTF8
[i
]);
2561 if(length
>=(1+U8_COUNT_TRAIL_BYTES(badUTF8
[i
][0]))) {
2565 /* assemble a string with the test character and the truncated sequence */
2566 memcpy(utf8
+charUTF8Length
, badUTF8
[i
], length
);
2567 utf8Length
=charUTF8Length
+length
;
2569 /* convert and check the invalidChars */
2572 pivotSource
=pivotTarget
=pivotBuffer
;
2573 errorCode
=U_ZERO_ERROR
;
2574 ucnv_convertEx(cnv
, utf8Cnv
,
2575 &target
, output
+sizeof(output
),
2576 &source
, utf8
+utf8Length
,
2577 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2578 TRUE
, TRUE
, /* reset & flush */
2580 outputLength
=(int32_t)(target
-output
);
2581 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
|| pivotSource
!=pivotBuffer
) {
2582 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode
), converterName
, (long)i
);
2586 errorCode
=U_ZERO_ERROR
;
2587 invalidLength
=(int8_t)sizeof(invalidChars
);
2588 ucnv_getInvalidChars(utf8Cnv
, invalidChars
, &invalidLength
, &errorCode
);
2589 if(invalidLength
!=length
|| 0!=memcmp(invalidChars
, badUTF8
[i
], length
)) {
2590 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName
, (long)i
);
2595 static void testFromBadUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2596 char charUTF8
[4], int32_t charUTF8Length
,
2597 char char0
[8], int32_t char0Length
,
2598 char char1
[8], int32_t char1Length
) {
2599 char utf8
[600], expect
[600];
2600 int32_t utf8Length
, expectLength
;
2604 UErrorCode errorCode
;
2607 errorCode
=U_ZERO_ERROR
;
2608 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, NULL
, &errorCode
);
2611 * assemble an input string with the test character between each
2613 * and an expected string with repeated test character output
2615 memcpy(utf8
, charUTF8
, charUTF8Length
);
2616 utf8Length
=charUTF8Length
;
2618 memcpy(expect
, char0
, char0Length
);
2619 expectLength
=char0Length
;
2621 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2622 int32_t length
=strlen(badUTF8
[i
]);
2623 memcpy(utf8
+utf8Length
, badUTF8
[i
], length
);
2626 memcpy(utf8
+utf8Length
, charUTF8
, charUTF8Length
);
2627 utf8Length
+=charUTF8Length
;
2629 memcpy(expect
+expectLength
, char1
, char1Length
);
2630 expectLength
+=char1Length
;
2633 /* expect that each bad UTF-8 sequence is detected and skipped */
2634 strcpy(testName
, "from bad UTF-8 to ");
2635 strcat(testName
, converterName
);
2637 convertExMultiStreaming(utf8Cnv
, cnv
,
2639 expect
, expectLength
,
2644 /* Test illegal UTF-8 input. */
2645 static void TestConvertExFromUTF8() {
2646 static const char *const converterNames
[]={
2647 #if !UCONFIG_NO_LEGACY_CONVERSION
2656 UConverter
*utf8Cnv
, *cnv
;
2657 UErrorCode errorCode
;
2660 /* fromUnicode versions of some character, from initial state and later */
2661 char charUTF8
[4], char0
[8], char1
[8];
2662 int32_t charUTF8Length
, char0Length
, char1Length
;
2664 errorCode
=U_ZERO_ERROR
;
2665 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2666 if(U_FAILURE(errorCode
)) {
2667 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2671 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2672 errorCode
=U_ZERO_ERROR
;
2673 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2674 if(U_FAILURE(errorCode
)) {
2675 log_data_err("unable to open %s converter - %s\n", converterNames
[i
], u_errorName(errorCode
));
2678 if(!getTestChar(cnv
, converterNames
[i
], charUTF8
, &charUTF8Length
, char0
, &char0Length
, char1
, &char1Length
)) {
2681 testFromTruncatedUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2682 testFromBadUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2685 ucnv_close(utf8Cnv
);
2688 static void TestConvertExFromUTF8_C5F0() {
2689 static const char *const converterNames
[]={
2690 #if !UCONFIG_NO_LEGACY_CONVERSION
2699 UConverter
*utf8Cnv
, *cnv
;
2700 UErrorCode errorCode
;
2703 static const char bad_utf8
[2]={ (char)0xC5, (char)0xF0 };
2704 /* Expect "��" (2x U+FFFD as decimal NCRs) */
2705 static const char twoNCRs
[16]={
2706 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2707 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2709 static const char twoFFFD
[6]={
2710 (char)0xef, (char)0xbf, (char)0xbd,
2711 (char)0xef, (char)0xbf, (char)0xbd
2713 const char *expected
;
2714 int32_t expectedLength
;
2715 char dest
[20]; /* longer than longest expectedLength */
2720 UChar pivotBuffer
[128];
2721 UChar
*pivotSource
, *pivotTarget
;
2723 errorCode
=U_ZERO_ERROR
;
2724 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2725 if(U_FAILURE(errorCode
)) {
2726 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2730 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2731 errorCode
=U_ZERO_ERROR
;
2732 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2733 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_ESCAPE
, UCNV_ESCAPE_XML_DEC
,
2734 NULL
, NULL
, &errorCode
);
2735 if(U_FAILURE(errorCode
)) {
2736 log_data_err("unable to open %s converter - %s\n",
2737 converterNames
[i
], u_errorName(errorCode
));
2742 uprv_memset(dest
, 9, sizeof(dest
));
2743 if(i
==LENGTHOF(converterNames
)-1) {
2744 /* conversion to UTF-8 yields two U+FFFD directly */
2748 /* conversion to a non-Unicode charset yields two NCRs */
2755 pivotSource
=pivotTarget
=pivotBuffer
;
2758 &target
, dest
+expectedLength
,
2759 &src
, bad_utf8
+sizeof(bad_utf8
),
2760 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2761 TRUE
, TRUE
, &errorCode
);
2762 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| src
!=bad_utf8
+2 ||
2763 target
!=dest
+expectedLength
|| 0!=uprv_memcmp(dest
, expected
, expectedLength
) ||
2764 dest
[expectedLength
]!=9
2766 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames
[i
]);
2770 ucnv_close(utf8Cnv
);
2774 TestConvertAlgorithmic() {
2775 #if !UCONFIG_NO_LEGACY_CONVERSION
2776 static const uint8_t
2778 /* 4e00 30a1 ff61 0410 */
2779 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2782 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2786 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2787 * SUB, SUB, 0x40, SUB, SUB, 0x40
2789 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2792 0xfe, 0xff /* BOM only, no text */
2795 0xff, 0xfe, 0, 0 /* BOM only, no text */
2798 char target
[100], utf8NUL
[100], shiftJISNUL
[100];
2801 UErrorCode errorCode
;
2805 errorCode
=U_ZERO_ERROR
;
2806 cnv
=ucnv_open("Shift-JIS", &errorCode
);
2807 if(U_FAILURE(errorCode
)) {
2808 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2813 memcpy(utf8NUL
, utf8
, sizeof(utf8
));
2814 utf8NUL
[sizeof(utf8
)]=0;
2815 memcpy(shiftJISNUL
, shiftJIS
, sizeof(shiftJIS
));
2816 shiftJISNUL
[sizeof(shiftJIS
)]=0;
2819 * The to/from algorithmic convenience functions share a common implementation,
2820 * so we need not test all permutations of them.
2823 /* length in, not terminated out */
2824 errorCode
=U_ZERO_ERROR
;
2825 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF8
, target
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
), &errorCode
);
2826 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2827 length
!=sizeof(shiftJIS
) ||
2828 memcmp(target
, shiftJIS
, length
)!=0
2830 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2831 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2834 /* terminated in and out */
2835 memset(target
, 0x55, sizeof(target
));
2836 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2837 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, -1, &errorCode
);
2838 if( errorCode
!=U_ZERO_ERROR
||
2839 length
!=sizeof(utf8
) ||
2840 memcmp(target
, utf8
, length
)!=0
2842 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2843 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2846 /* empty string, some target buffer */
2847 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2848 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, 0, &errorCode
);
2849 if( errorCode
!=U_ZERO_ERROR
||
2852 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2853 u_errorName(errorCode
), length
);
2856 /* pseudo-empty string, no target buffer */
2857 errorCode
=U_ZERO_ERROR
;
2858 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2859 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2862 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2863 u_errorName(errorCode
), length
);
2866 errorCode
=U_ZERO_ERROR
;
2867 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF32
, target
, 0, (const char *)utf32
, 4, &errorCode
);
2868 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2871 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2872 u_errorName(errorCode
), length
);
2876 errorCode
=U_MESSAGE_PARSE_ERROR
;
2877 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2878 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2879 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2883 errorCode
=U_ZERO_ERROR
;
2884 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, NULL
, 2, &errorCode
);
2885 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2886 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode
));
2889 /* illegal alg. type */
2890 errorCode
=U_ZERO_ERROR
;
2891 length
=ucnv_fromAlgorithmic(cnv
, (UConverterType
)99, target
, 0, (const char *)utf16
, 2, &errorCode
);
2892 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2893 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode
));
2899 static void TestLMBCSMaxChar(void) {
2900 static const struct {
2904 /* some non-LMBCS converters - perfect test setup here */
2915 { 4, "IMAP-mailbox-name"},
2918 { 1, "windows-1256"},
2930 { 3, "ISO-2022-KR"},
2931 { 6, "ISO-2022-JP"},
2932 { 8, "ISO-2022-CN"},
2950 for (idx
= 0; idx
< LENGTHOF(converter
); idx
++) {
2951 UErrorCode status
= U_ZERO_ERROR
;
2952 UConverter
*cnv
= cnv_open(converter
[idx
].name
, &status
);
2953 if (U_FAILURE(status
)) {
2956 if (converter
[idx
].maxSize
!= ucnv_getMaxCharSize(cnv
)) {
2957 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2958 converter
[idx
].name
, converter
[idx
].maxSize
, ucnv_getMaxCharSize(cnv
));
2963 /* mostly test that the macro compiles */
2964 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2965 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
2970 static void TestJ1968(void) {
2971 UErrorCode err
= U_ZERO_ERROR
;
2973 char myConvName
[] = "My really really really really really really really really really really really"
2974 " really really really really really really really really really really really"
2975 " really really really really really really really really long converter name";
2976 UChar myConvNameU
[sizeof(myConvName
)];
2978 u_charsToUChars(myConvName
, myConvNameU
, sizeof(myConvName
));
2981 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
+1] = 0;
2982 cnv
= ucnv_openU(myConvNameU
, &err
);
2983 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2984 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2988 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2989 cnv
= ucnv_openU(myConvNameU
, &err
);
2990 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2991 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2995 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
2996 cnv
= ucnv_openU(myConvNameU
, &err
);
2997 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2998 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3005 cnv
= ucnv_open(myConvName
, &err
);
3006 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3007 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3011 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = ',';
3012 cnv
= ucnv_open(myConvName
, &err
);
3013 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3014 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3018 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
3019 cnv
= ucnv_open(myConvName
, &err
);
3020 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3021 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3025 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
3026 strncpy(myConvName
+ UCNV_MAX_CONVERTER_NAME_LENGTH
, "locale=", 7);
3027 cnv
= ucnv_open(myConvName
, &err
);
3028 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3029 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3032 /* The comma isn't really a part of the converter name. */
3034 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
3035 cnv
= ucnv_open(myConvName
, &err
);
3036 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3037 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3041 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ' ';
3042 cnv
= ucnv_open(myConvName
, &err
);
3043 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3044 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3048 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
3049 cnv
= ucnv_open(myConvName
, &err
);
3050 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3051 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3056 #if !UCONFIG_NO_LEGACY_CONVERSION
3058 testSwap(const char *name
, UBool swap
) {
3060 * Test Unicode text.
3061 * Contains characters that are the highest for some of the
3062 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3063 * tables copies the entire tables.
3065 static const UChar text
[]={
3066 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3069 UChar uNormal
[32], uSwapped
[32];
3070 char normal
[32], swapped
[32];
3074 int32_t i
, normalLength
, swappedLength
;
3078 const char *swappedName
;
3079 UConverter
*cnv
, *swapCnv
;
3080 UErrorCode errorCode
;
3082 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3084 /* open both the normal and the LF/NL-swapping converters */
3085 strcpy(swapped
, name
);
3086 strcat(swapped
, UCNV_SWAP_LFNL_OPTION_STRING
);
3088 errorCode
=U_ZERO_ERROR
;
3089 swapCnv
=ucnv_open(swapped
, &errorCode
);
3090 cnv
=ucnv_open(name
, &errorCode
);
3091 if(U_FAILURE(errorCode
)) {
3092 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name
, swapped
, u_errorName(errorCode
));
3096 /* the name must contain the swap option if and only if we expect the converter to swap */
3097 swappedName
=ucnv_getName(swapCnv
, &errorCode
);
3098 if(U_FAILURE(errorCode
)) {
3099 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name
, u_errorName(errorCode
));
3103 pc
=strstr(swappedName
, UCNV_SWAP_LFNL_OPTION_STRING
);
3104 if(swap
!= (pc
!=NULL
)) {
3105 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name
, swappedName
, swap
);
3109 /* convert to EBCDIC */
3112 ucnv_fromUnicode(cnv
, &pc
, normal
+LENGTHOF(normal
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3113 normalLength
=(int32_t)(pc
-normal
);
3117 ucnv_fromUnicode(swapCnv
, &pc
, swapped
+LENGTHOF(swapped
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3118 swappedLength
=(int32_t)(pc
-swapped
);
3120 if(U_FAILURE(errorCode
)) {
3121 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name
, u_errorName(errorCode
));
3125 /* compare EBCDIC output */
3126 if(normalLength
!=swappedLength
) {
3127 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3130 for(i
=0; i
<normalLength
; ++i
) {
3131 /* swap EBCDIC LF/NL for comparison */
3136 } else if(c
==0x25) {
3142 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name
, i
, (uint8_t)swapped
[i
]);
3147 /* convert back to Unicode (may not roundtrip) */
3150 ucnv_toUnicode(cnv
, &pu
, uNormal
+LENGTHOF(uNormal
), (const char **)&pc
, normal
+normalLength
, NULL
, TRUE
, &errorCode
);
3151 normalLength
=(int32_t)(pu
-uNormal
);
3155 ucnv_toUnicode(swapCnv
, &pu
, uSwapped
+LENGTHOF(uSwapped
), (const char **)&pc
, normal
+swappedLength
, NULL
, TRUE
, &errorCode
);
3156 swappedLength
=(int32_t)(pu
-uSwapped
);
3158 if(U_FAILURE(errorCode
)) {
3159 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name
, u_errorName(errorCode
));
3163 /* compare EBCDIC output */
3164 if(normalLength
!=swappedLength
) {
3165 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3168 for(i
=0; i
<normalLength
; ++i
) {
3169 /* swap EBCDIC LF/NL for comparison */
3174 } else if(u
==0x85) {
3179 if(u
!=uSwapped
[i
]) {
3180 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name
, i
, uSwapped
[i
]);
3188 ucnv_close(swapCnv
);
3192 TestEBCDICSwapLFNL() {
3193 static const struct {
3198 { "ibm-1047", TRUE
},
3199 { "ibm-1140", TRUE
},
3200 { "ibm-930", TRUE
},
3201 { "iso-8859-3", FALSE
}
3206 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
3207 testSwap(tests
[i
].name
, tests
[i
].swap
);
3212 TestEBCDICSwapLFNL() {
3213 /* test nothing... */
3217 static const UVersionInfo ICU_34
= {3,4,0,0};
3219 static void TestFromUCountPending(){
3220 #if !UCONFIG_NO_LEGACY_CONVERSION
3221 UErrorCode status
= U_ZERO_ERROR
;
3222 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3223 static const struct {
3227 }fromUnicodeTests
[] = {
3230 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3231 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3234 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3235 if(U_FAILURE(status
)){
3236 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3239 for(i
=0; i
<LENGTHOF(fromUnicodeTests
); ++i
) {
3242 char* targetLimit
= target
+ 10;
3243 const UChar
* source
= fromUnicodeTests
[i
].input
;
3244 const UChar
* sourceLimit
= source
+ fromUnicodeTests
[i
].len
;
3247 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3248 len
= ucnv_fromUCountPending(cnv
, &status
);
3249 if(U_FAILURE(status
)){
3250 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3251 status
= U_ZERO_ERROR
;
3254 if(len
!= fromUnicodeTests
[i
].exp
){
3255 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3258 status
= U_ZERO_ERROR
;
3261 * The converter has to read the tail before it knows that
3262 * only head alone matches.
3263 * At the end, the output for head will overflow the target,
3264 * middle will be pending, and tail will not have been consumed.
3267 \U00101234 -> x (<U101234> \x07 |0)
3268 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3269 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3270 \U00060007 -> unassigned
3272 static const UChar head
[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3273 static const UChar middle
[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3274 static const UChar tail
[] = {0xDC07,0x0000};/* second half of \U00060007 */
3277 char* targetLimit
= target
+ 2; /* expect overflow from converting \U00101234\U00050005 */
3278 const UChar
* source
= head
;
3279 const UChar
* sourceLimit
= source
+ u_strlen(head
);
3282 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3283 len
= ucnv_fromUCountPending(cnv
, &status
);
3284 if(U_FAILURE(status
)){
3285 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3286 status
= U_ZERO_ERROR
;
3289 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3292 sourceLimit
= source
+ u_strlen(middle
);
3293 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3294 len
= ucnv_fromUCountPending(cnv
, &status
);
3295 if(U_FAILURE(status
)){
3296 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3297 status
= U_ZERO_ERROR
;
3300 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3303 sourceLimit
= source
+ u_strlen(tail
);
3304 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3305 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3306 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3308 status
= U_ZERO_ERROR
;
3309 len
= ucnv_fromUCountPending(cnv
, &status
);
3310 /* middle[1] is pending, tail has not been consumed */
3311 if(U_FAILURE(status
)){
3312 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status
));
3315 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * TestToUCountPending: checks the value returned by ucnv_toUCountPending()
 * after calls to ucnv_toUnicode().
 *
 * Part 1 opens the "test3" converter from the test data package and, for each
 * toUnicodeTests entry, converts the entry's input bytes and compares the
 * pending count with the entry's exp field.
 * Part 2 opens the "test4" converter and converts head, mid and tail in turn,
 * checking the pending count after each step.
 *
 * The whole body is compiled only when UCONFIG_NO_LEGACY_CONVERSION is 0.
 * NOTE(review): several lines of this function (some declarations, returns,
 * closing braces, comment delimiters) are not visible in this excerpt; the
 * comments below describe only the statements that are visible.
 */
3323 TestToUCountPending(){
3324 #if !UCONFIG_NO_LEGACY_CONVERSION
3325 UErrorCode status
= U_ZERO_ERROR
;
/*
 * Test table. The fields read below are .input, .len and .exp; presumably each
 * initializer is { input bytes, input length, expected pending count } --
 * TODO confirm against the struct declaration.
 */
3326 static const struct {
3330 }toUnicodeTests
[] = {
3332 {{0x05, 0x01, 0x02},3,3},
3334 {{0x07, 0x00, 0x01, 0x02},4,4},
/* oldToUAction stays NULL and is passed as-is to ucnv_setToUCallBack() below. */
3338 UConverterToUCallback
*oldToUAction
= NULL
;
3339 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
/* A converter that cannot be opened is reported via log_data_err, not log_err. */
3340 if(U_FAILURE(status
)){
3341 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
/* Install the STOP to-Unicode callback on the converter. */
3344 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3345 for(i
=0; i
<LENGTHOF(toUnicodeTests
); ++i
) {
/* Target window is 20 UChars starting at tgt; source window is the entry's input of length .len. */
3347 UChar
* target
= tgt
;
3348 UChar
* targetLimit
= target
+ 20;
3349 const char* source
= toUnicodeTests
[i
].input
;
3350 const char* sourceLimit
= source
+ toUnicodeTests
[i
].len
;
/* Convert without flushing (flush argument is FALSE), then ask how much input is still pending. */
3353 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3354 len
= ucnv_toUCountPending(cnv
,&status
);
/*
 * status is shared by both calls above, so a failure of either one lands
 * here; the status is reset so the next table entry starts clean.
 */
3355 if(U_FAILURE(status
)){
3356 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3357 status
= U_ZERO_ERROR
;
/*
 * NOTE(review): the message below misspells "expected" and names
 * ucnv_toUInputConsumed although the call made is ucnv_toUCountPending.
 */
3360 if(len
!= toUnicodeTests
[i
].exp
){
3361 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3364 status
= U_ZERO_ERROR
;
3369 * The converter has to read the tail before it knows that
3370 * only head alone matches.
3371 * At the end, the output for head will overflow the target,
3372 * mid will be pending, and tail will not have been consumed.
/* head, mid and tail each hold four data bytes plus a terminating 0x00, so strlen() on them yields 4. */
3374 char head
[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3375 char mid
[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3376 char tail
[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3378 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3379 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3380 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3381 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
/* Only one UChar of target space is offered for this part of the test. */
3384 UChar
* target
= tgt
;
3385 UChar
* targetLimit
= target
+ 1; /* expect overflow from converting */
3386 const char* source
= head
;
3387 const char* sourceLimit
= source
+ strlen(head
);
3389 cnv
= ucnv_openPackage(loadTestData(&status
), "test4", &status
);
/* NOTE(review): the converter opened above is "test4" but the message below says test3. */
3390 if(U_FAILURE(status
)){
3391 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3394 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
/* Step 1: convert head and query the pending count. */
3395 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3396 len
= ucnv_toUCountPending(cnv
,&status
);
3397 if(U_FAILURE(status
)){
3398 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3401 log_err("Did not get the expected len for head.\n");
/* Step 2: the source limit is moved strlen(mid) bytes past source, then convert and query again. */
3404 sourceLimit
= source
+strlen(mid
);
3405 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3406 len
= ucnv_toUCountPending(cnv
,&status
);
3407 if(U_FAILURE(status
)){
3408 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3411 log_err("Did not get the expected len for mid.\n");
/*
 * Step 3: targetLimit is set equal to target, leaving no output space, and
 * the call is required to end with U_BUFFER_OVERFLOW_ERROR.
 */
3415 sourceLimit
= source
+strlen(tail
);
3416 targetLimit
= target
;
3417 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3418 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3419 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
/* Clear the expected overflow status before querying the pending count. */
3421 status
= U_ZERO_ERROR
;
3422 len
= ucnv_toUCountPending(cnv
,&status
);
3423 /* mid[4] is pending, tail has not been consumed */
3424 if(U_FAILURE(status
)){
3425 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status
));
3428 log_err("Did not get the expected len for tail.\n");
/*
 * TestOneDefaultNameChange: sets the default converter name to `name` and
 * verifies the result in two ways.
 *
 * name     - converter name passed to ucnv_setDefaultName().
 * expected - name that ucnv_getDefaultName() and, for a converter opened with
 *            a NULL name, ucnv_getName() must return afterwards.
 *
 * Mismatches and open failures are reported through log_err; successes are
 * reported through log_verbose.
 */
3435 static void TestOneDefaultNameChange(const char *name
, const char *expected
) {
3436 UErrorCode status
= U_ZERO_ERROR
;
/* Check 1: the default name reported right after setting it. */
3438 ucnv_setDefaultName(name
);
3439 if(strcmp(ucnv_getDefaultName(), expected
)==0)
3440 log_verbose("setDefaultName of %s works.\n", name
);
3442 log_err("setDefaultName of %s failed\n", name
);
/* Check 2: open a converter with a NULL name and compare its reported name. */
3443 cnv
=ucnv_open(NULL
, &status
);
3444 if (U_FAILURE(status
) || cnv
== NULL
) {
3445 log_err("opening the default converter of %s failed\n", name
);
3448 if(strcmp(ucnv_getName(cnv
, &status
), expected
)==0)
3449 log_verbose("ucnv_getName of %s works.\n", name
);
3451 log_err("ucnv_getName of %s failed\n", name
);
/*
 * TestDefaultName: saves the current default converter name, runs
 * TestOneDefaultNameChange() for several names, then restores the saved name.
 *
 * When U_CHARSET_IS_UTF8 is set, every change is expected to leave the name
 * at "UTF-8"; in the other branch each name is expected to be reported back
 * unchanged (the ISCII cases only when legacy conversion is available).
 */
3455 static void TestDefaultName(void) {
3456 /*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
/*
 * NOTE(review): strcpy is unbounded; this assumes the default name fits in
 * UCNV_MAX_CONVERTER_NAME_LENGTH + 1 bytes -- confirm.
 */
3457 static char defaultName
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1];
3458 strcpy(defaultName
, ucnv_getDefaultName());
3460 log_verbose("getDefaultName returned %s\n", defaultName
);
3462 /*change the default name by setting it */
3463 TestOneDefaultNameChange("UTF-8", "UTF-8");
3464 #if U_CHARSET_IS_UTF8
3465 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3466 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3467 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3469 # if !UCONFIG_NO_LEGACY_CONVERSION
3470 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3471 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3473 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3476 /*set the default name back*/
3477 ucnv_setDefaultName(defaultName
);
3480 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * compareNames: runs ucnv_compareNames() over a NULL-terminated list of names
 * and reports every pair whose result has the wrong sign.
 *
 * names - array of strings ending with NULL. The relation string is tested
 *         for a leading '=' or '<'; presumably it is the first array element
 *         and selects the expected sign stored in rel -- TODO confirm, the
 *         lines that set relation, rel and name1 are not visible in this
 *         excerpt.
 */
3494 compareNames(const char **names
) {
3495 const char *relation
, *name1
, *name2
;
3499 if(*relation
=='=') {
3501 } else if(*relation
=='<') {
/* Walk the remaining entries until the NULL terminator, comparing each against name1. */
3511 while((name2
=*names
++)!=NULL
) {
3512 result
=ucnv_compareNames(name1
, name2
);
/* sign() is defined outside this excerpt; only the sign of the result is compared with rel. */
3513 if(sign(result
)!=rel
) {
3514 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1
, name2
, result
, rel
);
/*
 * TestCompareNames: feeds four fixed name tables to compareNames().
 * Each table starts with a relation marker ("=" or "<"), followed by the
 * converter name spellings to compare, and ends with NULL.
 */
3521 TestCompareNames() {
/* Spellings that should compare as equal. */
3522 static const char *equalUTF8
[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL
};
3523 static const char *equalIBM
[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL
};
/* Tables marked "<" (ordering relation; exact pairing is decided inside compareNames). */
3524 static const char *lessMac
[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL
};
3525 static const char *lessUTF080
[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL
};
3527 compareNames(equalUTF8
);
3528 compareNames(equalIBM
);
3529 compareNames(lessMac
);
3530 compareNames(lessUTF080
);
/*
 * Body of the substitution-string test (presumably TestSubstString; the
 * signature line is not visible in this excerpt -- TODO confirm).
 *
 * Visible steps:
 *  1. UTF-16 and UTF-32: convert the single code unit 0xd900 with
 *     ucnv_fromUChars() and require that ucnv_detectUnicodeSignature()
 *     recognises a signature in the output.
 *  2. ISO-8859-1: set a 5-UChar substitution string and require that
 *     ucnv_getSubstChars() returns the same five values as bytes.
 *  3. HZ (legacy conversion builds only): set the same string and require
 *     that ucnv_getSubstChars() reports length 0.
 */
3535 static const UChar surrogate
[1]={ 0xd900 };
/* sub and subChars hold the same five values (0x61..0x65) as UChars and as chars. */
3538 static const UChar sub
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3539 static const char subChars
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3541 UErrorCode errorCode
;
3545 /* UTF-16/32: test that the BOM is output before the sub character */
3546 errorCode
=U_ZERO_ERROR
;
3547 cnv
=ucnv_open("UTF-16", &errorCode
);
3548 if(U_FAILURE(errorCode
)) {
3549 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode
));
3552 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
/* Fails on a conversion error or when no Unicode signature is detected in the output. */
3554 if(U_FAILURE(errorCode
) ||
3556 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3558 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
/* Same check again with the UTF-32 converter. */
3561 errorCode
=U_ZERO_ERROR
;
3562 cnv
=ucnv_open("UTF-32", &errorCode
);
3563 if(U_FAILURE(errorCode
)) {
3564 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode
));
3567 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3569 if(U_FAILURE(errorCode
) ||
3571 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3573 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3576 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3577 errorCode
=U_ZERO_ERROR
;
3578 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
3579 if(U_FAILURE(errorCode
)) {
3580 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode
));
3583 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3584 if(U_FAILURE(errorCode
)) {
3585 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode
));
/* len8 is passed in as the buffer capacity and read back afterwards as the substitution length. */
3587 len8
= sizeof(buffer
);
3588 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3589 /* Stateless converter, we expect the string converted to charset bytes. */
3590 if(U_FAILURE(errorCode
) || len8
!=sizeof(subChars
) || 0!=uprv_memcmp(buffer
, subChars
, len8
)) {
3591 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode
));
3596 #if !UCONFIG_NO_LEGACY_CONVERSION
3597 errorCode
=U_ZERO_ERROR
;
3598 cnv
=ucnv_open("HZ", &errorCode
);
3599 if(U_FAILURE(errorCode
)) {
3600 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode
));
3603 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3604 if(U_FAILURE(errorCode
)) {
3605 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode
));
3607 len8
= sizeof(buffer
);
3608 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3609 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3610 if(U_FAILURE(errorCode
) || len8
!=0) {
3611 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode
));
3617 * Further testing of ucnv_setSubstString() is done via intltest convert.
3618 * We do not test edge cases of illegal arguments and similar because the
3619 * function implementation uses all of its parameters in calls to other
3620 * functions with UErrorCode parameters.
/*
 * InvalidArguments: passes deliberately malformed buffer pointers to
 * ucnv_fromUnicode() and ucnv_toUnicode() on a UTF-8 converter and requires
 * U_ILLEGAL_ARGUMENT_ERROR each time. Finally it verifies that neither
 * two-byte buffer was modified by the rejected calls.
 */
3625 InvalidArguments() {
3627 UErrorCode errorCode
;
/* The buffers are pre-filled with known values (1s and 2s) so that any stray write can be detected at the end. */
3628 char charBuffer
[2] = {1, 1};
3629 char ucharAsCharBuffer
[2] = {2, 2};
3630 char *charsPtr
= charBuffer
;
/*
 * ucharsBadPtr points one byte past ucharsPtr, i.e. into the middle of a
 * UChar (presumably UChar is two bytes wide -- confirm).
 */
3631 UChar
*ucharsPtr
= (UChar
*)ucharAsCharBuffer
;
3632 UChar
*ucharsBadPtr
= (UChar
*)(ucharAsCharBuffer
+ 1);
3634 errorCode
=U_ZERO_ERROR
;
3635 cnv
=ucnv_open("UTF-8", &errorCode
);
3636 if(U_FAILURE(errorCode
)) {
3637 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode
));
3641 errorCode
=U_ZERO_ERROR
;
3642 /* This one should fail because an incomplete UChar is being passed in */
3643 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsPtr
, ucharsBadPtr
, NULL
, TRUE
, &errorCode
);
3644 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3645 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3648 errorCode
=U_ZERO_ERROR
;
3649 /* This one should fail because the source pointer (ucharsBadPtr) is greater than the source limit (ucharsPtr) */
3650 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsBadPtr
, ucharsPtr
, NULL
, TRUE
, &errorCode
);
3651 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3652 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3655 errorCode
=U_ZERO_ERROR
;
3656 /* This one should fail because an incomplete UChar is being passed in */
3657 ucnv_toUnicode(cnv
, &ucharsPtr
, ucharsBadPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3658 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3659 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3662 errorCode
=U_ZERO_ERROR
;
3663 /* This one should fail because the target pointer (ucharsBadPtr) is greater than the target limit (ucharsPtr) */
3664 ucnv_toUnicode(cnv
, &ucharsBadPtr
, ucharsPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3665 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3666 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
/* None of the rejected calls may have written to either buffer. */
3669 if (charBuffer
[0] != 1 || charBuffer
[1] != 1
3670 || ucharAsCharBuffer
[0] != 2 || ucharAsCharBuffer
[1] != 2)
3672 log_err("Data was incorrectly written to buffers\n");
/*
 * TestGetName: for each (name to open, expected name) pair in names, opens a
 * converter by the first string and requires ucnv_getName() to return the
 * second string.
 */
3678 static void TestGetName() {
/* Flat list of pairs: even index = name passed to ucnv_open(), odd index = expected ucnv_getName() result. */
3679 static const char *const names
[] = {
3680 "Unicode", "UTF-16",
3681 "UnicodeBigUnmarked", "UTF-16BE",
3682 "UnicodeBig", "UTF-16BE,version=1",
3683 "UnicodeLittleUnmarked", "UTF-16LE",
3684 "UnicodeLittle", "UTF-16LE,version=1",
3685 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two so that i always indexes the first string of a pair. */
3688 for(i
= 0; i
< LENGTHOF(names
); i
+= 2) {
3689 UErrorCode errorCode
= U_ZERO_ERROR
;
3690 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
/* The name comparison is made only when the converter opened successfully. */
3691 if(U_SUCCESS(errorCode
)) {
3692 const char *name
= ucnv_getName(cnv
, &errorCode
);
3693 if(U_FAILURE(errorCode
) || 0 != strcmp(name
, names
[i
+1])) {
3694 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3695 names
[i
], name
, names
[i
+1], u_errorName(errorCode
));
/*
 * TestUTFBOM: for each converter name in names, converts the single UChar
 * 0x61 with ucnv_fromUChars() and compares the produced bytes with the
 * matching row of expected.
 *
 * NOTE(review): the contents of names and some rows of expected are not
 * visible in this excerpt.
 */
3702 static void TestUTFBOM() {
3703 static const UChar a16
[] = { 0x61 };
3704 static const char *const names
[] = {
/*
 * Each row: element 0 is the expected output length, the remaining elements
 * are the expected output bytes (see the exp[0] and exp+1 uses below).
 */
3712 static const uint8_t expected
[][5] = {
3714 { 4, 0xfe, 0xff, 0, 0x61 },
3715 { 4, 0xfe, 0xff, 0, 0x61 },
3717 { 4, 0xff, 0xfe, 0x61, 0 },
3718 { 4, 0xff, 0xfe, 0x61, 0 },
3722 { 4, 0xfe, 0xff, 0, 0x61 },
3725 { 4, 0xff, 0xfe, 0x61, 0 }
3731 for(i
= 0; i
< LENGTHOF(names
); ++i
) {
3732 UErrorCode errorCode
= U_ZERO_ERROR
;
3733 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
/* Row i of expected belongs to names[i]. */
3735 const uint8_t *exp
= expected
[i
];
3736 if (U_FAILURE(errorCode
)) {
3737 log_err_status(errorCode
, "Unable to open converter: %s got error code: %s\n", names
[i
], u_errorName(errorCode
));
3740 length
= ucnv_fromUChars(cnv
, bytes
, (int32_t)sizeof(bytes
), a16
, 1, &errorCode
);
/* Fails on a conversion error, a length different from exp[0], or any byte mismatch against exp+1. */
3742 if(U_FAILURE(errorCode
) || length
!= exp
[0] || 0 != memcmp(bytes
, exp
+1, length
)) {
3743 log_err("unexpected %s BOM writing behavior -- %s\n",
3744 names
[i
], u_errorName(errorCode
));