1 /********************************************************************
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
/* Number of elements in a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Size of the per-code-page tables in TestConvert. */
#define NUM_CODEPAGE 1
/*
 * Element count of the conversion buffers allocated in TestConvert.
 * Parenthesized so the value stays intact next to /, % or a cast.
 */
#define MAX_FILE_LEN (1024*20)
/* Size of the buffer that holds the test data file name. */
#define UCS_FILE_NAME_SIZE 512
37 /*returns an action other than the one provided*/
38 #if !UCONFIG_NO_LEGACY_CONVERSION
39 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
);
40 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
);
44 cnv_open(const char *name
, UErrorCode
*pErrorCode
) {
45 if(name
!=NULL
&& name
[0]=='*') {
46 return ucnv_openPackage(loadTestData(pErrorCode
), name
+1, pErrorCode
);
48 return ucnv_open(name
, pErrorCode
);
53 static void ListNames(void);
54 static void TestFlushCache(void);
55 static void TestDuplicateAlias(void);
56 static void TestCCSID(void);
57 static void TestJ932(void);
58 static void TestJ1968(void);
59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestLMBCSMaxChar(void);
63 #if !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestConvertSafeCloneCallback(void);
67 static void TestEBCDICSwapLFNL(void);
68 static void TestConvertEx(void);
69 static void TestConvertExFromUTF8(void);
70 static void TestConvertExFromUTF8_C5F0(void);
71 static void TestConvertAlgorithmic(void);
72 void TestDefaultConverterError(void); /* defined in cctest.c */
73 void TestDefaultConverterSet(void); /* defined in cctest.c */
74 static void TestToUCountPending(void);
75 static void TestFromUCountPending(void);
76 static void TestDefaultName(void);
77 static void TestCompareNames(void);
78 static void TestSubstString(void);
79 static void InvalidArguments(void);
80 static void TestGetName(void);
81 static void TestUTFBOM(void);
83 void addTestConvert(TestNode
** root
);
85 void addTestConvert(TestNode
** root
)
87 addTest(root
, &ListNames
, "tsconv/ccapitst/ListNames");
88 addTest(root
, &TestConvert
, "tsconv/ccapitst/TestConvert");
89 addTest(root
, &TestFlushCache
, "tsconv/ccapitst/TestFlushCache");
90 addTest(root
, &TestAlias
, "tsconv/ccapitst/TestAlias");
91 addTest(root
, &TestDuplicateAlias
, "tsconv/ccapitst/TestDuplicateAlias");
92 addTest(root
, &TestConvertSafeClone
, "tsconv/ccapitst/TestConvertSafeClone");
93 #if !UCONFIG_NO_LEGACY_CONVERSION
94 addTest(root
, &TestConvertSafeCloneCallback
,"tsconv/ccapitst/TestConvertSafeCloneCallback");
96 addTest(root
, &TestCCSID
, "tsconv/ccapitst/TestCCSID");
97 addTest(root
, &TestJ932
, "tsconv/ccapitst/TestJ932");
98 addTest(root
, &TestJ1968
, "tsconv/ccapitst/TestJ1968");
99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root
, &TestLMBCSMaxChar
, "tsconv/ccapitst/TestLMBCSMaxChar");
102 addTest(root
, &TestEBCDICSwapLFNL
, "tsconv/ccapitst/TestEBCDICSwapLFNL");
103 addTest(root
, &TestConvertEx
, "tsconv/ccapitst/TestConvertEx");
104 addTest(root
, &TestConvertExFromUTF8
, "tsconv/ccapitst/TestConvertExFromUTF8");
105 addTest(root
, &TestConvertExFromUTF8_C5F0
, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
106 addTest(root
, &TestConvertAlgorithmic
, "tsconv/ccapitst/TestConvertAlgorithmic");
107 addTest(root
, &TestDefaultConverterError
, "tsconv/ccapitst/TestDefaultConverterError");
108 addTest(root
, &TestDefaultConverterSet
, "tsconv/ccapitst/TestDefaultConverterSet");
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root
, &TestToUCountPending
, "tsconv/ccapitst/TestToUCountPending");
111 addTest(root
, &TestFromUCountPending
, "tsconv/ccapitst/TestFromUCountPending");
113 addTest(root
, &TestDefaultName
, "tsconv/ccapitst/TestDefaultName");
114 addTest(root
, &TestCompareNames
, "tsconv/ccapitst/TestCompareNames");
115 addTest(root
, &TestSubstString
, "tsconv/ccapitst/TestSubstString");
116 addTest(root
, &InvalidArguments
, "tsconv/ccapitst/InvalidArguments");
117 addTest(root
, &TestGetName
, "tsconv/ccapitst/TestGetName");
118 addTest(root
, &TestUTFBOM
, "tsconv/ccapitst/TestUTFBOM");
121 static void ListNames(void) {
122 UErrorCode err
= U_ZERO_ERROR
;
123 int32_t testLong1
= 0;
124 const char* available_conv
;
125 UEnumeration
*allNamesEnum
= NULL
;
126 int32_t allNamesCount
= 0;
129 log_verbose("Testing ucnv_openAllNames()...");
130 allNamesEnum
= ucnv_openAllNames(&err
);
132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err
));
135 const char *string
= NULL
;
139 allNamesCount
= uenum_count(allNamesEnum
, &err
);
140 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
142 log_verbose("read \"%s\", length %i\n", string
, len
);
144 if (U_FAILURE(err
)) {
145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err
));
148 uenum_reset(allNamesEnum
, &err
);
149 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
151 ucnv_close(ucnv_open(string
, &err
));
152 log_verbose("read \"%s\", length %i (%s)\n", string
, len
, U_SUCCESS(err
) ? "available" : "unavailable");
155 if (count1
!= count2
) {
156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
159 uenum_close(allNamesEnum
);
    /* Tests ucnv_countAvailable() and ucnv_getAvailableName() */
164 log_verbose("Testing ucnv_countAvailable()...");
166 testLong1
=ucnv_countAvailable();
167 log_info("Number of available codepages: %d/%d\n", testLong1
, allNamesCount
);
169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
171 available_conv
= ucnv_getAvailableName(testLong1
);
172 /*test ucnv_getAvailableName with err condition*/
173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
174 available_conv
= ucnv_getAvailableName(-1);
175 if(available_conv
!= NULL
){
176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
179 /* Test ucnv_countAliases() etc. */
180 count
= ucnv_countAliases("utf-8", &err
);
182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err
));
183 } else if(count
<= 0) {
184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count
);
186 /* try to get the aliases individually */
188 alias
= ucnv_getAlias("utf-8", 0, &err
);
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err
));
191 } else if(strcmp("UTF-8", alias
) != 0) {
192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias
);
195 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
196 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
199 } else if(strlen(alias
) > 20) {
201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum
, alias
);
203 log_verbose("alias %d for utf-8: %s\n", aliasNum
, alias
);
207 /* try to fill an array with all aliases */
208 const char **aliases
;
209 aliases
=(const char **)malloc(count
* sizeof(const char *));
211 ucnv_getAliases("utf-8", aliases
, &err
);
213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err
));
215 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
216 /* compare the pointers with the ones returned individually */
217 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
220 } else if(aliases
[aliasNum
] != alias
) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum
, aliasNum
);
225 free((char **)aliases
);
233 static void TestConvert()
235 #if !UCONFIG_NO_LEGACY_CONVERSION
238 int32_t testLong1
= 0;
242 FILE* ucs_file_in
= NULL
;
244 UChar myUChar
= 0x0000;
245 char* mytarget
; /* [MAX_FILE_LEN] */
248 UChar
* consumedUni
= NULL
;
249 char* consumed
= NULL
;
250 char* output_cp_buffer
; /* [MAX_FILE_LEN] */
251 UChar
* ucs_file_buffer
; /* [MAX_FILE_LEN] */
252 UChar
* ucs_file_buffer_use
;
253 UChar
* my_ucs_file_buffer
; /* [MAX_FILE_LEN] */
254 UChar
* my_ucs_file_buffer_1
;
256 uint16_t codepage_index
= 0;
258 UErrorCode err
= U_ZERO_ERROR
;
259 char ucs_file_name
[UCS_FILE_NAME_SIZE
];
260 UConverterFromUCallback MIA1
, MIA1_2
;
261 UConverterToUCallback MIA2
, MIA2_2
;
262 const void *MIA1Context
, *MIA1Context2
, *MIA2Context
, *MIA2Context2
;
263 UConverter
* someConverters
[5];
264 UConverter
* myConverter
= 0;
265 UChar
* displayname
= 0;
272 int32_t targetcapacity2
;
273 int32_t targetcapacity
;
277 const UChar
* tmp_ucs_buf
;
278 const UChar
* tmp_consumedUni
=NULL
;
279 const char* tmp_mytarget_use
;
280 const char* tmp_consumed
;
282 /******************************************************************
283 Checking Unicode -> ksc
284 ******************************************************************/
286 const char* CodePagesToTest
[NUM_CODEPAGE
] =
292 const uint16_t CodePageNumberToTest
[NUM_CODEPAGE
] =
298 const int8_t CodePagesMinChars
[NUM_CODEPAGE
] =
304 const int8_t CodePagesMaxChars
[NUM_CODEPAGE
] =
310 const uint16_t CodePagesSubstitutionChars
[NUM_CODEPAGE
] =
315 const char* CodePagesTestFiles
[NUM_CODEPAGE
] =
321 const UConverterPlatform CodePagesPlatform
[NUM_CODEPAGE
] =
327 const char* CodePagesLocale
[NUM_CODEPAGE
] =
332 UConverterFromUCallback oldFromUAction
= NULL
;
333 UConverterToUCallback oldToUAction
= NULL
;
334 const void* oldFromUContext
= NULL
;
335 const void* oldToUContext
= NULL
;
337 /* Allocate memory */
338 mytarget
= (char*) malloc(MAX_FILE_LEN
* sizeof(mytarget
[0]));
339 output_cp_buffer
= (char*) malloc(MAX_FILE_LEN
* sizeof(output_cp_buffer
[0]));
340 ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(ucs_file_buffer
[0]));
341 my_ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(my_ucs_file_buffer
[0]));
343 ucs_file_buffer_use
= ucs_file_buffer
;
345 mytarget_use
= mytarget
;
346 my_ucs_file_buffer_1
=my_ucs_file_buffer
;
348 /* flush the converter cache to get a consistent state before the flushing is tested */
351 /*Testing ucnv_openU()*/
353 UChar converterName
[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
354 UChar firstSortedName
[]={ 0x0021, 0x0000}; /* ! */
355 UChar lastSortedName
[]={ 0x007E, 0x0000}; /* ~ */
356 const char *illegalNameChars
={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
357 UChar illegalName
[100];
358 UConverter
*converter
=NULL
;
360 converter
=ucnv_openU(converterName
, &err
);
362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err
));
364 ucnv_close(converter
);
366 converter
=ucnv_openU(NULL
, &err
);
368 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err
));
370 ucnv_close(converter
);
371 /*testing with error value*/
372 err
=U_ILLEGAL_ARGUMENT_ERROR
;
373 converter
=ucnv_openU(converterName
, &err
);
374 if(!(converter
== NULL
)){
375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
377 ucnv_close(converter
);
379 u_uastrcpy(illegalName
, "");
380 u_uastrcpy(illegalName
, illegalNameChars
);
381 ucnv_openU(illegalName
, &err
);
382 if(!(err
==U_ILLEGAL_ARGUMENT_ERROR
)){
383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
387 ucnv_openU(firstSortedName
, &err
);
388 if(err
!=U_FILE_ACCESS_ERROR
){
389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
393 ucnv_openU(lastSortedName
, &err
);
394 if(err
!=U_FILE_ACCESS_ERROR
){
395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
402 UConverter
*cnv
=NULL
;
404 cnv
=ucnv_open("ibm-949,Madhu", &err
);
406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err
));
411 /*Testing ucnv_convert()*/
413 int32_t targetLimit
=0, sourceLimit
=0, i
=0, targetCapacity
=0;
414 const uint8_t source
[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
415 const uint8_t expectedTarget
[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
417 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
421 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", NULL
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
422 if(err
== U_BUFFER_OVERFLOW_ERROR
){
424 targetLimit
=targetCapacity
+1;
425 target
=(char*)malloc(sizeof(char) * targetLimit
);
426 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err
));
432 for(i
=0; i
<targetCapacity
; i
++){
433 if(target
[i
] != expectedTarget
[i
]){
434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i
, (UChar
)expectedTarget
[i
], (uint8_t)target
[i
]);
438 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
+1, -1, &err
);
439 if(U_FAILURE(err
) || i
!=7){
440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
441 u_errorName(err
), i
);
444 /*Test error conditions*/
446 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, 0, &err
);
448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
451 err
=U_ILLEGAL_ARGUMENT_ERROR
;
452 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
453 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
455 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
459 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
461 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
462 if(!(U_FAILURE(err
) && err
==U_BUFFER_OVERFLOW_ERROR
)){
463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
472 err
=U_ILLEGAL_ARGUMENT_ERROR
;
473 if(ucnv_open(NULL
, &err
) != NULL
){
474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
476 if(ucnv_openCCSID(1051, UCNV_IBM
, &err
) != NULL
){
477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
482 log_verbose("\n---Testing ucnv_open default...\n");
483 someConverters
[0] = ucnv_open(NULL
,&err
);
484 someConverters
[1] = ucnv_open(NULL
,&err
);
485 someConverters
[2] = ucnv_open("utf8", &err
);
486 someConverters
[3] = ucnv_openCCSID(949,UCNV_IBM
,&err
);
487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM
, &err
)); /* test for j350; ucnv_close(NULL) is safe */
488 if (U_FAILURE(err
)){ log_data_err("FAILURE! %s\n", myErrorName(err
));}
490 /* Testing ucnv_getName()*/
491 /*default code page */
492 ucnv_getName(someConverters
[0], &err
);
494 log_data_err("getName[0] failed\n");
496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters
[0], &err
));
498 ucnv_getName(someConverters
[1], &err
);
500 log_data_err("getName[1] failed\n");
502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters
[1], &err
));
505 ucnv_close(someConverters
[0]);
506 ucnv_close(someConverters
[1]);
507 ucnv_close(someConverters
[2]);
508 ucnv_close(someConverters
[3]);
511 for (codepage_index
=0; codepage_index
< NUM_CODEPAGE
; ++codepage_index
)
517 strcpy(ucs_file_name
, U_TOPSRCDIR U_FILE_SEP_STRING
"test"U_FILE_SEP_STRING
"testdata"U_FILE_SEP_STRING
);
519 strcpy(ucs_file_name
, loadTestData(&err
));
522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err
));
527 char* index
= strrchr(ucs_file_name
,(char)U_FILE_SEP_CHAR
);
529 if((unsigned int)(index
-ucs_file_name
) != (strlen(ucs_file_name
)-1)){
534 strcat(ucs_file_name
,".."U_FILE_SEP_STRING
);
536 strcat(ucs_file_name
, CodePagesTestFiles
[codepage_index
]);
538 ucs_file_in
= fopen(ucs_file_name
,"rb");
541 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name
);
    /* Creates the converter under test. The ucnv_openCCSID(code page, platform, &err) variant is commented out below; ucnv_open("ibm-949") is used instead. */
547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
548 /* ucnv_flushCache(); */
549 myConverter
=ucnv_open( "ibm-949", &err
);
550 if (!myConverter
|| U_FAILURE(err
))
552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err
));
557 /*testing for ucnv_getName() */
558 log_verbose("Testing ucnv_getName()...\n");
559 ucnv_getName(myConverter
, &err
);
561 log_err("Error in getName\n");
564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter
, &err
));
566 if (uprv_stricmp(ucnv_getName(myConverter
, &err
), CodePagesToTest
[codepage_index
]))
567 log_err("getName failed\n");
569 log_verbose("getName ok\n");
570 /*Test getName with error condition*/
573 err
=U_ILLEGAL_ARGUMENT_ERROR
;
574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
575 name
=ucnv_getName(myConverter
, &err
);
577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
585 log_verbose("Testing ucnv_getMaxCharSize()...\n");
586 if (ucnv_getMaxCharSize(myConverter
)==CodePagesMaxChars
[codepage_index
])
587 log_verbose("Max byte per character OK\n");
589 log_err("Max byte per character failed\n");
591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
592 if (ucnv_getMinCharSize(myConverter
)==CodePagesMinChars
[codepage_index
])
593 log_verbose("Min byte per character OK\n");
595 log_err("Min byte per character failed\n");
598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
599 log_verbose("\n---Testing ucnv_getSubstChars...\n");
601 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii
);
607 rest
= (uint16_t)(((unsigned char)rest
<< 8) + (unsigned char)myptr
[x
]);
608 if (rest
==CodePagesSubstitutionChars
[codepage_index
])
609 log_verbose("Substitution character ok\n");
611 log_err("Substitution character failed.\n");
613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
614 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
617 log_err("FAILURE! %s\n", myErrorName(err
));
619 ucnv_getSubstChars(myConverter
,save
, &ii
, &err
);
622 log_err("FAILURE! %s\n", myErrorName(err
));
625 if (strncmp(save
, myptr
, ii
))
626 log_err("Saved substitution character failed\n");
628 log_verbose("Saved substitution character ok\n");
630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
633 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
634 if(err
!= U_INDEX_OUTOFBOUNDS_ERROR
){
635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err
));
639 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
641 ucnv_setSubstChars(myConverter
, myptr
, 0, &err
);
642 if(err
!= U_ILLEGAL_ARGUMENT_ERROR
){
643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err
));
645 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
646 strcpy(myptr
, "abc");
647 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
649 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
650 if(strncmp(save
, myptr
, ii
) == 0){
651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
655 strcpy(myptr
, "abc");
656 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
657 err
=U_ILLEGAL_ARGUMENT_ERROR
;
658 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
659 if(strncmp(save
, myptr
, ii
) == 0){
660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
665 #ifdef U_ENABLE_GENERIC_ISO_2022
666 /*resetState ucnv_reset()*/
667 log_verbose("\n---Testing ucnv_reset()..\n");
668 ucnv_reset(myConverter
);
671 const uint8_t in
[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
672 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
673 UConverter
*cnv
=ucnv_open("ISO_2022", &err
);
675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
677 c
=ucnv_getNextUChar(cnv
, &source
, limit
, &err
);
678 if((U_FAILURE(err
) || c
!= (UChar32
)0x0031)) {
679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err
));
688 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
689 locale
=CodePagesLocale
[codepage_index
];
692 disnamelen
= ucnv_getDisplayName(myConverter
, locale
, displayname
, len
, &err
);
693 if(err
==U_BUFFER_OVERFLOW_ERROR
) {
695 displayname
=(UChar
*)malloc((disnamelen
+1) * sizeof(UChar
));
696 ucnv_getDisplayName(myConverter
,locale
,displayname
,disnamelen
+1, &err
);
698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err
));
701 log_verbose(" getDisplayName o.k.\n");
707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err
));
    /* test ucnv_getDisplayName() with err != U_ZERO_ERROR */
710 err
= U_ILLEGAL_ARGUMENT_ERROR
;
711 len
=ucnv_getDisplayName(myConverter
,locale
,NULL
,0, &err
);
713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
    /* test ucnv_getDisplayName() with a NULL converter */
717 len
=ucnv_getDisplayName(NULL
,locale
,NULL
,0, &err
);
718 if( len
!=0 || U_SUCCESS(err
)){
719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
724 ucnv_getFromUCallBack(myConverter
, &MIA1
, &MIA1Context
);
726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
727 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
728 if (U_FAILURE(err
) || oldFromUAction
!= MIA1
|| oldFromUContext
!= MIA1Context
)
730 log_err("FAILURE! %s\n", myErrorName(err
));
733 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
734 if (MIA1_2
!= otherUnicodeAction(MIA1
) || MIA1Context2
!= &BOM
)
735 log_err("get From UCallBack failed\n");
737 log_verbose("get From UCallBack ok\n");
739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
740 ucnv_setFromUCallBack(myConverter
,MIA1
, MIA1Context
, &oldFromUAction
, &oldFromUContext
, &err
);
741 if (U_FAILURE(err
) || oldFromUAction
!= otherUnicodeAction(MIA1
) || oldFromUContext
!= &BOM
)
743 log_err("FAILURE! %s\n", myErrorName(err
));
746 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
747 if (MIA1_2
!= MIA1
|| MIA1Context2
!= MIA1Context
)
748 log_err("get From UCallBack action failed\n");
750 log_verbose("get From UCallBack action ok\n");
    /* testing ucnv_setFromUCallBack() with error conditions */
753 err
=U_ILLEGAL_ARGUMENT_ERROR
;
754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
755 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
756 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
757 if(MIA1_2
== otherUnicodeAction(MIA1
) || MIA1Context2
== &BOM
){
758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
764 ucnv_getToUCallBack(myConverter
, &MIA2
, &MIA2Context
);
766 log_verbose("\n---Testing setTo UCallBack...\n");
767 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), &BOM
, &oldToUAction
, &oldToUContext
, &err
);
768 if (U_FAILURE(err
) || oldToUAction
!= MIA2
|| oldToUContext
!= MIA2Context
)
770 log_err("FAILURE! %s\n", myErrorName(err
));
773 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
774 if (MIA2_2
!= otherCharAction(MIA2
) || MIA2Context2
!= &BOM
)
775 log_err("To UCallBack failed\n");
777 log_verbose("To UCallBack ok\n");
779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
780 ucnv_setToUCallBack(myConverter
,MIA2
, MIA2Context
, &oldToUAction
, &oldToUContext
, &err
);
781 if (U_FAILURE(err
) || oldToUAction
!= otherCharAction(MIA2
) || oldToUContext
!= &BOM
)
782 { log_err("FAILURE! %s\n", myErrorName(err
)); }
784 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
785 if (MIA2_2
!= MIA2
|| MIA2Context2
!= MIA2Context
)
786 log_err("To UCallBack failed\n");
788 log_verbose("To UCallBack ok\n");
790 /*testing ucnv_setToUCallBack with error conditions*/
791 err
=U_ILLEGAL_ARGUMENT_ERROR
;
792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
793 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), NULL
, &oldToUAction
, &oldToUContext
, &err
);
794 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
795 if (MIA2_2
== otherCharAction(MIA2
) || MIA2Context2
== &BOM
){
796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
801 /*getcodepageid testing ucnv_getCCSID() */
802 log_verbose("\n----Testing getCCSID....\n");
803 cp
= ucnv_getCCSID(myConverter
,&err
);
806 log_err("FAILURE!..... %s\n", myErrorName(err
));
808 if (cp
!= CodePageNumberToTest
[codepage_index
])
809 log_err("Codepage number test failed\n");
811 log_verbose("Codepage number test OK\n");
813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
814 err
=U_ILLEGAL_ARGUMENT_ERROR
;
815 if( ucnv_getCCSID(myConverter
,&err
) != -1){
816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
820 /*getCodepagePlatform testing ucnv_getPlatform()*/
821 log_verbose("\n---Testing getCodepagePlatform ..\n");
822 if (CodePagesPlatform
[codepage_index
]!=ucnv_getPlatform(myConverter
, &err
))
823 log_err("Platform codepage test failed\n");
825 log_verbose("Platform codepage test ok\n");
829 log_err("FAILURE! %s\n", myErrorName(err
));
831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
832 err
= U_ILLEGAL_ARGUMENT_ERROR
;
833 if(ucnv_getPlatform(myConverter
, &err
) != UCNV_UNKNOWN
){
834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
841 // Note: gcc produces a compile warning if the return value from fread() is ignored.
842 size_t numRead
= fread(&BOM
, sizeof(UChar
), 1, ucs_file_in
);
845 if (BOM
!=0xFEFF && BOM
!=0xFFFE)
847 log_err("File Missing BOM...Bailing!\n");
853 /*Reads in the file*/
854 while(!feof(ucs_file_in
)&&(i
+=fread(ucs_file_buffer
+i
, sizeof(UChar
), 1, ucs_file_in
)))
856 myUChar
= ucs_file_buffer
[i
-1];
858 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN*/
861 myUChar
= ucs_file_buffer
[i
-1];
862 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN Corner Case*/
865 /*testing ucnv_fromUChars() and ucnv_toUChars() */
866 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
868 uchar1
=(UChar
*)malloc(sizeof(UChar
) * (i
+1));
869 u_uastrcpy(uchar1
,"");
870 u_strncpy(uchar1
,ucs_file_buffer
,i
);
873 uchar3
=(UChar
*)malloc(sizeof(UChar
)*(i
+1));
874 u_uastrcpy(uchar3
,"");
875 u_strncpy(uchar3
,ucs_file_buffer
,i
);
878 /*Calls the Conversion Routine */
879 testLong1
= MAX_FILE_LEN
;
880 log_verbose("\n---Testing ucnv_fromUChars()\n");
881 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
884 log_err("\nFAILURE...%s\n", myErrorName(err
));
887 log_verbose(" ucnv_fromUChars() o.k.\n");
889 /*test the conversion routine */
890 log_verbose("\n---Testing ucnv_toUChars()\n");
891 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
893 targetsize
= ucnv_toUChars(myConverter
,
897 strlen(output_cp_buffer
),
    /* if there is a buffer overflow then trap the values and pass them and make the actual call */
901 if(err
==U_BUFFER_OVERFLOW_ERROR
)
904 uchar2
=(UChar
*)malloc((targetsize
+1) * sizeof(UChar
));
905 targetsize
= ucnv_toUChars(myConverter
,
909 strlen(output_cp_buffer
),
913 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err
));
915 log_verbose(" ucnv_toUChars() o.k.\n");
917 if(u_strcmp(uchar1
,uchar2
)!=0)
918 log_err("equality test failed with conversion routine\n");
922 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
924 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
925 err
=U_ILLEGAL_ARGUMENT_ERROR
;
926 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
927 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
928 if (targetcapacity
!=0) {
929 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
932 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
933 targetcapacity
= ucnv_fromUChars(NULL
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
934 if (targetcapacity
!=0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
935 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
938 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
939 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, 0, &err
);
940 if (targetcapacity
!=0) {
941 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
943 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
944 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, 0, uchar1
, -1, &err
);
945 if (err
!= U_BUFFER_OVERFLOW_ERROR
) {
946 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
948 /*toUChars with error conditions*/
949 targetsize
= ucnv_toUChars(myConverter
, uchar2
, targetsize
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
951 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
954 targetsize
= ucnv_toUChars(myConverter
, uchar2
, -1, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
955 if(targetsize
!= 0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
){
956 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
959 targetsize
= ucnv_toUChars(myConverter
, uchar2
, 0, output_cp_buffer
, 0, &err
);
960 if (targetsize
!=0) {
961 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
964 targetsize
= ucnv_toUChars(myConverter
, NULL
, targetcapacity2
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
965 if (err
!= U_STRING_NOT_TERMINATED_WARNING
) {
966 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
973 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
974 /*Clean up re-usable vars*/
975 log_verbose("Testing ucnv_fromUnicode().....\n");
976 tmp_ucs_buf
=ucs_file_buffer_use
;
977 ucnv_fromUnicode(myConverter
, &mytarget_1
,
978 mytarget
+ MAX_FILE_LEN
,
980 ucs_file_buffer_use
+i
,
984 consumedUni
= (UChar
*)tmp_consumedUni
;
985 (void)consumedUni
; /* Suppress set but not used warning. */
989 log_err("FAILURE! %s\n", myErrorName(err
));
992 log_verbose("ucnv_fromUnicode() o.k.\n");
994 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
995 log_verbose("Testing ucnv_toUnicode().....\n");
996 tmp_mytarget_use
=mytarget_use
;
997 tmp_consumed
= consumed
;
998 ucnv_toUnicode(myConverter
, &my_ucs_file_buffer_1
,
999 my_ucs_file_buffer
+ MAX_FILE_LEN
,
1001 mytarget_use
+ (mytarget_1
- mytarget
),
1005 consumed
= (char*)tmp_consumed
;
1008 log_err("FAILURE! %s\n", myErrorName(err
));
1011 log_verbose("ucnv_toUnicode() o.k.\n");
1014 log_verbose("\n---Testing RoundTrip ...\n");
1017 u_strncpy(uchar3
, my_ucs_file_buffer
,i
);
1020 if(u_strcmp(uchar1
,uchar3
)==0)
1021 log_verbose("Equality test o.k.\n");
1023 log_err("Equality test failed\n");
1028 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__
);
1032 if(u_strcmp(uchar2
, uchar3
)==0)
1033 log_verbose("Equality test o.k.\n");
1035 log_err("Equality test failed\n");
1038 fclose(ucs_file_in
);
1039 ucnv_close(myConverter
);
1040 if (uchar1
!= 0) free(uchar1
);
1041 if (uchar2
!= 0) free(uchar2
);
1042 if (uchar3
!= 0) free(uchar3
);
1045 free((void*)mytarget
);
1046 free((void*)output_cp_buffer
);
1047 free((void*)ucs_file_buffer
);
1048 free((void*)my_ucs_file_buffer
);
1052 #if !UCONFIG_NO_LEGACY_CONVERSION
1053 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
)
1055 return (MIA
==(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
)?(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_SUBSTITUTE
:(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
;
1058 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
)
1060 return (MIA
==(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
)?(UConverterToUCallback
)UCNV_TO_U_CALLBACK_SUBSTITUTE
:(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
;
1064 static void TestFlushCache(void) {
1065 #if !UCONFIG_NO_LEGACY_CONVERSION
1066 UErrorCode err
= U_ZERO_ERROR
;
1067 UConverter
* someConverters
[5];
1070 /* flush the converter cache to get a consistent state before the flushing is tested */
1073 /*Testing ucnv_open()*/
1074 /* Note: These converters have been chosen because they do NOT
1075 encode the Latin characters (U+0041, ...), and therefore are
1076 highly unlikely to be chosen as system default codepages */
1078 someConverters
[0] = ucnv_open("ibm-1047", &err
);
1079 if (U_FAILURE(err
)) {
1080 log_data_err("FAILURE! %s\n", myErrorName(err
));
1083 someConverters
[1] = ucnv_open("ibm-1047", &err
);
1084 if (U_FAILURE(err
)) {
1085 log_data_err("FAILURE! %s\n", myErrorName(err
));
1088 someConverters
[2] = ucnv_open("ibm-1047", &err
);
1089 if (U_FAILURE(err
)) {
1090 log_data_err("FAILURE! %s\n", myErrorName(err
));
1093 someConverters
[3] = ucnv_open("gb18030", &err
);
1094 if (U_FAILURE(err
)) {
1095 log_data_err("FAILURE! %s\n", myErrorName(err
));
1098 someConverters
[4] = ucnv_open("ibm-954", &err
);
1099 if (U_FAILURE(err
)) {
1100 log_data_err("FAILURE! %s\n", myErrorName(err
));
1104 /* Testing ucnv_flushCache() */
1105 log_verbose("\n---Testing ucnv_flushCache...\n");
1106 if ((flushCount
=ucnv_flushCache())==0)
1107 log_verbose("Flush cache ok\n");
1109 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1111 /*testing ucnv_close() and ucnv_flushCache() */
1112 ucnv_close(someConverters
[0]);
1113 ucnv_close(someConverters
[1]);
1115 if ((flushCount
=ucnv_flushCache())==0)
1116 log_verbose("Flush cache ok\n");
1118 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1120 ucnv_close(someConverters
[2]);
1121 ucnv_close(someConverters
[3]);
1123 if ((flushCount
=ucnv_flushCache())==2)
1124 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1126 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1130 ucnv_close(someConverters
[4]);
1131 if ( (flushCount
=ucnv_flushCache())==1)
1132 log_verbose("Flush cache ok\n");
1134 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__
, flushCount
);
1139 * Test the converter alias API, specifically the fuzzy matching of
1140 * alias names and the alias table integrity. Make sure each
1141 * converter has at least one alias (itself), and that its listed
1142 * aliases map back to itself. Check some hard-coded UTF-8 and
1143 * ISO_2022 aliases to make sure they work.
/*
 * TestAlias: for every converter reported by ucnv_countAvailable() this
 * checks the alias count, opens the converter, compares ucnv_getName()
 * with the available name, and verifies that each alias j maps back to the
 * same alias[0]. A mismatch is searched for among the other converter's
 * aliases only when ucnv_getAlias() reported U_AMBIGUOUS_ALIAS_WARNING.
 * Afterwards three fixed tables are checked: ISO_2022_NAMES and UTF8_NAMES
 * (entries 1..n must map back to entry 0) and CONVERTERS_NAMES (each
 * .alias must map back to its .name).
 */
1145 static void TestAlias() {
1147 UErrorCode status
= U_ZERO_ERROR
;
1149 /* Predetermined aliases that we expect to map back to ISO_2022
1150 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1151 const char* ISO_2022_NAMES
[] =
1152 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1153 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1154 int32_t ISO_2022_NAMES_LENGTH
=
1155 sizeof(ISO_2022_NAMES
) / sizeof(ISO_2022_NAMES
[0]);
1156 const char *UTF8_NAMES
[] =
1157 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1158 "utf_8", "ibm1208", "cp1208" };
1159 int32_t UTF8_NAMES_LENGTH
=
1160 sizeof(UTF8_NAMES
) / sizeof(UTF8_NAMES
[0]);
1165 } CONVERTERS_NAMES
[] = {
1166 { "UTF-32BE", "UTF32_BigEndian" },
1167 { "UTF-32LE", "UTF32_LittleEndian" },
1168 { "UTF-32", "ISO-10646-UCS-4" },
1169 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1170 { "UTF-32", "ucs-4" }
1172 int32_t CONVERTERS_NAMES_LENGTH
= sizeof(CONVERTERS_NAMES
) / sizeof(*CONVERTERS_NAMES
);
1174 /* When there are bugs in gencnval or in ucnv_io, converters can
1175 appear to have no aliases. */
1176 ncnv
= ucnv_countAvailable();
1177 log_verbose("%d converters\n", ncnv
);
1178 for (i
=0; i
<ncnv
; ++i
) {
1179 const char *name
= ucnv_getAvailableName(i
);
1181 uint16_t na
= ucnv_countAliases(name
, &status
);
1186 log_err("FAIL: Converter \"%s\" (i=%d)"
1187 " has no aliases; expect at least one\n",
1191 cnv
= ucnv_open(name
, &status
);
1192 if (U_FAILURE(status
)) {
1193 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1194 " can't be opened.\n",
1198 if (strcmp(ucnv_getName(cnv
, &status
), name
) != 0
1199 && (strstr(name
, "PlatformEndian") == 0 && strstr(name
, "OppositeEndian") == 0)) {
1200 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1201 "The should be the same\n",
1202 name
, ucnv_getName(cnv
, &status
));
1207 status
= U_ZERO_ERROR
;
1208 alias0
= ucnv_getAlias(name
, 0, &status
);
1209 for (j
=1; j
<na
; ++j
) {
1211 /* Make sure each alias maps back to the the same list of
1212 aliases. Assume that if alias 0 is the same, the whole
1213 list is the same (this should always be true). */
1214 const char *mapBack
;
1216 status
= U_ZERO_ERROR
;
1217 alias
= ucnv_getAlias(name
, j
, &status
);
1218 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1219 log_err("FAIL: Converter \"%s\"is ambiguous\n", name
);
1222 if (alias
== NULL
) {
1223 log_err("FAIL: Converter \"%s\" -> "
1229 mapBack
= ucnv_getAlias(alias
, 0, &status
);
1231 if (mapBack
== NULL
) {
1232 log_err("FAIL: Converter \"%s\" -> "
1233 "alias[%d]=\"%s\" -> "
1234 "alias[0]=NULL, exp. \"%s\"\n",
1235 name
, j
, alias
, alias0
);
1239 if (0 != strcmp(alias0
, mapBack
)) {
1241 UBool foundAlias
= FALSE
;
1242 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1243 /* Make sure that we only get this mismapping when there is
1244 an ambiguous alias, and the other converter has this alias too. */
1245 for (idx
= 0; idx
< ucnv_countAliases(mapBack
, &status
); idx
++) {
1246 if (strcmp(ucnv_getAlias(mapBack
, (uint16_t)idx
, &status
), alias
) == 0) {
1252 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1255 log_err("FAIL: Converter \"%s\" -> "
1256 "alias[%d]=\"%s\" -> "
1257 "alias[0]=\"%s\", exp. \"%s\"\n",
1258 name
, j
, alias
, mapBack
, alias0
);
1265 /* Check a list of predetermined aliases that we expect to map
1266 * back to ISO_2022 and UTF-8. */
1267 for (i
=1; i
<ISO_2022_NAMES_LENGTH
; ++i
) {
1268 const char* mapBack
= ucnv_getAlias(ISO_2022_NAMES
[i
], 0, &status
);
1270 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES
[i
]);
1273 if (0 != strcmp(mapBack
, ISO_2022_NAMES
[0])) {
1274 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1275 ISO_2022_NAMES
[i
], mapBack
);
1280 for (i
=1; i
<UTF8_NAMES_LENGTH
; ++i
) {
1281 const char* mapBack
= ucnv_getAlias(UTF8_NAMES
[i
], 0, &status
);
1283 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES
[i
]);
1286 if (mapBack
&& 0 != strcmp(mapBack
, UTF8_NAMES
[0])) {
1287 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1288 UTF8_NAMES
[i
], mapBack
);
1293 * Check a list of predetermined aliases that we expect to map
1294 * back to predermined converter names.
1297 for (i
= 0; i
< CONVERTERS_NAMES_LENGTH
; ++i
) {
1298 const char* mapBack
= ucnv_getAlias(CONVERTERS_NAMES
[i
].alias
, 0, &status
);
1300 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES
[i
].name
);
1303 if (0 != strcmp(mapBack
, CONVERTERS_NAMES
[i
].name
)) {
1304 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1305 CONVERTERS_NAMES
[i
].alias
, mapBack
, CONVERTERS_NAMES
[i
].name
);
1311 static void TestDuplicateAlias(void) {
1313 UErrorCode status
= U_ZERO_ERROR
;
1315 status
= U_ZERO_ERROR
;
1316 alias
= ucnv_getStandardName("Shift_JIS", "IBM", &status
);
1317 if (alias
== NULL
|| strcmp(alias
, "ibm-943") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1318 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias
);
1320 status
= U_ZERO_ERROR
;
1321 alias
= ucnv_getStandardName("ibm-943", "IANA", &status
);
1322 if (alias
== NULL
|| strcmp(alias
, "Shift_JIS") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1323 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias
);
1325 status
= U_ZERO_ERROR
;
1326 alias
= ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status
);
1327 if (alias
!= NULL
|| status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1328 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias
);
1333 /* Test safe clone callback */
/*
 * Hands out serial numbers for callback contexts: 1 on the first call,
 * then 2, 3, ... The counter is function-local static state.
 */
static uint32_t TSCC_nextSerial()
{
    static uint32_t n = 1;
    const uint32_t issued = n;

    n = issued + 1;
    return issued;
}
1344 uint32_t magic
; /* sentinel, 0xC0FFEE while the object is valid; the callbacks log an error when it differs */
1345 uint32_t serial
; /* identifier minted by TSCC_nextSerial(), above; used in log output */
1346 UBool wasClosed
; /* set to TRUE when a callback receives UCNV_CLOSE for this context */
1349 static TSCCContext
*TSCC_clone(TSCCContext
*ctx
)
1351 TSCCContext
*newCtx
= (TSCCContext
*)malloc(sizeof(TSCCContext
));
1353 newCtx
->serial
= TSCC_nextSerial();
1354 newCtx
->wasClosed
= 0;
1355 newCtx
->magic
= 0xC0FFEE;
1357 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx
, ctx
->serial
, newCtx
, newCtx
->serial
);
1362 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TSCC_fromU: from-Unicode callback used by the safe-clone callback test.
 * context is a TSCCContext. Every call is logged and the context's magic
 * value is verified. On UCNV_CLONE a new context is obtained from
 * TSCC_clone() and installed on fromUArgs->converter, keeping the callback
 * function that ucnv_getFromUCallBack() reports. On UCNV_CLOSE the context
 * is marked wasClosed = TRUE.
 */
1363 static void TSCC_fromU(const void *context
,
1364 UConverterFromUnicodeArgs
*fromUArgs
,
1365 const UChar
* codeUnits
,
1368 UConverterCallbackReason reason
,
1371 TSCCContext
*ctx
= (TSCCContext
*)context
;
1372 UConverterFromUCallback junkFrom
;
1374 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, fromUArgs
->converter
);
1376 if(ctx
->magic
!= 0xC0FFEE) {
1377 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1381 if(reason
== UCNV_CLONE
) {
1382 UErrorCode subErr
= U_ZERO_ERROR
;
1383 TSCCContext
*newCtx
;
1384 TSCCContext
*junkCtx
;
1385 TSCCContext
**pjunkCtx
= &junkCtx
;
1388 log_verbose("TSCC_fromU: cloning..\n");
1389 newCtx
= TSCC_clone(ctx
);
1391 if(newCtx
== NULL
) {
1392 log_err("TSCC_fromU: internal clone failed on %p\n", ctx
);
1396 ucnv_getFromUCallBack(fromUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1397 ucnv_setFromUCallBack(fromUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1399 if(U_FAILURE(subErr
)) {
1404 if(reason
== UCNV_CLOSE
) {
1405 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1406 ctx
->wasClosed
= TRUE
;
/*
 * TSCC_toU: to-Unicode counterpart of TSCC_fromU. context is a
 * TSCCContext. Every call is logged and the context's magic value is
 * verified. On UCNV_CLONE a new context is obtained from TSCC_clone() and
 * installed on toUArgs->converter, keeping the callback function that
 * ucnv_getToUCallBack() reports. On UCNV_CLOSE the context is marked
 * wasClosed = TRUE.
 */
1410 static void TSCC_toU(const void *context
,
1411 UConverterToUnicodeArgs
*toUArgs
,
1412 const char* codeUnits
,
1414 UConverterCallbackReason reason
,
1417 TSCCContext
*ctx
= (TSCCContext
*)context
;
1418 UConverterToUCallback junkFrom
;
1420 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, toUArgs
->converter
);
1422 if(ctx
->magic
!= 0xC0FFEE) {
1423 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1427 if(reason
== UCNV_CLONE
) {
1428 UErrorCode subErr
= U_ZERO_ERROR
;
1429 TSCCContext
*newCtx
;
1430 TSCCContext
*junkCtx
;
1431 TSCCContext
**pjunkCtx
= &junkCtx
;
1434 log_verbose("TSCC_toU: cloning..\n");
1435 newCtx
= TSCC_clone(ctx
);
1437 if(newCtx
== NULL
) {
1438 log_err("TSCC_toU: internal clone failed on %p\n", ctx
);
1442 ucnv_getToUCallBack(toUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1443 ucnv_setToUCallBack(toUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1445 if(U_FAILURE(subErr
)) {
1450 if(reason
== UCNV_CLOSE
) {
1451 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1452 ctx
->wasClosed
= TRUE
;
1456 static void TSCC_init(TSCCContext
*q
)
1458 q
->magic
= 0xC0FFEE;
1459 q
->serial
= TSCC_nextSerial();
1463 static void TSCC_print_log(TSCCContext
*q
, const char *name
)
1466 log_verbose("TSCContext: %s is NULL!!\n", name
);
1468 if(q
->magic
!= 0xC0FFEE) {
1469 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1470 q
,q
->serial
, q
->magic
);
1472 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1473 q
, q
->serial
, name
, q
->magic
, q
->wasClosed
?"CLOSED":"open");
/*
 * TestConvertSafeCloneCallback: installs TSCC_fromU/TSCC_toU with the
 * stack contexts from1/to1 on an "iso-8859-3" converter, clones it with
 * ucnv_safeClone(), and then verifies through ucnv_getFromUCallBack() /
 * ucnv_getToUCallBack() that the original keeps its contexts while the
 * clone received different ones. The wasClosed flags are checked before
 * and after the "ucnv_closed (conv1)" and "ucnv_closed (conv2)" stages;
 * the clone's contexts are released with free() at the end.
 */
1477 static void TestConvertSafeCloneCallback()
1479 UErrorCode err
= U_ZERO_ERROR
;
1480 TSCCContext from1
, to1
;
1481 TSCCContext
*from2
, *from3
, *to2
, *to3
;
1482 TSCCContext
**pfrom2
= &from2
, **pfrom3
= &from3
, **pto2
= &to2
, **pto3
= &to3
;
1484 int32_t hunkSize
= 8192;
1485 UConverterFromUCallback junkFrom
;
1486 UConverterToUCallback junkTo
;
1487 UConverter
*conv1
, *conv2
= NULL
;
1489 conv1
= ucnv_open("iso-8859-3", &err
);
1491 if(U_FAILURE(err
)) {
1492 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err
));
1496 log_verbose("Opened conv1=%p\n", conv1
);
1501 TSCC_print_log(&from1
, "from1");
1502 TSCC_print_log(&to1
, "to1");
1504 ucnv_setFromUCallBack(conv1
, TSCC_fromU
, &from1
, NULL
, NULL
, &err
);
1505 log_verbose("Set from1 on conv1\n");
1506 TSCC_print_log(&from1
, "from1");
1508 ucnv_setToUCallBack(conv1
, TSCC_toU
, &to1
, NULL
, NULL
, &err
);
1509 log_verbose("Set to1 on conv1\n");
1510 TSCC_print_log(&to1
, "to1");
1512 conv2
= ucnv_safeClone(conv1
, hunk
, &hunkSize
, &err
);
1513 if(U_FAILURE(err
)) {
1514 log_err("safeClone failed: %s\n", u_errorName(err
));
1517 log_verbose("Cloned to conv2=%p.\n", conv2
);
1519 /********** from *********************/
1520 ucnv_getFromUCallBack(conv2
, &junkFrom
, (const void**)pfrom2
);
1521 ucnv_getFromUCallBack(conv1
, &junkFrom
, (const void**)pfrom3
);
1523 TSCC_print_log(from2
, "from2");
1524 TSCC_print_log(from3
, "from3(==from1)");
1527 log_err("FAIL! from2 is null \n");
1532 log_err("FAIL! from3 is null \n");
1536 if(from3
!= (&from1
) ) {
1537 log_err("FAIL! conv1's FROM context changed!\n");
1540 if(from2
== (&from1
) ) {
1541 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1544 if(from1
.wasClosed
) {
1545 log_err("FAIL! from1 is closed \n");
1548 if(from2
->wasClosed
) {
1549 log_err("FAIL! from2 was closed\n");
1552 /********** to *********************/
1553 ucnv_getToUCallBack(conv2
, &junkTo
, (const void**)pto2
);
1554 ucnv_getToUCallBack(conv1
, &junkTo
, (const void**)pto3
);
1556 TSCC_print_log(to2
, "to2");
1557 TSCC_print_log(to3
, "to3(==to1)");
1560 log_err("FAIL! to2 is null \n");
1565 log_err("FAIL! to3 is null \n");
1569 if(to3
!= (&to1
) ) {
1570 log_err("FAIL! conv1's TO context changed!\n");
1573 if(to2
== (&to1
) ) {
1574 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1578 log_err("FAIL! to1 is closed \n");
1581 if(to2
->wasClosed
) {
1582 log_err("FAIL! to2 was closed\n");
1585 /*************************************/
1588 log_verbose("ucnv_closed (conv1)\n");
1589 TSCC_print_log(&from1
, "from1");
1590 TSCC_print_log(from2
, "from2");
1591 TSCC_print_log(&to1
, "to1");
1592 TSCC_print_log(to2
, "to2");
1594 if(from1
.wasClosed
== FALSE
) {
1595 log_err("FAIL! from1 is NOT closed \n");
1598 if(from2
->wasClosed
) {
1599 log_err("FAIL! from2 was closed\n");
1602 if(to1
.wasClosed
== FALSE
) {
1603 log_err("FAIL! to1 is NOT closed \n");
1606 if(to2
->wasClosed
) {
1607 log_err("FAIL! to2 was closed\n");
1611 log_verbose("ucnv_closed (conv2)\n");
1613 TSCC_print_log(&from1
, "from1");
1614 TSCC_print_log(from2
, "from2");
1616 if(from1
.wasClosed
== FALSE
) {
1617 log_err("FAIL! from1 is NOT closed \n");
1620 if(from2
->wasClosed
== FALSE
) {
1621 log_err("FAIL! from2 was NOT closed\n");
1624 TSCC_print_log(&to1
, "to1");
1625 TSCC_print_log(to2
, "to2");
1627 if(to1
.wasClosed
== FALSE
) {
1628 log_err("FAIL! to1 is NOT closed \n");
1631 if(to2
->wasClosed
== FALSE
) {
1632 log_err("FAIL! to2 was NOT closed\n");
1636 free(to2
); /* to1 is stack based */
1638 if(from2
!= (&from1
)) {
1639 free(from2
); /* from1 is stack based */
/*
 * containsAnyOtherByte: helper for TestConvertSafeClone, which calls it on
 * regions of a buffer pre-filled with 0xaa to detect overwritten bytes.
 * NOTE(review): presumably reports whether any of the `length` bytes at p
 * differs from b -- confirm against the function body.
 */
1645 containsAnyOtherByte(uint8_t *p
, int32_t length
, uint8_t b
) {
/*
 * TestConvertSafeClone: exercises ucnv_safeClone().
 * Part 1 (first converter in names[] only) checks argument handling: NULL
 * status, incoming error status, NULL buffer-size pointer, preflighting
 * with a zero size, a too-small buffer, a NULL buffer, and a NULL
 * converter.
 * Part 2 loops over every bufferSizes[] x names[] combination: it
 * preflights the clone size, clones into buffer[1] of a 0xaa-filled
 * three-row buffer, verifies that bytes outside the reported size were not
 * touched, and runs fromUnicode / toUnicode / getNextUChar on the clone.
 * Finally the largest preflighted size is compared with
 * U_CNV_SAFECLONE_BUFFERSIZE.
 */
1656 static void TestConvertSafeClone()
1658 /* one 'regular' & all the 'private stateful' converters */
1659 static const char *const names
[] = {
1660 #if !UCONFIG_NO_LEGACY_CONVERSION
1662 "ISO_2022,locale=zh,version=1",
1665 #if !UCONFIG_NO_LEGACY_CONVERSION
1669 "ISO_2022,locale=kr,version=1",
1670 "ISO_2022,locale=jp,version=2",
1674 #if !UCONFIG_NO_LEGACY_CONVERSION
1675 "IMAP-mailbox-name",
1682 /* store the actual sizes of each converter */
1683 int32_t actualSizes
[LENGTHOF(names
)];
1685 static const int32_t bufferSizes
[] = {
1686 U_CNV_SAFECLONE_BUFFERSIZE
,
1687 (int32_t)(3*sizeof(UConverter
))/2, /* 1.5*sizeof(UConverter) */
1688 (int32_t)sizeof(UConverter
)/2 /* 0.5*sizeof(UConverter) */
1691 char charBuffer
[21]; /* Leave at an odd number for alignment testing */
1692 uint8_t buffer
[3] [U_CNV_SAFECLONE_BUFFERSIZE
];
1693 int32_t bufferSize
, maxBufferSize
;
1694 const char *maxName
;
1695 UConverter
* cnv
, *cnv2
;
1699 const char *pConstCharBuffer
;
1700 const char *charBufferLimit
= charBuffer
+ sizeof(charBuffer
)/sizeof(*charBuffer
);
1701 UChar uniBuffer
[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1702 UChar uniCharBuffer
[20];
1703 char charSourceBuffer
[] = { 0x1b, 0x24, 0x42 };
1704 const char *pCharSource
= charSourceBuffer
;
1705 const char *pCharSourceLimit
= charSourceBuffer
+ sizeof(charSourceBuffer
);
1706 UChar
*pUCharTarget
= uniCharBuffer
;
1707 UChar
*pUCharTargetLimit
= uniCharBuffer
+ sizeof(uniCharBuffer
)/sizeof(*uniCharBuffer
);
1708 const UChar
* pUniBuffer
;
1709 const UChar
*uniBufferLimit
= uniBuffer
+ sizeof(uniBuffer
)/sizeof(*uniBuffer
);
1713 cnv
= ucnv_open(names
[0], &err
);
1714 if(U_SUCCESS(err
)) {
1715 /* Check the various error & informational states: */
1717 /* Null status - just returns NULL */
1718 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1719 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, NULL
))
1721 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1723 /* error status - should return 0 & keep error the same */
1724 err
= U_MEMORY_ALLOCATION_ERROR
;
1725 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
1727 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1731 /* Null buffer size pointer is ok */
1732 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, buffer
[0], NULL
, &err
)) || U_FAILURE(err
))
1734 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1739 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1741 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || U_FAILURE(err
) || bufferSize
<= 0)
1743 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1745 /* Verify our define is large enough */
1746 if (U_CNV_SAFECLONE_BUFFERSIZE
< bufferSize
)
1748 log_err("FAIL: Pre-calculated buffer size is too small\n");
1750 /* Verify we can use this run-time calculated size */
1751 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
1753 log_err("FAIL: Converter can't be cloned with run-time size\n");
1759 /* size one byte too small - should allocate & let us know */
1761 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, NULL
, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1763 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1770 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1772 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1773 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, NULL
, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1775 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1783 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1784 if (NULL
!= ucnv_safeClone(NULL
, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1786 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1795 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1797 for(j
= 0; j
< LENGTHOF(bufferSizes
); ++j
) {
1798 for (idx
= 0; idx
< LENGTHOF(names
); idx
++)
1801 cnv
= ucnv_open(names
[idx
], &err
);
1802 if(U_FAILURE(err
)) {
1803 log_data_err("ucnv_open(\"%s\") failed - %s\n", names
[idx
], u_errorName(err
));
1808 /* preflight to get maxBufferSize */
1809 actualSizes
[idx
] = 0;
1810 ucnv_safeClone(cnv
, NULL
, &actualSizes
[idx
], &err
);
1811 if(actualSizes
[idx
] > maxBufferSize
) {
1812 maxBufferSize
= actualSizes
[idx
];
1813 maxName
= names
[idx
];
1817 memset(buffer
, 0xaa, sizeof(buffer
));
1819 bufferSize
= bufferSizes
[j
];
1820 cnv2
= ucnv_safeClone(cnv
, buffer
[1], &bufferSize
, &err
);
1822 /* close the original immediately to make sure that the clone works by itself */
1825 if( actualSizes
[idx
] <= (bufferSizes
[j
] - (int32_t)sizeof(UAlignedMemory
)) &&
1826 err
== U_SAFECLONE_ALLOCATED_WARNING
1828 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names
[idx
]);
1831 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1832 if(bufferSize
<= bufferSizes
[j
]) {
1833 /* used the stack buffer */
1834 if( containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
[0]), 0xaa) ||
1835 containsAnyOtherByte(buffer
[1]+bufferSize
, (int32_t)(sizeof(buffer
)-(sizeof(buffer
[0])+bufferSize
)), 0xaa)
1837 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1838 names
[idx
], bufferSize
, bufferSizes
[j
]);
1841 /* heap-allocated the clone */
1842 if(containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
), 0xaa)) {
1843 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1844 names
[idx
], bufferSize
, bufferSizes
[j
]);
1848 pCharBuffer
= charBuffer
;
1849 pUniBuffer
= uniBuffer
;
1851 ucnv_fromUnicode(cnv2
,
1860 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err
));
1862 ucnv_toUnicode(cnv2
,
1873 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err
));
1876 pConstCharBuffer
= charBuffer
;
1877 if (uniBuffer
[0] != ucnv_getNextUChar(cnv2
, &pConstCharBuffer
, pCharBuffer
, &err
))
1879 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err
));
1885 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1886 sizeof(UConverter
), maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1887 if(maxBufferSize
> U_CNV_SAFECLONE_BUFFERSIZE
) {
1888 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1889 maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1893 static void TestCCSID() {
1894 #if !UCONFIG_NO_LEGACY_CONVERSION
1896 UErrorCode errorCode
;
1897 int32_t ccsids
[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1900 for(i
=0; i
<(int32_t)(sizeof(ccsids
)/sizeof(int32_t)); ++i
) {
1903 errorCode
=U_ZERO_ERROR
;
1904 cnv
=ucnv_openCCSID(ccsid
, UCNV_IBM
, &errorCode
);
1905 if(U_FAILURE(errorCode
)) {
1906 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid
, u_errorName(errorCode
));
1910 if(ccsid
!=ucnv_getCCSID(cnv
, &errorCode
)) {
1911 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid
, ucnv_getCCSID(cnv
, &errorCode
));
1914 /* skip gb18030(ccsid 1392) */
1915 if(ccsid
!= 1392 && UCNV_IBM
!=ucnv_getPlatform(cnv
, &errorCode
)) {
1916 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid
, ucnv_getPlatform(cnv
, &errorCode
));
1924 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1926 /* CHUNK_SIZE defined in common\ucnv.c: */
1927 #define CHUNK_SIZE 1024
1929 static void bug1(void);
1930 static void bug2(void);
1931 static void bug3(void);
/* TestJ932 body: runs the three jitterbug 932 ucnv_convert() regression checks in order. */
1936 bug1(); /* Unicode intermediate buffer straddle bug */
1937 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1938 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1942 * jitterbug 932: test chunking boundary conditions in
1944 int32_t ucnv_convert(const char *toConverterName,
1945 const char *fromConverterName,
1952 * See discussions on the icu mailing list in
1953 * 2001-April with the subject "converter 'flush' question".
1955 * Bug report and test code provided by Edward J. Batutis.
/*
 * bug1 body: fills a CHUNK_SIZE+32 byte input with 'a' and places the
 * 4-byte GB 18030 sequence for U+10000 at offsets CHUNK_SIZE-i, for i from
 * sizeof(test_seq) down to 0, so that the sequence slides across the
 * CHUNK_SIZE boundary. Each variant is converted with ucnv_convert() to
 * "us-ascii"; U_TRUNCATED_CHAR_FOUND is reported as the straddle bug.
 */
1959 #if !UCONFIG_NO_LEGACY_CONVERSION
1960 char char_in
[CHUNK_SIZE
+32];
1961 char char_out
[CHUNK_SIZE
*2];
1963 /* GB 18030 equivalent of U+10000 is 90308130 */
1964 static const char test_seq
[]={ (char)0x90u
, 0x30, (char)0x81u
, 0x30 };
1966 UErrorCode err
= U_ZERO_ERROR
;
1967 int32_t i
, test_seq_len
= sizeof(test_seq
);
1970 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1971 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1972 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1973 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1974 * other Unicode sequences cause a bug since combining sequences are not supported by the
1978 for (i
= test_seq_len
; i
>= 0; i
--) {
1979 /* put character sequence into input buffer */
1980 memset(char_in
, 0x61, sizeof(char_in
)); /* GB 18030 'a' */
1981 memcpy(char_in
+ (CHUNK_SIZE
- i
), test_seq
, test_seq_len
);
1983 /* do the conversion */
1984 ucnv_convert("us-ascii", /* out */
1993 if (err
== U_TRUNCATED_CHAR_FOUND
) {
1994 /* this happens when surrogate pair straddles the intermediate buffer in
1995 * T_UConverter_fromCodepageToCodepage */
1996 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
/*
 * bug2 body: calls ucnv_convert() three times with the 5-byte target[]
 * (us-ascii -> iso-8859-1, then -> UTF-32BE, then UTF-32BE -> UTF-8) and
 * reports a failure when the returned preflighting size is not the full
 * output length named in each message (10, 32 and 12 bytes).
 */
2002 /* bug2: pre-flighting loop bug: simple overflow causes bug */
2005 /* US-ASCII "1234567890" */
2006 static const char source
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2007 static const char sourceUTF8
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2008 static const char sourceUTF32
[]={ 0x00, 0x00, 0x00, 0x30,
2009 0x00, 0x00, 0x00, 0x31,
2010 0x00, 0x00, 0x00, 0x32,
2011 0x00, 0x00, 0x00, 0x33,
2012 0x00, 0x00, 0x00, 0x34,
2013 0x00, 0x00, 0x00, 0x35,
2014 0x00, 0x00, 0x00, 0x36,
2015 0x00, 0x00, 0x00, 0x37,
2016 0x00, 0x00, 0x00, 0x38,
2017 0x00, 0x00, (char)0xf0, 0x00};
2018 static char target
[5];
2020 UErrorCode err
= U_ZERO_ERROR
;
2023 /* do the conversion */
2024 size
= ucnv_convert("iso-8859-1", /* out */
2025 "us-ascii", /* in */
2033 /* bug2: size is 5, should be 10 */
2034 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size
);
2038 /* do the conversion */
2039 size
= ucnv_convert("UTF-32BE", /* out */
2048 /* bug2: size is 5, should be 32 */
2049 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size
);
2053 /* do the conversion */
2054 size
= ucnv_convert("UTF-8", /* out */
2055 "UTF-32BE", /* in */
2059 sizeof(sourceUTF32
),
2063 /* bug2: size is 5, should be 12 */
2064 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size
);
2069 * bug3: when the characters expand going from source to target codepage
2070 * you get bug3 in addition to bug2
2074 #if !UCONFIG_NO_LEGACY_CONVERSION
2075 char char_in
[CHUNK_SIZE
*4];
2077 UErrorCode err
= U_ZERO_ERROR
;
2081 * first get the buggy size from bug2 then
2082 * compare it to buggy size with an expansion
2084 memset(char_in
, 0x61, sizeof(char_in
)); /* US-ASCII 'a' */
2086 /* do the conversion */
2087 size
= ucnv_convert("lmbcs", /* out */
2088 "us-ascii", /* in */
2095 if ( size
!= sizeof(char_in
) ) {
2097 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2098 * in the converter?), should be CHUNK_SIZE*4
2100 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2102 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in
), size
);
2106 * now do the conversion with expansion
2107 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2109 memset(char_in
, 8, sizeof(char_in
));
2112 /* do the conversion */
2113 size
= ucnv_convert("lmbcs", /* out */
2114 "us-ascii", /* in */
2121 /* expect 2X expansion */
2122 if ( size
!= sizeof(char_in
) * 2 ) {
2125 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2127 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in
) * 2, size
);
/*
 * convertExStreaming: drives ucnv_convertEx() from srcCnv to targetCnv in
 * pieces of at most chunkSize units (capped at CHUNK_SIZE) for the source,
 * the pivot buffer and the target alike. After U_BUFFER_OVERFLOW_ERROR the
 * error is reset and the target limit is advanced by another chunk. Once
 * the loop ends, the error code, the number of bytes written and the bytes
 * themselves are compared with expectCode / expectTarget;
 * U_STRING_NOT_TERMINATED_WARNING is accepted where U_ZERO_ERROR is
 * expected.
 */
2133 convertExStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2134 const char *src
, int32_t srcLength
,
2135 const char *expectTarget
, int32_t expectTargetLength
,
2137 const char *testName
,
2138 UErrorCode expectCode
) {
2139 UChar pivotBuffer
[CHUNK_SIZE
];
2140 UChar
*pivotSource
, *pivotTarget
;
2141 const UChar
*pivotLimit
;
2143 char targetBuffer
[CHUNK_SIZE
];
2145 const char *srcLimit
, *finalSrcLimit
, *targetLimit
;
2147 int32_t targetLength
;
2151 UErrorCode errorCode
;
2154 if(chunkSize
>CHUNK_SIZE
) {
2155 chunkSize
=CHUNK_SIZE
;
2158 pivotSource
=pivotTarget
=pivotBuffer
;
2159 pivotLimit
=pivotBuffer
+chunkSize
;
2161 finalSrcLimit
=src
+srcLength
;
2162 target
=targetBuffer
;
2163 targetLimit
=targetBuffer
+chunkSize
;
2165 ucnv_resetToUnicode(srcCnv
);
2166 ucnv_resetFromUnicode(targetCnv
);
2168 errorCode
=U_ZERO_ERROR
;
2171 /* convert, streaming-style (both converters and pivot keep state) */
2173 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2174 if(src
+chunkSize
<=finalSrcLimit
) {
2175 srcLimit
=src
+chunkSize
;
2177 srcLimit
=finalSrcLimit
;
2179 ucnv_convertEx(targetCnv
, srcCnv
,
2180 &target
, targetLimit
,
2182 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
2183 FALSE
, flush
, &errorCode
);
2184 targetLength
=(int32_t)(target
-targetBuffer
);
2185 if(target
>targetLimit
) {
2186 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2187 testName
, chunkSize
, target
, targetLimit
);
2188 break; /* TODO: major problem! */
2190 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2191 /* continue converting another chunk */
2192 errorCode
=U_ZERO_ERROR
;
2193 if(targetLength
+chunkSize
<=sizeof(targetBuffer
)) {
2194 targetLimit
=target
+chunkSize
;
2196 targetLimit
=targetBuffer
+sizeof(targetBuffer
);
2198 } else if(U_FAILURE(errorCode
)) {
2204 } else if(src
==finalSrcLimit
&& pivotSource
==pivotTarget
) {
2205 /* all consumed, now flush without input (separate from conversion for testing) */
2210 if(!(errorCode
==expectCode
|| (expectCode
==U_ZERO_ERROR
&& errorCode
==U_STRING_NOT_TERMINATED_WARNING
))) {
2211 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2212 testName
, chunkSize
, u_errorName(errorCode
), u_errorName(expectCode
));
2213 } else if(targetLength
!=expectTargetLength
) {
2214 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2215 testName
, chunkSize
, targetLength
, expectTargetLength
);
2216 } else if(memcmp(targetBuffer
, expectTarget
, targetLength
)!=0) {
2217 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2218 testName
, chunkSize
);
2223 convertExMultiStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2224 const char *src
, int32_t srcLength
,
2225 const char *expectTarget
, int32_t expectTargetLength
,
2226 const char *testName
,
2227 UErrorCode expectCode
) {
2228 convertExStreaming(srcCnv
, targetCnv
,
2230 expectTarget
, expectTargetLength
,
2231 1, testName
, expectCode
);
2232 convertExStreaming(srcCnv
, targetCnv
,
2234 expectTarget
, expectTargetLength
,
2235 3, testName
, expectCode
);
2236 convertExStreaming(srcCnv
, targetCnv
,
2238 expectTarget
, expectTargetLength
,
2239 7, testName
, expectCode
);
2242 static void TestConvertEx() {
2243 #if !UCONFIG_NO_LEGACY_CONVERSION
2244 static const uint8_t
2246 /* 4e00 30a1 ff61 0410 */
2247 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2250 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2254 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2255 * SUB, SUB, 0x40, SUB, SUB, 0x40
2257 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2260 char srcBuffer
[100], targetBuffer
[100];
2265 UChar pivotBuffer
[100];
2266 UChar
*pivotSource
, *pivotTarget
;
2268 UConverter
*cnv1
, *cnv2
;
2269 UErrorCode errorCode
;
2271 errorCode
=U_ZERO_ERROR
;
2272 cnv1
=ucnv_open("UTF-8", &errorCode
);
2273 if(U_FAILURE(errorCode
)) {
2274 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode
));
2278 cnv2
=ucnv_open("Shift-JIS", &errorCode
);
2279 if(U_FAILURE(errorCode
)) {
2280 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2285 /* test ucnv_convertEx() with streaming conversion style */
2286 convertExMultiStreaming(cnv1
, cnv2
,
2287 (const char *)utf8
, sizeof(utf8
), (const char *)shiftJIS
, sizeof(shiftJIS
),
2288 "UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2290 convertExMultiStreaming(cnv2
, cnv1
,
2291 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
),
2292 "Shift-JIS -> UTF-8", U_ZERO_ERROR
);
2294 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2295 convertExMultiStreaming(cnv1
, cnv2
,
2296 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)errorTarget
, sizeof(errorTarget
),
2297 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2299 /* test some simple conversions */
2301 /* NUL-terminated source and target */
2302 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2303 memcpy(srcBuffer
, utf8
, sizeof(utf8
));
2304 srcBuffer
[sizeof(utf8
)]=0;
2306 target
=targetBuffer
;
2307 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2308 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2309 if( errorCode
!=U_ZERO_ERROR
||
2310 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2312 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
2314 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2315 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2318 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2319 errorCode
=U_AMBIGUOUS_ALIAS_WARNING
;
2320 memset(targetBuffer
, 0xff, sizeof(targetBuffer
));
2322 target
=targetBuffer
;
2323 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(shiftJIS
), &src
, NULL
,
2324 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2325 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2326 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2327 *target
!=(char)0xff ||
2328 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
2330 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2331 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2335 errorCode
=U_MESSAGE_PARSE_ERROR
;
2337 target
=targetBuffer
;
2338 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2339 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2340 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2341 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2344 /* pivotLimit==pivotStart */
2345 errorCode
=U_ZERO_ERROR
;
2346 pivotSource
=pivotTarget
=pivotBuffer
;
2347 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2348 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
, TRUE
, TRUE
, &errorCode
);
2349 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2350 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode
));
2353 /* *pivotSource==NULL */
2354 errorCode
=U_ZERO_ERROR
;
2356 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2357 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2358 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2359 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode
));
2363 errorCode
=U_ZERO_ERROR
;
2365 pivotSource
=pivotBuffer
;
2366 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2367 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2368 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2369 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode
));
2372 /* streaming conversion without a pivot buffer */
2373 errorCode
=U_ZERO_ERROR
;
2375 pivotSource
=pivotBuffer
;
2376 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2377 NULL
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, FALSE
, &errorCode
);
2378 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2379 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode
));
2387 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
2388 static const char *const badUTF8
[]={
2392 /* truncated multi-byte sequences */
2429 "\xfc\x80\x80\x80\x80",
2431 /* complete sequences but non-shortest forms or out of range etc. */
2437 "\xf8\x80\x80\x80\x80",
2438 "\xfc\x80\x80\x80\x80\x80",
2443 #define ARG_CHAR_ARR_SIZE 8
2445 /* get some character that can be converted and convert it */
2446 static UBool
getTestChar(UConverter
*cnv
, const char *converterName
,
2447 char charUTF8
[4], int32_t *pCharUTF8Length
,
2448 char char0
[ARG_CHAR_ARR_SIZE
], int32_t *pChar0Length
,
2449 char char1
[ARG_CHAR_ARR_SIZE
], int32_t *pChar1Length
) {
2450 UChar utf16
[U16_MAX_LENGTH
];
2451 int32_t utf16Length
;
2453 const UChar
*utf16Source
;
2458 UErrorCode errorCode
;
2460 errorCode
=U_ZERO_ERROR
;
2461 set
=uset_open(1, 0);
2462 ucnv_getUnicodeSet(cnv
, set
, UCNV_ROUNDTRIP_SET
, &errorCode
);
2463 c
=uset_charAt(set
, uset_size(set
)/2);
2467 U16_APPEND_UNSAFE(utf16
, utf16Length
, c
);
2469 U8_APPEND_UNSAFE(charUTF8
, *pCharUTF8Length
, c
);
2473 ucnv_fromUnicode(cnv
,
2474 &target
, char0
+ARG_CHAR_ARR_SIZE
,
2475 &utf16Source
, utf16
+utf16Length
,
2476 NULL
, FALSE
, &errorCode
);
2477 *pChar0Length
=(int32_t)(target
-char0
);
2481 ucnv_fromUnicode(cnv
,
2482 &target
, char1
+ARG_CHAR_ARR_SIZE
,
2483 &utf16Source
, utf16
+utf16Length
,
2484 NULL
, FALSE
, &errorCode
);
2485 *pChar1Length
=(int32_t)(target
-char1
);
2487 if(U_FAILURE(errorCode
)) {
2488 log_err("unable to get test character for %s - %s\n", converterName
, u_errorName(errorCode
));
2494 static void testFromTruncatedUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2495 char charUTF8
[4], int32_t charUTF8Length
,
2496 char char0
[8], int32_t char0Length
,
2497 char char1
[8], int32_t char1Length
) {
2502 int32_t outputLength
;
2504 char invalidChars
[8];
2505 int8_t invalidLength
;
2510 UChar pivotBuffer
[8];
2511 UChar
*pivotSource
, *pivotTarget
;
2513 UErrorCode errorCode
;
2516 /* test truncated sequences */
2517 errorCode
=U_ZERO_ERROR
;
2518 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2520 memcpy(utf8
, charUTF8
, charUTF8Length
);
2522 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2523 /* truncated sequence? */
2524 int32_t length
=strlen(badUTF8
[i
]);
2525 if(length
>=(1+U8_COUNT_TRAIL_BYTES(badUTF8
[i
][0]))) {
2529 /* assemble a string with the test character and the truncated sequence */
2530 memcpy(utf8
+charUTF8Length
, badUTF8
[i
], length
);
2531 utf8Length
=charUTF8Length
+length
;
2533 /* convert and check the invalidChars */
2536 pivotSource
=pivotTarget
=pivotBuffer
;
2537 errorCode
=U_ZERO_ERROR
;
2538 ucnv_convertEx(cnv
, utf8Cnv
,
2539 &target
, output
+sizeof(output
),
2540 &source
, utf8
+utf8Length
,
2541 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2542 TRUE
, TRUE
, /* reset & flush */
2544 outputLength
=(int32_t)(target
-output
);
2545 (void)outputLength
; /* Suppress set but not used warning. */
2546 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
|| pivotSource
!=pivotBuffer
) {
2547 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode
), converterName
, (long)i
);
2551 errorCode
=U_ZERO_ERROR
;
2552 invalidLength
=(int8_t)sizeof(invalidChars
);
2553 ucnv_getInvalidChars(utf8Cnv
, invalidChars
, &invalidLength
, &errorCode
);
2554 if(invalidLength
!=length
|| 0!=memcmp(invalidChars
, badUTF8
[i
], length
)) {
2555 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName
, (long)i
);
2560 static void testFromBadUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2561 char charUTF8
[4], int32_t charUTF8Length
,
2562 char char0
[8], int32_t char0Length
,
2563 char char1
[8], int32_t char1Length
) {
2564 char utf8
[600], expect
[600];
2565 int32_t utf8Length
, expectLength
;
2569 UErrorCode errorCode
;
2572 errorCode
=U_ZERO_ERROR
;
2573 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, NULL
, &errorCode
);
2576 * assemble an input string with the test character between each
2578 * and an expected string with repeated test character output
2580 memcpy(utf8
, charUTF8
, charUTF8Length
);
2581 utf8Length
=charUTF8Length
;
2583 memcpy(expect
, char0
, char0Length
);
2584 expectLength
=char0Length
;
2586 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2587 int32_t length
=strlen(badUTF8
[i
]);
2588 memcpy(utf8
+utf8Length
, badUTF8
[i
], length
);
2591 memcpy(utf8
+utf8Length
, charUTF8
, charUTF8Length
);
2592 utf8Length
+=charUTF8Length
;
2594 memcpy(expect
+expectLength
, char1
, char1Length
);
2595 expectLength
+=char1Length
;
2598 /* expect that each bad UTF-8 sequence is detected and skipped */
2599 strcpy(testName
, "from bad UTF-8 to ");
2600 strcat(testName
, converterName
);
2602 convertExMultiStreaming(utf8Cnv
, cnv
,
2604 expect
, expectLength
,
2609 /* Test illegal UTF-8 input. */
2610 static void TestConvertExFromUTF8() {
2611 static const char *const converterNames
[]={
2612 #if !UCONFIG_NO_LEGACY_CONVERSION
2621 UConverter
*utf8Cnv
, *cnv
;
2622 UErrorCode errorCode
;
2625 /* fromUnicode versions of some character, from initial state and later */
2626 char charUTF8
[4], char0
[8], char1
[8];
2627 int32_t charUTF8Length
, char0Length
, char1Length
;
2629 errorCode
=U_ZERO_ERROR
;
2630 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2631 if(U_FAILURE(errorCode
)) {
2632 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2636 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2637 errorCode
=U_ZERO_ERROR
;
2638 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2639 if(U_FAILURE(errorCode
)) {
2640 log_data_err("unable to open %s converter - %s\n", converterNames
[i
], u_errorName(errorCode
));
2643 if(!getTestChar(cnv
, converterNames
[i
], charUTF8
, &charUTF8Length
, char0
, &char0Length
, char1
, &char1Length
)) {
2646 testFromTruncatedUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2647 testFromBadUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2650 ucnv_close(utf8Cnv
);
2653 static void TestConvertExFromUTF8_C5F0() {
2654 static const char *const converterNames
[]={
2655 #if !UCONFIG_NO_LEGACY_CONVERSION
2664 UConverter
*utf8Cnv
, *cnv
;
2665 UErrorCode errorCode
;
2668 static const char bad_utf8
[2]={ (char)0xC5, (char)0xF0 };
2669 /* Expect "��" (2x U+FFFD as decimal NCRs) */
2670 static const char twoNCRs
[16]={
2671 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2672 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2674 static const char twoFFFD
[6]={
2675 (char)0xef, (char)0xbf, (char)0xbd,
2676 (char)0xef, (char)0xbf, (char)0xbd
2678 const char *expected
;
2679 int32_t expectedLength
;
2680 char dest
[20]; /* longer than longest expectedLength */
2685 UChar pivotBuffer
[128];
2686 UChar
*pivotSource
, *pivotTarget
;
2688 errorCode
=U_ZERO_ERROR
;
2689 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2690 if(U_FAILURE(errorCode
)) {
2691 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2695 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2696 errorCode
=U_ZERO_ERROR
;
2697 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2698 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_ESCAPE
, UCNV_ESCAPE_XML_DEC
,
2699 NULL
, NULL
, &errorCode
);
2700 if(U_FAILURE(errorCode
)) {
2701 log_data_err("unable to open %s converter - %s\n",
2702 converterNames
[i
], u_errorName(errorCode
));
2707 uprv_memset(dest
, 9, sizeof(dest
));
2708 if(i
==LENGTHOF(converterNames
)-1) {
2709 /* conversion to UTF-8 yields two U+FFFD directly */
2713 /* conversion to a non-Unicode charset yields two NCRs */
2720 pivotSource
=pivotTarget
=pivotBuffer
;
2723 &target
, dest
+expectedLength
,
2724 &src
, bad_utf8
+sizeof(bad_utf8
),
2725 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2726 TRUE
, TRUE
, &errorCode
);
2727 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| src
!=bad_utf8
+2 ||
2728 target
!=dest
+expectedLength
|| 0!=uprv_memcmp(dest
, expected
, expectedLength
) ||
2729 dest
[expectedLength
]!=9
2731 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames
[i
]);
2735 ucnv_close(utf8Cnv
);
2739 TestConvertAlgorithmic() {
2740 #if !UCONFIG_NO_LEGACY_CONVERSION
2741 static const uint8_t
2743 /* 4e00 30a1 ff61 0410 */
2744 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2747 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2751 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2752 * SUB, SUB, 0x40, SUB, SUB, 0x40
2754 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2757 0xfe, 0xff /* BOM only, no text */
2760 0xff, 0xfe, 0, 0 /* BOM only, no text */
2763 char target
[100], utf8NUL
[100], shiftJISNUL
[100];
2766 UErrorCode errorCode
;
2770 errorCode
=U_ZERO_ERROR
;
2771 cnv
=ucnv_open("Shift-JIS", &errorCode
);
2772 if(U_FAILURE(errorCode
)) {
2773 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2778 memcpy(utf8NUL
, utf8
, sizeof(utf8
));
2779 utf8NUL
[sizeof(utf8
)]=0;
2780 memcpy(shiftJISNUL
, shiftJIS
, sizeof(shiftJIS
));
2781 shiftJISNUL
[sizeof(shiftJIS
)]=0;
2784 * The to/from algorithmic convenience functions share a common implementation,
2785 * so we need not test all permutations of them.
2788 /* length in, not terminated out */
2789 errorCode
=U_ZERO_ERROR
;
2790 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF8
, target
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
), &errorCode
);
2791 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2792 length
!=sizeof(shiftJIS
) ||
2793 memcmp(target
, shiftJIS
, length
)!=0
2795 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2796 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2799 /* terminated in and out */
2800 memset(target
, 0x55, sizeof(target
));
2801 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2802 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, -1, &errorCode
);
2803 if( errorCode
!=U_ZERO_ERROR
||
2804 length
!=sizeof(utf8
) ||
2805 memcmp(target
, utf8
, length
)!=0
2807 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2808 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2811 /* empty string, some target buffer */
2812 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2813 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, 0, &errorCode
);
2814 if( errorCode
!=U_ZERO_ERROR
||
2817 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2818 u_errorName(errorCode
), length
);
2821 /* pseudo-empty string, no target buffer */
2822 errorCode
=U_ZERO_ERROR
;
2823 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2824 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2827 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2828 u_errorName(errorCode
), length
);
2831 errorCode
=U_ZERO_ERROR
;
2832 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF32
, target
, 0, (const char *)utf32
, 4, &errorCode
);
2833 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2836 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2837 u_errorName(errorCode
), length
);
2841 errorCode
=U_MESSAGE_PARSE_ERROR
;
2842 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2843 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2844 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2848 errorCode
=U_ZERO_ERROR
;
2849 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, NULL
, 2, &errorCode
);
2850 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2851 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode
));
2854 /* illegal alg. type */
2855 errorCode
=U_ZERO_ERROR
;
2856 length
=ucnv_fromAlgorithmic(cnv
, (UConverterType
)99, target
, 0, (const char *)utf16
, 2, &errorCode
);
2857 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2858 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode
));
2864 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
2865 static void TestLMBCSMaxChar(void) {
2866 static const struct {
2870 /* some non-LMBCS converters - perfect test setup here */
2881 { 4, "IMAP-mailbox-name"},
2884 { 1, "windows-1256"},
2896 { 3, "ISO-2022-KR"},
2897 { 6, "ISO-2022-JP"},
2898 { 8, "ISO-2022-CN"},
2916 for (idx
= 0; idx
< LENGTHOF(converter
); idx
++) {
2917 UErrorCode status
= U_ZERO_ERROR
;
2918 UConverter
*cnv
= cnv_open(converter
[idx
].name
, &status
);
2919 if (U_FAILURE(status
)) {
2922 if (converter
[idx
].maxSize
!= ucnv_getMaxCharSize(cnv
)) {
2923 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2924 converter
[idx
].name
, converter
[idx
].maxSize
, ucnv_getMaxCharSize(cnv
));
2929 /* mostly test that the macro compiles */
2930 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2931 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
2936 static void TestJ1968(void) {
2937 UErrorCode err
= U_ZERO_ERROR
;
2939 char myConvName
[] = "My really really really really really really really really really really really"
2940 " really really really really really really really really really really really"
2941 " really really really really really really really really long converter name";
2942 UChar myConvNameU
[sizeof(myConvName
)];
2944 u_charsToUChars(myConvName
, myConvNameU
, sizeof(myConvName
));
2947 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
+1] = 0;
2948 cnv
= ucnv_openU(myConvNameU
, &err
);
2949 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2950 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2954 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2955 cnv
= ucnv_openU(myConvNameU
, &err
);
2956 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2957 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2961 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
2962 cnv
= ucnv_openU(myConvNameU
, &err
);
2963 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2964 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
2971 cnv
= ucnv_open(myConvName
, &err
);
2972 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2973 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2977 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = ',';
2978 cnv
= ucnv_open(myConvName
, &err
);
2979 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2980 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2984 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
2985 cnv
= ucnv_open(myConvName
, &err
);
2986 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2987 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
2991 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
2992 strncpy(myConvName
+ UCNV_MAX_CONVERTER_NAME_LENGTH
, "locale=", 7);
2993 cnv
= ucnv_open(myConvName
, &err
);
2994 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2995 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2998 /* The comma isn't really a part of the converter name. */
3000 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
3001 cnv
= ucnv_open(myConvName
, &err
);
3002 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3003 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3007 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ' ';
3008 cnv
= ucnv_open(myConvName
, &err
);
3009 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3010 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3014 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
3015 cnv
= ucnv_open(myConvName
, &err
);
3016 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3017 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3022 #if !UCONFIG_NO_LEGACY_CONVERSION
3024 testSwap(const char *name
, UBool swap
) {
3026 * Test Unicode text.
3027 * Contains characters that are the highest for some of the
3028 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3029 * tables copies the entire tables.
3031 static const UChar text
[]={
3032 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3035 UChar uNormal
[32], uSwapped
[32];
3036 char normal
[32], swapped
[32];
3040 int32_t i
, normalLength
, swappedLength
;
3044 const char *swappedName
;
3045 UConverter
*cnv
, *swapCnv
;
3046 UErrorCode errorCode
;
3048 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3050 /* open both the normal and the LF/NL-swapping converters */
3051 strcpy(swapped
, name
);
3052 strcat(swapped
, UCNV_SWAP_LFNL_OPTION_STRING
);
3054 errorCode
=U_ZERO_ERROR
;
3055 swapCnv
=ucnv_open(swapped
, &errorCode
);
3056 cnv
=ucnv_open(name
, &errorCode
);
3057 if(U_FAILURE(errorCode
)) {
3058 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name
, swapped
, u_errorName(errorCode
));
3062 /* the name must contain the swap option if and only if we expect the converter to swap */
3063 swappedName
=ucnv_getName(swapCnv
, &errorCode
);
3064 if(U_FAILURE(errorCode
)) {
3065 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name
, u_errorName(errorCode
));
3069 pc
=strstr(swappedName
, UCNV_SWAP_LFNL_OPTION_STRING
);
3070 if(swap
!= (pc
!=NULL
)) {
3071 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name
, swappedName
, swap
);
3075 /* convert to EBCDIC */
3078 ucnv_fromUnicode(cnv
, &pc
, normal
+LENGTHOF(normal
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3079 normalLength
=(int32_t)(pc
-normal
);
3083 ucnv_fromUnicode(swapCnv
, &pc
, swapped
+LENGTHOF(swapped
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3084 swappedLength
=(int32_t)(pc
-swapped
);
3086 if(U_FAILURE(errorCode
)) {
3087 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name
, u_errorName(errorCode
));
3091 /* compare EBCDIC output */
3092 if(normalLength
!=swappedLength
) {
3093 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3096 for(i
=0; i
<normalLength
; ++i
) {
3097 /* swap EBCDIC LF/NL for comparison */
3102 } else if(c
==0x25) {
3108 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name
, i
, (uint8_t)swapped
[i
]);
3113 /* convert back to Unicode (may not roundtrip) */
3116 ucnv_toUnicode(cnv
, &pu
, uNormal
+LENGTHOF(uNormal
), (const char **)&pc
, normal
+normalLength
, NULL
, TRUE
, &errorCode
);
3117 normalLength
=(int32_t)(pu
-uNormal
);
3121 ucnv_toUnicode(swapCnv
, &pu
, uSwapped
+LENGTHOF(uSwapped
), (const char **)&pc
, normal
+swappedLength
, NULL
, TRUE
, &errorCode
);
3122 swappedLength
=(int32_t)(pu
-uSwapped
);
3124 if(U_FAILURE(errorCode
)) {
3125 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name
, u_errorName(errorCode
));
3129 /* compare EBCDIC output */
3130 if(normalLength
!=swappedLength
) {
3131 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3134 for(i
=0; i
<normalLength
; ++i
) {
3135 /* swap EBCDIC LF/NL for comparison */
3140 } else if(u
==0x85) {
3145 if(u
!=uSwapped
[i
]) {
3146 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name
, i
, uSwapped
[i
]);
3154 ucnv_close(swapCnv
);
3158 TestEBCDICSwapLFNL() {
3159 static const struct {
3164 { "ibm-1047", TRUE
},
3165 { "ibm-1140", TRUE
},
3166 { "ibm-930", TRUE
},
3167 { "iso-8859-3", FALSE
}
3172 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
3173 testSwap(tests
[i
].name
, tests
[i
].swap
);
3178 TestEBCDICSwapLFNL() {
3179 /* test nothing... */
3183 static void TestFromUCountPending(){
3184 #if !UCONFIG_NO_LEGACY_CONVERSION
3185 UErrorCode status
= U_ZERO_ERROR
;
3186 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3187 static const struct {
3191 }fromUnicodeTests
[] = {
3194 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3195 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3198 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3199 if(U_FAILURE(status
)){
3200 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3203 for(i
=0; i
<LENGTHOF(fromUnicodeTests
); ++i
) {
3206 char* targetLimit
= target
+ 10;
3207 const UChar
* source
= fromUnicodeTests
[i
].input
;
3208 const UChar
* sourceLimit
= source
+ fromUnicodeTests
[i
].len
;
3211 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3212 len
= ucnv_fromUCountPending(cnv
, &status
);
3213 if(U_FAILURE(status
)){
3214 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3215 status
= U_ZERO_ERROR
;
3218 if(len
!= fromUnicodeTests
[i
].exp
){
3219 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3222 status
= U_ZERO_ERROR
;
3225 * The converter has to read the tail before it knows that
3226 * only head alone matches.
3227 * At the end, the output for head will overflow the target,
3228 * middle will be pending, and tail will not have been consumed.
3231 \U00101234 -> x (<U101234> \x07 |0)
3232 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3233 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3234 \U00060007 -> unassigned
3236 static const UChar head
[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3237 static const UChar middle
[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3238 static const UChar tail
[] = {0xDC07,0x0000};/* second half of \U00060007 */
3241 char* targetLimit
= target
+ 2; /* expect overflow from converting \U00101234\U00050005 */
3242 const UChar
* source
= head
;
3243 const UChar
* sourceLimit
= source
+ u_strlen(head
);
3246 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3247 len
= ucnv_fromUCountPending(cnv
, &status
);
3248 if(U_FAILURE(status
)){
3249 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3250 status
= U_ZERO_ERROR
;
3253 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3256 sourceLimit
= source
+ u_strlen(middle
);
3257 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3258 len
= ucnv_fromUCountPending(cnv
, &status
);
3259 if(U_FAILURE(status
)){
3260 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3261 status
= U_ZERO_ERROR
;
3264 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3267 sourceLimit
= source
+ u_strlen(tail
);
3268 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3269 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3270 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3272 status
= U_ZERO_ERROR
;
3273 len
= ucnv_fromUCountPending(cnv
, &status
);
3274 /* middle[1] is pending, tail has not been consumed */
3275 if(U_FAILURE(status
)){
3276 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status
));
3279 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * Exercises ucnv_toUCountPending() with two converters opened from the test
 * data package via ucnv_openPackage(loadTestData(...), ...): "test3" and
 * "test4". The body is guarded by !UCONFIG_NO_LEGACY_CONVERSION.
 *
 * Part 1 ("test3"): for every toUnicodeTests[] entry the input bytes are
 * converted with ucnv_toUnicode() (flush == FALSE) and the value returned by
 * ucnv_toUCountPending() is compared with the entry's exp field.
 *
 * Part 2 ("test4"): the byte sequences head, mid and tail are used as input
 * with a deliberately tiny target buffer; the last conversion is expected to
 * end with U_BUFFER_OVERFLOW_ERROR, after which the pending count is queried
 * once more.
 *
 * Failures are reported through log_err()/log_data_err().
 */
3287 TestToUCountPending(){
3288 #if !UCONFIG_NO_LEGACY_CONVERSION
3289 UErrorCode status
= U_ZERO_ERROR
;
3290 static const struct {
3294 }toUnicodeTests
[] = {
3296 {{0x05, 0x01, 0x02},3,3},
3298 {{0x07, 0x00, 0x01, 0x02},4,4},
/* oldToUAction stays NULL; it is only forwarded as the 4th argument of ucnv_setToUCallBack(). */
3302 UConverterToUCallback
*oldToUAction
= NULL
;
3303 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3304 if(U_FAILURE(status
)){
3305 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3308 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3309 for(i
=0; i
<LENGTHOF(toUnicodeTests
); ++i
) {
/* Output window of 20 UChars starting at tgt. */
3311 UChar
* target
= tgt
;
3312 UChar
* targetLimit
= target
+ 20;
3313 const char* source
= toUnicodeTests
[i
].input
;
3314 const char* sourceLimit
= source
+ toUnicodeTests
[i
].len
;
3317 ucnv_toUnicode(cnv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3318 len
= ucnv_toUCountPending(cnv
,&status
);
/* status is checked only here, so it reflects both calls above. */
3319 if(U_FAILURE(status
)){
3320 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3321 status
= U_ZERO_ERROR
;
3324 if(len
!= toUnicodeTests
[i
].exp
){
/* NOTE(review): the message below names ucnv_toUInputConsumed although len comes from ucnv_toUCountPending(); it also contains the typo "expeced". */
3325 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3328 status
= U_ZERO_ERROR
;
3333 * The converter has to read the tail before it knows that
3334 * only head alone matches.
3335 * At the end, the output for head will overflow the target,
3336 * mid will be pending, and tail will not have been consumed.
/* Each array ends in 0x00, so the strlen() calls below count the four leading bytes. */
3338 char head
[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3339 char mid
[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3340 char tail
[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3342 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3343 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3344 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3345 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3348 UChar
* target
= tgt
;
3349 UChar
* targetLimit
= target
+ 1; /* expect overflow from converting */
3350 const char* source
= head
;
3351 const char* sourceLimit
= source
+ strlen(head
);
3353 cnv
= ucnv_openPackage(loadTestData(&status
), "test4", &status
);
3354 if(U_FAILURE(status
)){
/* NOTE(review): the message below says "test3" although the converter opened just above is "test4". */
3355 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3358 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3359 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3360 len
= ucnv_toUCountPending(cnv
,&status
);
3361 if(U_FAILURE(status
)){
3362 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3365 log_err("Did not get the expected len for head.\n");
3368 sourceLimit
= source
+strlen(mid
);
3369 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3370 len
= ucnv_toUCountPending(cnv
,&status
);
3371 if(U_FAILURE(status
)){
3372 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3375 log_err("Did not get the expected len for mid.\n");
3379 sourceLimit
= source
+strlen(tail
);
/* targetLimit == target leaves no room for output, so the next call is expected to overflow. */
3380 targetLimit
= target
;
3381 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3382 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3383 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
/* Clear the expected overflow status before querying the pending count. */
3385 status
= U_ZERO_ERROR
;
3386 len
= ucnv_toUCountPending(cnv
,&status
);
3387 /* mid[4] is pending, tail has not been consumed */
3388 if(U_FAILURE(status
)){
3389 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status
));
3392 log_err("Did not get the expected len for tail.\n");
/*
 * Sets the default converter name to `name` and verifies the result twice:
 *   1. ucnv_getDefaultName() must compare equal (strcmp) to `expected`;
 *   2. the converter returned by ucnv_open(NULL, ...) must report, through
 *      ucnv_getName(), a name equal to `expected`.
 *
 * name      converter name handed to ucnv_setDefaultName()
 * expected  name the default is expected to resolve to afterwards
 *
 * Successes are reported with log_verbose(), failures with log_err().
 */
3399 static void TestOneDefaultNameChange(const char *name
, const char *expected
) {
3400 UErrorCode status
= U_ZERO_ERROR
;
3402 ucnv_setDefaultName(name
);
3403 if(strcmp(ucnv_getDefaultName(), expected
)==0)
3404 log_verbose("setDefaultName of %s works.\n", name
);
3406 log_err("setDefaultName of %s failed\n", name
);
/* Passing NULL as the name asks ucnv_open() for the default converter. */
3407 cnv
=ucnv_open(NULL
, &status
);
3408 if (U_FAILURE(status
) || cnv
== NULL
) {
3409 log_err("opening the default converter of %s failed\n", name
);
3412 if(strcmp(ucnv_getName(cnv
, &status
), expected
)==0)
3413 log_verbose("ucnv_getName of %s works.\n", name
);
3415 log_err("ucnv_getName of %s failed\n", name
);
/*
 * Saves the current default converter name, runs TestOneDefaultNameChange()
 * for several names, and finally restores the saved default with
 * ucnv_setDefaultName().
 *
 * In the U_CHARSET_IS_UTF8 branch every request is expected to leave the
 * default at "UTF-8". The remaining calls expect the requested name itself;
 * the ISCII ones are additionally guarded by !UCONFIG_NO_LEGACY_CONVERSION.
 * (Presumably these sit in the #else branch; that directive line is not
 * visible in this excerpt.)
 */
3419 static void TestDefaultName(void) {
3420 /*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
3421 static char defaultName
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1];
/* NOTE(review): unbounded strcpy; relies on the default name fitting in UCNV_MAX_CONVERTER_NAME_LENGTH + 1 bytes. */
3422 strcpy(defaultName
, ucnv_getDefaultName());
3424 log_verbose("getDefaultName returned %s\n", defaultName
);
3426 /*change the default name by setting it */
3427 TestOneDefaultNameChange("UTF-8", "UTF-8");
3428 #if U_CHARSET_IS_UTF8
3429 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3430 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3431 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3433 # if !UCONFIG_NO_LEGACY_CONVERSION
3434 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3435 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3437 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3440 /*set the default name back*/
3441 ucnv_setDefaultName(defaultName
);
3444 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * Helper for TestCompareNames().
 *
 * names  NULL-terminated array of strings. The relation string is tested for
 *        '=' and '<'; each following name2 is compared against name1 with
 *        ucnv_compareNames() until the NULL terminator is reached.
 *
 * For every pair, sign(result) must equal rel; otherwise the pair, the raw
 * result and the expected sign are reported with log_err(). The statements
 * that load relation/name1 and assign rel are not visible in this excerpt.
 */
3458 compareNames(const char **names
) {
3459 const char *relation
, *name1
, *name2
;
3463 if(*relation
=='=') {
3465 } else if(*relation
=='<') {
/* Walk the remaining entries; the NULL element ends the list. */
3475 while((name2
=*names
++)!=NULL
) {
3476 result
=ucnv_compareNames(name1
, name2
);
3477 if(sign(result
)!=rel
) {
3478 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1
, name2
, result
, rel
);
/*
 * Runs compareNames() over four fixed, NULL-terminated name lists.
 * The first element of each list is the relation marker ("=" or "<");
 * the rest are converter-name spellings that differ in case, punctuation
 * and leading zeros.
 */
3485 TestCompareNames() {
/* Spellings grouped under the "=" marker. */
3486 static const char *equalUTF8
[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL
};
3487 static const char *equalIBM
[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL
};
/* Spellings grouped under the "<" marker. */
3488 static const char *lessMac
[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL
};
3489 static const char *lessUTF080
[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL
};
3491 compareNames(equalUTF8
);
3492 compareNames(equalIBM
);
3493 compareNames(lessMac
);
3494 compareNames(lessUTF080
);
/*
 * Body of the substitution-string test. (The function header line is not
 * present in this excerpt.)
 *
 * Visible steps:
 *   - UTF-16 and UTF-32: convert the single unpaired surrogate U+D900 with
 *     ucnv_fromUChars() and require ucnv_detectUnicodeSignature() to find a
 *     signature in the output ("did not write a BOM" otherwise).
 *   - ISO-8859-1: ucnv_setSubstString() with the 5-UChar string sub, then
 *     ucnv_getSubstChars() must return exactly the 5 bytes in subChars.
 *   - HZ (only with !UCONFIG_NO_LEGACY_CONVERSION): same setter call, but
 *     ucnv_getSubstChars() must report a length of 0.
 */
3499 static const UChar surrogate
[1]={ 0xd900 };
/* sub and subChars hold the same five values (0x61..0x65), as UChars and as bytes. */
3502 static const UChar sub
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3503 static const char subChars
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3505 UErrorCode errorCode
;
3509 /* UTF-16/32: test that the BOM is output before the sub character */
3510 errorCode
=U_ZERO_ERROR
;
3511 cnv
=ucnv_open("UTF-16", &errorCode
);
3512 if(U_FAILURE(errorCode
)) {
3513 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode
));
3516 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3518 if(U_FAILURE(errorCode
) ||
3520 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3522 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3525 errorCode
=U_ZERO_ERROR
;
3526 cnv
=ucnv_open("UTF-32", &errorCode
);
3527 if(U_FAILURE(errorCode
)) {
3528 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode
));
3531 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3533 if(U_FAILURE(errorCode
) ||
3535 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3537 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3540 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3541 errorCode
=U_ZERO_ERROR
;
3542 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
3543 if(U_FAILURE(errorCode
)) {
3544 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode
));
3547 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3548 if(U_FAILURE(errorCode
)) {
3549 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode
));
/* len8 is set to the size of buffer before ucnv_getSubstChars() and is compared against the expected length afterwards. */
3551 len8
= sizeof(buffer
);
3552 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3553 /* Stateless converter, we expect the string converted to charset bytes. */
3554 if(U_FAILURE(errorCode
) || len8
!=sizeof(subChars
) || 0!=uprv_memcmp(buffer
, subChars
, len8
)) {
3555 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode
));
3560 #if !UCONFIG_NO_LEGACY_CONVERSION
3561 errorCode
=U_ZERO_ERROR
;
3562 cnv
=ucnv_open("HZ", &errorCode
);
3563 if(U_FAILURE(errorCode
)) {
3564 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode
));
3567 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3568 if(U_FAILURE(errorCode
)) {
3569 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode
));
3571 len8
= sizeof(buffer
);
3572 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3573 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3574 if(U_FAILURE(errorCode
) || len8
!=0) {
3575 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode
));
3581 * Further testing of ucnv_setSubstString() is done via intltest convert.
3582 * We do not test edge cases of illegal arguments and similar because the
3583 * function implementation uses all of its parameters in calls to other
3584 * functions with UErrorCode parameters.
/*
 * Verifies that ucnv_fromUnicode() and ucnv_toUnicode() on a UTF-8 converter
 * reject malformed buffer arguments with U_ILLEGAL_ARGUMENT_ERROR and leave
 * the caller's buffers untouched.
 *
 * Four calls are made, each after resetting errorCode:
 *   1. fromUnicode, source limit one byte past the source start
 *   2. fromUnicode, source pointer beyond the source limit
 *   3. toUnicode,   target limit one byte past the target start
 *   4. toUnicode,   target pointer beyond the target limit
 * Finally charBuffer must still be {1, 1} and ucharAsCharBuffer {2, 2}.
 */
3589 InvalidArguments() {
3591 UErrorCode errorCode
;
/* Sentinel contents; the final check relies on them staying unchanged. */
3592 char charBuffer
[2] = {1, 1};
3593 char ucharAsCharBuffer
[2] = {2, 2};
3594 char *charsPtr
= charBuffer
;
3595 UChar
*ucharsPtr
= (UChar
*)ucharAsCharBuffer
;
/* One byte past ucharsPtr, i.e. not a whole number of UChars away from it. */
3596 UChar
*ucharsBadPtr
= (UChar
*)(ucharAsCharBuffer
+ 1);
3598 errorCode
=U_ZERO_ERROR
;
3599 cnv
=ucnv_open("UTF-8", &errorCode
);
3600 if(U_FAILURE(errorCode
)) {
3601 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode
));
3605 errorCode
=U_ZERO_ERROR
;
3606 /* This one should fail because an incomplete UChar is being passed in */
3607 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsPtr
, ucharsBadPtr
, NULL
, TRUE
, &errorCode
);
3608 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3609 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3612 errorCode
=U_ZERO_ERROR
;
3613 /* This one should fail because the source pointer (ucharsBadPtr) is greater than the source limit (ucharsPtr) */
3614 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsBadPtr
, ucharsPtr
, NULL
, TRUE
, &errorCode
);
3615 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3616 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3619 errorCode
=U_ZERO_ERROR
;
3620 /* This one should fail because an incomplete UChar is being passed in */
3621 ucnv_toUnicode(cnv
, &ucharsPtr
, ucharsBadPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3622 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3623 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3626 errorCode
=U_ZERO_ERROR
;
3627 /* This one should fail because the target pointer (ucharsBadPtr) is greater than the target limit (ucharsPtr) */
3628 ucnv_toUnicode(cnv
, &ucharsBadPtr
, ucharsPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3629 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3630 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
/* None of the rejected calls may have written into either buffer. */
3633 if (charBuffer
[0] != 1 || charBuffer
[1] != 1
3634 || ucharAsCharBuffer
[0] != 2 || ucharAsCharBuffer
[1] != 2)
3636 log_err("Data was incorrectly written to buffers\n");
/*
 * Checks that ucnv_getName() returns the expected name for converters
 * opened through alias names.
 *
 * names[] is a flat list of pairs: names[i] is the name passed to
 * ucnv_open(), names[i+1] the string ucnv_getName() must return.
 * A pair is only checked when ucnv_open() succeeds; mismatches (or a failing
 * ucnv_getName()) are reported with log_err().
 */
3642 static void TestGetName() {
3643 static const char *const names
[] = {
3644 "Unicode", "UTF-16",
3645 "UnicodeBigUnmarked", "UTF-16BE",
3646 "UnicodeBig", "UTF-16BE,version=1",
3647 "UnicodeLittleUnmarked", "UTF-16LE",
3648 "UnicodeLittle", "UTF-16LE,version=1",
3649 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two: each iteration handles one (alias, expected name) pair. */
3652 for(i
= 0; i
< LENGTHOF(names
); i
+= 2) {
3653 UErrorCode errorCode
= U_ZERO_ERROR
;
3654 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3655 if(U_SUCCESS(errorCode
)) {
3656 const char *name
= ucnv_getName(cnv
, &errorCode
);
3657 if(U_FAILURE(errorCode
) || 0 != strcmp(name
, names
[i
+1])) {
3658 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3659 names
[i
], name
, names
[i
+1], u_errorName(errorCode
));
3666 static void TestUTFBOM() {
3667 static const UChar a16
[] = { 0x61 };
3668 static const char *const names
[] = {
3676 static const uint8_t expected
[][5] = {
3678 { 4, 0xfe, 0xff, 0, 0x61 },
3679 { 4, 0xfe, 0xff, 0, 0x61 },
3681 { 4, 0xff, 0xfe, 0x61, 0 },
3682 { 4, 0xff, 0xfe, 0x61, 0 },
3686 { 4, 0xfe, 0xff, 0, 0x61 },
3689 { 4, 0xff, 0xfe, 0x61, 0 }
3695 for(i
= 0; i
< LENGTHOF(names
); ++i
) {
3696 UErrorCode errorCode
= U_ZERO_ERROR
;
3697 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3699 const uint8_t *exp
= expected
[i
];
3700 if (U_FAILURE(errorCode
)) {
3701 log_err_status(errorCode
, "Unable to open converter: %s got error code: %s\n", names
[i
], u_errorName(errorCode
));
3704 length
= ucnv_fromUChars(cnv
, bytes
, (int32_t)sizeof(bytes
), a16
, 1, &errorCode
);
3706 if(U_FAILURE(errorCode
) || length
!= exp
[0] || 0 != memcmp(bytes
, exp
+1, length
)) {
3707 log_err("unexpected %s BOM writing behavior -- %s\n",
3708 names
[i
], u_errorName(errorCode
));