1 /********************************************************************
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
*/
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
31 #define NUM_CODEPAGE 1
32 #define MAX_FILE_LEN 1024*20
33 #define UCS_FILE_NAME_SIZE 512
35 /*returns an action other than the one provided*/
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
);
38 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
);
42 cnv_open(const char *name
, UErrorCode
*pErrorCode
) {
43 if(name
!=NULL
&& name
[0]=='*') {
44 return ucnv_openPackage(loadTestData(pErrorCode
), name
+1, pErrorCode
);
46 return ucnv_open(name
, pErrorCode
);
51 static void ListNames(void);
52 static void TestFlushCache(void);
53 static void TestDuplicateAlias(void);
54 static void TestCCSID(void);
55 static void TestJ932(void);
56 static void TestJ1968(void);
57 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
58 static void TestLMBCSMaxChar(void);
61 #if !UCONFIG_NO_LEGACY_CONVERSION
62 static void TestConvertSafeCloneCallback(void);
65 static void TestEBCDICSwapLFNL(void);
66 static void TestConvertEx(void);
67 static void TestConvertExFromUTF8(void);
68 static void TestConvertExFromUTF8_C5F0(void);
69 static void TestConvertAlgorithmic(void);
70 void TestDefaultConverterError(void); /* defined in cctest.c */
71 void TestDefaultConverterSet(void); /* defined in cctest.c */
72 static void TestToUCountPending(void);
73 static void TestFromUCountPending(void);
74 static void TestDefaultName(void);
75 static void TestCompareNames(void);
76 static void TestSubstString(void);
77 static void InvalidArguments(void);
78 static void TestGetName(void);
79 static void TestUTFBOM(void);
81 void addTestConvert(TestNode
** root
);
83 void addTestConvert(TestNode
** root
)
85 addTest(root
, &ListNames
, "tsconv/ccapitst/ListNames");
86 addTest(root
, &TestConvert
, "tsconv/ccapitst/TestConvert");
87 addTest(root
, &TestFlushCache
, "tsconv/ccapitst/TestFlushCache");
88 addTest(root
, &TestAlias
, "tsconv/ccapitst/TestAlias");
89 addTest(root
, &TestDuplicateAlias
, "tsconv/ccapitst/TestDuplicateAlias");
90 addTest(root
, &TestConvertSafeClone
, "tsconv/ccapitst/TestConvertSafeClone");
91 #if !UCONFIG_NO_LEGACY_CONVERSION
92 addTest(root
, &TestConvertSafeCloneCallback
,"tsconv/ccapitst/TestConvertSafeCloneCallback");
94 addTest(root
, &TestCCSID
, "tsconv/ccapitst/TestCCSID");
95 addTest(root
, &TestJ932
, "tsconv/ccapitst/TestJ932");
96 addTest(root
, &TestJ1968
, "tsconv/ccapitst/TestJ1968");
97 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
98 addTest(root
, &TestLMBCSMaxChar
, "tsconv/ccapitst/TestLMBCSMaxChar");
100 addTest(root
, &TestEBCDICSwapLFNL
, "tsconv/ccapitst/TestEBCDICSwapLFNL");
101 addTest(root
, &TestConvertEx
, "tsconv/ccapitst/TestConvertEx");
102 addTest(root
, &TestConvertExFromUTF8
, "tsconv/ccapitst/TestConvertExFromUTF8");
103 addTest(root
, &TestConvertExFromUTF8_C5F0
, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
104 addTest(root
, &TestConvertAlgorithmic
, "tsconv/ccapitst/TestConvertAlgorithmic");
105 addTest(root
, &TestDefaultConverterError
, "tsconv/ccapitst/TestDefaultConverterError");
106 addTest(root
, &TestDefaultConverterSet
, "tsconv/ccapitst/TestDefaultConverterSet");
107 #if !UCONFIG_NO_FILE_IO
108 addTest(root
, &TestToUCountPending
, "tsconv/ccapitst/TestToUCountPending");
109 addTest(root
, &TestFromUCountPending
, "tsconv/ccapitst/TestFromUCountPending");
111 addTest(root
, &TestDefaultName
, "tsconv/ccapitst/TestDefaultName");
112 addTest(root
, &TestCompareNames
, "tsconv/ccapitst/TestCompareNames");
113 addTest(root
, &TestSubstString
, "tsconv/ccapitst/TestSubstString");
114 addTest(root
, &InvalidArguments
, "tsconv/ccapitst/InvalidArguments");
115 addTest(root
, &TestGetName
, "tsconv/ccapitst/TestGetName");
116 addTest(root
, &TestUTFBOM
, "tsconv/ccapitst/TestUTFBOM");
/*
 * ListNames: exercises the converter name enumeration and alias APIs.
 *
 * Visible steps, in order:
 *  - ucnv_openAllNames(), then uenum_count()/uenum_next() over the enumeration,
 *    uenum_reset() and a second uenum_next() pass;
 *  - ucnv_countAvailable() and ucnv_getAvailableName();
 *  - ucnv_countAliases(), ucnv_getAlias() and ucnv_getAliases() for "utf-8".
 *
 * NOTE(review): this listing appears to be missing lines (braces, the
 * declarations of len/count1/count2/count/alias/aliasNum, and the conditions
 * in front of several log calls). Confirm against the complete file.
 */
119 static void ListNames(void) {
120 UErrorCode err
= U_ZERO_ERROR
;
121 int32_t testLong1
= 0;
122 const char* available_conv
;
123 UEnumeration
*allNamesEnum
= NULL
;
124 int32_t allNamesCount
= 0;
127 log_verbose("Testing ucnv_openAllNames()...");
128 allNamesEnum
= ucnv_openAllNames(&err
);
130 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err
));
133 const char *string
= NULL
;
137 allNamesCount
= uenum_count(allNamesEnum
, &err
);
/* First pass: walk the whole enumeration and log each name with its length. */
138 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
140 log_verbose("read \"%s\", length %i\n", string
, len
);
142 if (U_FAILURE(err
)) {
143 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err
));
/* Second pass after uenum_reset(): each name is also opened and immediately
   closed, and the log line reports whether that open succeeded. */
146 uenum_reset(allNamesEnum
, &err
);
147 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
149 ucnv_close(ucnv_open(string
, &err
));
150 log_verbose("read \"%s\", length %i (%s)\n", string
, len
, U_SUCCESS(err
) ? "available" : "unavailable");
/* count1/count2 are not updated in the visible lines; presumably they count
   the names seen in the two passes -- TODO confirm. */
153 if (count1
!= count2
) {
154 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
157 uenum_close(allNamesEnum
);
160 /*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
162 log_verbose("Testing ucnv_countAvailable()...");
164 testLong1
=ucnv_countAvailable();
165 log_info("Number of available codepages: %d/%d\n", testLong1
, allNamesCount
);
167 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
/* The count itself is passed as the index; the result is overwritten by the
   next call below without being checked. */
169 available_conv
= ucnv_getAvailableName(testLong1
);
170 /*test ucnv_getAvailableName with err condition*/
171 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
172 available_conv
= ucnv_getAvailableName(-1);
173 if(available_conv
!= NULL
){
174 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
177 /* Test ucnv_countAliases() etc. */
178 count
= ucnv_countAliases("utf-8", &err
);
180 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err
));
181 } else if(count
<= 0) {
182 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count
);
184 /* try to get the aliases individually */
/* Alias 0 of "utf-8" is expected to be exactly "UTF-8". */
186 alias
= ucnv_getAlias("utf-8", 0, &err
);
188 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err
));
189 } else if(strcmp("UTF-8", alias
) != 0) {
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias
);
193 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
194 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
196 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
/* An alias longer than 20 characters is reported as probable corruption. */
197 } else if(strlen(alias
) > 20) {
199 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum
, alias
);
201 log_verbose("alias %d for utf-8: %s\n", aliasNum
, alias
);
205 /* try to fill an array with all aliases */
206 const char **aliases
;
207 aliases
=(const char **)malloc(count
* sizeof(const char *));
209 ucnv_getAliases("utf-8", aliases
, &err
);
211 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err
));
213 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
214 /* compare the pointers with the ones returned individually */
215 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
217 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
218 } else if(aliases
[aliasNum
] != alias
) {
219 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum
, aliasNum
);
/* Only the pointer array allocated above is released here. */
223 free((char **)aliases
);
/*
 * TestConvert: broad test of the converter C API.
 *
 * Visible sections, in order:
 *  - ucnv_openU() with valid, NULL, over-long and unknown names;
 *  - ucnv_open() with a name carrying an option suffix ("ibm-949,Madhu");
 *  - ucnv_convert() ibm-1363 -> ibm-1364, including error conditions;
 *  - ucnv_open()/ucnv_openCCSID() with a preset error code, then default opens;
 *  - for each entry of the CodePages* tables (NUM_CODEPAGE entries), using an
 *    "ibm-949" converter and a Unicode test file: ucnv_getName(), min/max
 *    char size, substitution characters, display name, from/to callbacks,
 *    CCSID, platform, ucnv_fromUChars()/ucnv_toUChars() and
 *    ucnv_fromUnicode()/ucnv_toUnicode() round trips;
 *  - release of the converter, the file and all heap buffers.
 *
 * NOTE(review): this listing appears to be missing lines (braces, several
 * declarations, table initializers and the conditions in front of many log
 * calls). The comments below describe only what is visible; confirm against
 * the complete file.
 */
231 static void TestConvert()
233 #if !UCONFIG_NO_LEGACY_CONVERSION
236 int32_t testLong1
= 0;
240 FILE* ucs_file_in
= NULL
;
242 UChar myUChar
= 0x0000;
243 char* mytarget
; /* [MAX_FILE_LEN] */
246 UChar
* consumedUni
= NULL
;
247 char* consumed
= NULL
;
248 char* output_cp_buffer
; /* [MAX_FILE_LEN] */
249 UChar
* ucs_file_buffer
; /* [MAX_FILE_LEN] */
250 UChar
* ucs_file_buffer_use
;
251 UChar
* my_ucs_file_buffer
; /* [MAX_FILE_LEN] */
252 UChar
* my_ucs_file_buffer_1
;
254 uint16_t codepage_index
= 0;
256 UErrorCode err
= U_ZERO_ERROR
;
257 char ucs_file_name
[UCS_FILE_NAME_SIZE
];
258 UConverterFromUCallback MIA1
, MIA1_2
;
259 UConverterToUCallback MIA2
, MIA2_2
;
260 const void *MIA1Context
, *MIA1Context2
, *MIA2Context
, *MIA2Context2
;
261 UConverter
* someConverters
[5];
262 UConverter
* myConverter
= 0;
263 UChar
* displayname
= 0;
270 int32_t targetcapacity2
;
271 int32_t targetcapacity
;
275 const UChar
* tmp_ucs_buf
;
276 const UChar
* tmp_consumedUni
=NULL
;
277 const char* tmp_mytarget_use
;
278 const char* tmp_consumed
;
280 /******************************************************************
281 Checking Unicode -> ksc
282 ******************************************************************/
/* Per-codepage expectation tables, indexed by codepage_index; their
   initializers are not visible in this listing. */
284 const char* CodePagesToTest
[NUM_CODEPAGE
] =
290 const uint16_t CodePageNumberToTest
[NUM_CODEPAGE
] =
296 const int8_t CodePagesMinChars
[NUM_CODEPAGE
] =
302 const int8_t CodePagesMaxChars
[NUM_CODEPAGE
] =
308 const uint16_t CodePagesSubstitutionChars
[NUM_CODEPAGE
] =
313 const char* CodePagesTestFiles
[NUM_CODEPAGE
] =
319 const UConverterPlatform CodePagesPlatform
[NUM_CODEPAGE
] =
325 const char* CodePagesLocale
[NUM_CODEPAGE
] =
330 UConverterFromUCallback oldFromUAction
= NULL
;
331 UConverterToUCallback oldToUAction
= NULL
;
332 const void* oldFromUContext
= NULL
;
333 const void* oldToUContext
= NULL
;
335 /* Allocate memory */
/* Four work buffers of MAX_FILE_LEN elements each; all four are freed at the
   end of the function. */
336 mytarget
= (char*) malloc(MAX_FILE_LEN
* sizeof(mytarget
[0]));
337 output_cp_buffer
= (char*) malloc(MAX_FILE_LEN
* sizeof(output_cp_buffer
[0]));
338 ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(ucs_file_buffer
[0]));
339 my_ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(my_ucs_file_buffer
[0]));
341 ucs_file_buffer_use
= ucs_file_buffer
;
343 mytarget_use
= mytarget
;
344 my_ucs_file_buffer_1
=my_ucs_file_buffer
;
346 /* flush the converter cache to get a consistent state before the flushing is tested */
349 /*Testing ucnv_openU()*/
351 UChar converterName
[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
352 UChar firstSortedName
[]={ 0x0021, 0x0000}; /* ! */
353 UChar lastSortedName
[]={ 0x007E, 0x0000}; /* ~ */
354 const char *illegalNameChars
={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
355 UChar illegalName
[100];
356 UConverter
*converter
=NULL
;
358 converter
=ucnv_openU(converterName
, &err
);
360 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err
));
362 ucnv_close(converter
);
364 converter
=ucnv_openU(NULL
, &err
);
366 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err
));
368 ucnv_close(converter
);
369 /*testing with error value: err is preset to a failure code, so the open must return NULL*/
370 err
=U_ILLEGAL_ARGUMENT_ERROR
;
371 converter
=ucnv_openU(converterName
, &err
);
372 if(!(converter
== NULL
)){
373 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
375 ucnv_close(converter
);
/* illegalNameChars is one long string of repeated "ibm-943" segments; opening
   it is expected to set U_ILLEGAL_ARGUMENT_ERROR. */
377 u_uastrcpy(illegalName
, "");
378 u_uastrcpy(illegalName
, illegalNameChars
);
379 ucnv_openU(illegalName
, &err
);
380 if(!(err
==U_ILLEGAL_ARGUMENT_ERROR
)){
381 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
/* The names "!" and "~" are both expected to yield U_FILE_ACCESS_ERROR. */
385 ucnv_openU(firstSortedName
, &err
);
386 if(err
!=U_FILE_ACCESS_ERROR
){
387 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
391 ucnv_openU(lastSortedName
, &err
);
392 if(err
!=U_FILE_ACCESS_ERROR
){
393 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
398 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
400 UConverter
*cnv
=NULL
;
402 cnv
=ucnv_open("ibm-949,Madhu", &err
);
404 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err
));
409 /*Testing ucnv_convert()*/
411 int32_t targetLimit
=0, sourceLimit
=0, i
=0, targetCapacity
=0;
412 const uint8_t source
[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
413 const uint8_t expectedTarget
[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
415 sourceLimit
=UPRV_LENGTHOF(source
);
/* First call passes a NULL target; when it reports U_BUFFER_OVERFLOW_ERROR the
   returned length (plus one) is used to size the real target buffer. */
419 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", NULL
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
420 if(err
== U_BUFFER_OVERFLOW_ERROR
){
422 targetLimit
=targetCapacity
+1;
423 target
=(char*)malloc(sizeof(char) * targetLimit
);
424 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
427 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err
));
430 for(i
=0; i
<targetCapacity
; i
++){
431 if(target
[i
] != expectedTarget
[i
]){
432 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i
, (UChar
)expectedTarget
[i
], (uint8_t)target
[i
]);
/* sourceLimit of -1 with the source starting at its second byte; 7 output
   bytes are expected. */
436 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
+1, -1, &err
);
437 if(U_FAILURE(err
) || i
!=7){
438 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
439 u_errorName(err
), i
);
442 /*Test error conditions*/
444 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, 0, &err
);
446 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
449 err
=U_ILLEGAL_ARGUMENT_ERROR
;
450 sourceLimit
=UPRV_LENGTHOF(source
);
451 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
453 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
457 sourceLimit
=UPRV_LENGTHOF(source
);
459 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
460 if(!(U_FAILURE(err
) && err
==U_BUFFER_OVERFLOW_ERROR
)){
461 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
468 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
469 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
470 err
=U_ILLEGAL_ARGUMENT_ERROR
;
471 if(ucnv_open(NULL
, &err
) != NULL
){
472 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
474 if(ucnv_openCCSID(1051, UCNV_IBM
, &err
) != NULL
){
475 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
479 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
480 log_verbose("\n---Testing ucnv_open default...\n");
481 someConverters
[0] = ucnv_open(NULL
,&err
);
482 someConverters
[1] = ucnv_open(NULL
,&err
);
483 someConverters
[2] = ucnv_open("utf8", &err
);
484 someConverters
[3] = ucnv_openCCSID(949,UCNV_IBM
,&err
);
485 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM
, &err
)); /* test for j350; ucnv_close(NULL) is safe */
486 if (U_FAILURE(err
)){ log_data_err("FAILURE! %s\n", myErrorName(err
));}
488 /* Testing ucnv_getName()*/
489 /*default code page */
490 ucnv_getName(someConverters
[0], &err
);
492 log_data_err("getName[0] failed\n");
494 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters
[0], &err
));
496 ucnv_getName(someConverters
[1], &err
);
498 log_data_err("getName[1] failed\n");
500 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters
[1], &err
));
503 ucnv_close(someConverters
[0]);
504 ucnv_close(someConverters
[1]);
505 ucnv_close(someConverters
[2]);
506 ucnv_close(someConverters
[3]);
/* Main loop over the CodePages* tables. */
509 for (codepage_index
=0; codepage_index
< NUM_CODEPAGE
; ++codepage_index
)
/* Build the path of the Unicode test file in ucs_file_name. Two alternative
   strcpy() starting points are visible; the condition that selects between
   them is not part of this listing. */
515 strcpy(ucs_file_name
, U_TOPSRCDIR U_FILE_SEP_STRING
"test"U_FILE_SEP_STRING
"testdata"U_FILE_SEP_STRING
);
517 strcpy(ucs_file_name
, loadTestData(&err
));
520 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err
));
525 char* index
= strrchr(ucs_file_name
,(char)U_FILE_SEP_CHAR
);
/* True when the last separator found is not the final character of the path. */
527 if((unsigned int)(index
-ucs_file_name
) != (strlen(ucs_file_name
)-1)){
532 strcat(ucs_file_name
,".."U_FILE_SEP_STRING
);
534 strcat(ucs_file_name
, CodePagesTestFiles
[codepage_index
]);
536 ucs_file_in
= fopen(ucs_file_name
,"rb");
539 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name
);
543 /*Creates a converter; the ucnv_openCCSID(code_page, platform, errstatus) variant is commented out below*/
545 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
546 /* ucnv_flushCache(); */
547 myConverter
=ucnv_open( "ibm-949", &err
);
548 if (!myConverter
|| U_FAILURE(err
))
550 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err
));
555 /*testing for ucnv_getName() */
556 log_verbose("Testing ucnv_getName()...\n");
557 ucnv_getName(myConverter
, &err
);
559 log_err("Error in getName\n");
562 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter
, &err
));
/* Case-insensitive comparison of the converter name with the expected table entry. */
564 if (uprv_stricmp(ucnv_getName(myConverter
, &err
), CodePagesToTest
[codepage_index
]))
565 log_err("getName failed\n");
567 log_verbose("getName ok\n");
568 /*Test getName with error condition*/
571 err
=U_ILLEGAL_ARGUMENT_ERROR
;
572 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
573 name
=ucnv_getName(myConverter
, &err
);
575 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
581 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
583 log_verbose("Testing ucnv_getMaxCharSize()...\n");
584 if (ucnv_getMaxCharSize(myConverter
)==CodePagesMaxChars
[codepage_index
])
585 log_verbose("Max byte per character OK\n");
587 log_err("Max byte per character failed\n");
589 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
590 if (ucnv_getMinCharSize(myConverter
)==CodePagesMinChars
[codepage_index
])
591 log_verbose("Min byte per character OK\n");
593 log_err("Min byte per character failed\n");
596 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
597 log_verbose("\n---Testing ucnv_getSubstChars...\n");
599 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
601 log_err("ucnv_getSubstChars returned a negative number %d\n", ii
);
/* Folds the substitution bytes into one 16-bit value, earlier bytes in the
   higher position, for comparison with the expected table entry. */
605 rest
= (uint16_t)(((unsigned char)rest
<< 8) + (unsigned char)myptr
[x
]);
606 if (rest
==CodePagesSubstitutionChars
[codepage_index
])
607 log_verbose("Substitution character ok\n");
609 log_err("Substitution character failed.\n");
611 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
612 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
615 log_err("FAILURE! %s\n", myErrorName(err
));
617 ucnv_getSubstChars(myConverter
,save
, &ii
, &err
);
620 log_err("FAILURE! %s\n", myErrorName(err
));
623 if (strncmp(save
, myptr
, ii
))
624 log_err("Saved substitution character failed\n");
626 log_verbose("Saved substitution character ok\n");
628 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
629 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
631 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
632 if(err
!= U_INDEX_OUTOFBOUNDS_ERROR
){
633 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err
));
637 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
638 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
639 ucnv_setSubstChars(myConverter
, myptr
, 0, &err
);
640 if(err
!= U_ILLEGAL_ARGUMENT_ERROR
){
641 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err
));
643 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
644 strcpy(myptr
, "abc");
645 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
647 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
648 if(strncmp(save
, myptr
, ii
) == 0){
649 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
651 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
653 strcpy(myptr
, "abc");
654 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
655 err
=U_ILLEGAL_ARGUMENT_ERROR
;
656 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
657 if(strncmp(save
, myptr
, ii
) == 0){
658 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
663 #ifdef U_ENABLE_GENERIC_ISO_2022
664 /*resetState ucnv_reset()*/
665 log_verbose("\n---Testing ucnv_reset()..\n");
666 ucnv_reset(myConverter
);
669 const uint8_t in
[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
670 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
671 UConverter
*cnv
=ucnv_open("ISO_2022", &err
);
673 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
/* The first character decoded from the byte sequence must be U+0031. */
675 c
=ucnv_getNextUChar(cnv
, &source
, limit
, &err
);
676 if((U_FAILURE(err
) || c
!= (UChar32
)0x0031)) {
677 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err
));
686 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
687 locale
=CodePagesLocale
[codepage_index
];
/* First call is expected to report U_BUFFER_OVERFLOW_ERROR; the returned
   length is then used to allocate displayname for the second call. */
690 disnamelen
= ucnv_getDisplayName(myConverter
, locale
, displayname
, len
, &err
);
691 if(err
==U_BUFFER_OVERFLOW_ERROR
) {
693 displayname
=(UChar
*)malloc((disnamelen
+1) * sizeof(UChar
));
694 ucnv_getDisplayName(myConverter
,locale
,displayname
,disnamelen
+1, &err
);
696 log_err("getDisplayName failed. The error is %s\n", myErrorName(err
));
699 log_verbose(" getDisplayName o.k.\n");
705 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err
));
707 /*test ucnv_getDisplayName with error condition*/
708 err
= U_ILLEGAL_ARGUMENT_ERROR
;
709 len
=ucnv_getDisplayName(myConverter
,locale
,NULL
,0, &err
);
711 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
713 /*test ucnv_getDisplayName with error condition*/
715 len
=ucnv_getDisplayName(NULL
,locale
,NULL
,0, &err
);
716 if( len
!=0 || U_SUCCESS(err
)){
717 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
721 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
722 ucnv_getFromUCallBack(myConverter
, &MIA1
, &MIA1Context
);
724 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
/* Installs the "other" action with &BOM as context; the previous action and
   context handed back must equal the ones read just above. */
725 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
726 if (U_FAILURE(err
) || oldFromUAction
!= MIA1
|| oldFromUContext
!= MIA1Context
)
728 log_err("FAILURE! %s\n", myErrorName(err
));
731 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
732 if (MIA1_2
!= otherUnicodeAction(MIA1
) || MIA1Context2
!= &BOM
)
733 log_err("get From UCallBack failed\n");
735 log_verbose("get From UCallBack ok\n");
737 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
738 ucnv_setFromUCallBack(myConverter
,MIA1
, MIA1Context
, &oldFromUAction
, &oldFromUContext
, &err
);
739 if (U_FAILURE(err
) || oldFromUAction
!= otherUnicodeAction(MIA1
) || oldFromUContext
!= &BOM
)
741 log_err("FAILURE! %s\n", myErrorName(err
));
744 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
745 if (MIA1_2
!= MIA1
|| MIA1Context2
!= MIA1Context
)
746 log_err("get From UCallBack action failed\n");
748 log_verbose("get From UCallBack action ok\n");
750 /*testing ucnv_setFromUCallBack with error conditions*/
751 err
=U_ILLEGAL_ARGUMENT_ERROR
;
752 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
753 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
754 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
755 if(MIA1_2
== otherUnicodeAction(MIA1
) || MIA1Context2
== &BOM
){
756 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
761 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
762 ucnv_getToUCallBack(myConverter
, &MIA2
, &MIA2Context
);
764 log_verbose("\n---Testing setTo UCallBack...\n");
765 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), &BOM
, &oldToUAction
, &oldToUContext
, &err
);
766 if (U_FAILURE(err
) || oldToUAction
!= MIA2
|| oldToUContext
!= MIA2Context
)
768 log_err("FAILURE! %s\n", myErrorName(err
));
771 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
772 if (MIA2_2
!= otherCharAction(MIA2
) || MIA2Context2
!= &BOM
)
773 log_err("To UCallBack failed\n");
775 log_verbose("To UCallBack ok\n");
777 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
778 ucnv_setToUCallBack(myConverter
,MIA2
, MIA2Context
, &oldToUAction
, &oldToUContext
, &err
);
779 if (U_FAILURE(err
) || oldToUAction
!= otherCharAction(MIA2
) || oldToUContext
!= &BOM
)
780 { log_err("FAILURE! %s\n", myErrorName(err
)); }
782 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
783 if (MIA2_2
!= MIA2
|| MIA2Context2
!= MIA2Context
)
784 log_err("To UCallBack failed\n");
786 log_verbose("To UCallBack ok\n");
788 /*testing ucnv_setToUCallBack with error conditions*/
789 err
=U_ILLEGAL_ARGUMENT_ERROR
;
790 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
791 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), NULL
, &oldToUAction
, &oldToUContext
, &err
);
792 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
793 if (MIA2_2
== otherCharAction(MIA2
) || MIA2Context2
== &BOM
){
794 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
799 /*getcodepageid testing ucnv_getCCSID() */
800 log_verbose("\n----Testing getCCSID....\n");
801 cp
= ucnv_getCCSID(myConverter
,&err
);
804 log_err("FAILURE!..... %s\n", myErrorName(err
));
806 if (cp
!= CodePageNumberToTest
[codepage_index
])
807 log_err("Codepage number test failed\n");
809 log_verbose("Codepage number test OK\n");
811 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
812 err
=U_ILLEGAL_ARGUMENT_ERROR
;
813 if( ucnv_getCCSID(myConverter
,&err
) != -1){
814 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
818 /*getCodepagePlatform testing ucnv_getPlatform()*/
819 log_verbose("\n---Testing getCodepagePlatform ..\n");
820 if (CodePagesPlatform
[codepage_index
]!=ucnv_getPlatform(myConverter
, &err
))
821 log_err("Platform codepage test failed\n");
823 log_verbose("Platform codepage test ok\n");
827 log_err("FAILURE! %s\n", myErrorName(err
));
829 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
830 err
= U_ILLEGAL_ARGUMENT_ERROR
;
831 if(ucnv_getPlatform(myConverter
, &err
) != UCNV_UNKNOWN
){
832 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
839 // Note: gcc produces a compile warning if the return value from fread() is ignored.
/* The first UChar of the file is read into BOM; it must be 0xFEFF or 0xFFFE. */
840 size_t numRead
= fread(&BOM
, sizeof(UChar
), 1, ucs_file_in
);
843 if (BOM
!=0xFEFF && BOM
!=0xFFFE)
845 log_err("File Missing BOM...Bailing!\n");
851 /*Reads in the file*/
/* Reads one UChar at a time; when BOM is not 0xFEFF the two bytes of each unit
   are swapped after reading. */
852 while(!feof(ucs_file_in
)&&(i
+=fread(ucs_file_buffer
+i
, sizeof(UChar
), 1, ucs_file_in
)))
854 myUChar
= ucs_file_buffer
[i
-1];
856 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN*/
859 myUChar
= ucs_file_buffer
[i
-1];
860 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN Corner Case*/
863 /*testing ucnv_fromUChars() and ucnv_toUChars() */
864 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
/* uchar1 and uchar3 each receive a copy of the first i UChars read from the file. */
866 uchar1
=(UChar
*)malloc(sizeof(UChar
) * (i
+1));
867 u_uastrcpy(uchar1
,"");
868 u_strncpy(uchar1
,ucs_file_buffer
,i
);
871 uchar3
=(UChar
*)malloc(sizeof(UChar
)*(i
+1));
872 u_uastrcpy(uchar3
,"");
873 u_strncpy(uchar3
,ucs_file_buffer
,i
);
876 /*Calls the Conversion Routine */
877 testLong1
= MAX_FILE_LEN
;
878 log_verbose("\n---Testing ucnv_fromUChars()\n");
879 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
882 log_err("\nFAILURE...%s\n", myErrorName(err
));
885 log_verbose(" ucnv_fromUChars() o.k.\n");
887 /*test the conversion routine */
888 log_verbose("\n---Testing ucnv_toUChars()\n");
889 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
891 targetsize
= ucnv_toUChars(myConverter
,
895 strlen(output_cp_buffer
),
897 /*if there is a buffer overflow then trap the values and pass them and make the actual call*/
899 if(err
==U_BUFFER_OVERFLOW_ERROR
)
902 uchar2
=(UChar
*)malloc((targetsize
+1) * sizeof(UChar
));
903 targetsize
= ucnv_toUChars(myConverter
,
907 strlen(output_cp_buffer
),
911 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err
));
913 log_verbose(" ucnv_toUChars() o.k.\n");
915 if(u_strcmp(uchar1
,uchar2
)!=0)
916 log_err("equality test failed with conversion routine\n");
920 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
922 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
923 err
=U_ILLEGAL_ARGUMENT_ERROR
;
924 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
925 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
926 if (targetcapacity
!=0) {
927 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
930 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
931 targetcapacity
= ucnv_fromUChars(NULL
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
932 if (targetcapacity
!=0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
933 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
936 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
937 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, 0, &err
);
938 if (targetcapacity
!=0) {
939 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
941 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
942 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, 0, uchar1
, -1, &err
);
943 if (err
!= U_BUFFER_OVERFLOW_ERROR
) {
944 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
946 /*toUChars with error conditions*/
947 targetsize
= ucnv_toUChars(myConverter
, uchar2
, targetsize
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
949 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
952 targetsize
= ucnv_toUChars(myConverter
, uchar2
, -1, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
953 if(targetsize
!= 0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
){
954 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
957 targetsize
= ucnv_toUChars(myConverter
, uchar2
, 0, output_cp_buffer
, 0, &err
);
958 if (targetsize
!=0) {
959 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
962 targetsize
= ucnv_toUChars(myConverter
, NULL
, targetcapacity2
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
963 if (err
!= U_STRING_NOT_TERMINATED_WARNING
) {
964 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
971 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
972 /*Clean up re-usable vars*/
973 log_verbose("Testing ucnv_fromUnicode().....\n");
974 tmp_ucs_buf
=ucs_file_buffer_use
;
975 ucnv_fromUnicode(myConverter
, &mytarget_1
,
976 mytarget
+ MAX_FILE_LEN
,
978 ucs_file_buffer_use
+i
,
982 consumedUni
= (UChar
*)tmp_consumedUni
;
983 (void)consumedUni
; /* Suppress set but not used warning. */
987 log_err("FAILURE! %s\n", myErrorName(err
));
990 log_verbose("ucnv_fromUnicode() o.k.\n");
992 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
993 log_verbose("Testing ucnv_toUnicode().....\n");
994 tmp_mytarget_use
=mytarget_use
;
995 tmp_consumed
= consumed
;
/* The source limit is mytarget_use plus the number of bytes written to mytarget
   by ucnv_fromUnicode() above (mytarget_1 - mytarget). */
996 ucnv_toUnicode(myConverter
, &my_ucs_file_buffer_1
,
997 my_ucs_file_buffer
+ MAX_FILE_LEN
,
999 mytarget_use
+ (mytarget_1
- mytarget
),
1003 consumed
= (char*)tmp_consumed
;
1006 log_err("FAILURE! %s\n", myErrorName(err
));
1009 log_verbose("ucnv_toUnicode() o.k.\n");
1012 log_verbose("\n---Testing RoundTrip ...\n");
/* uchar3 is overwritten with the round-tripped text and compared with uchar1
   and then with uchar2. */
1015 u_strncpy(uchar3
, my_ucs_file_buffer
,i
);
1018 if(u_strcmp(uchar1
,uchar3
)==0)
1019 log_verbose("Equality test o.k.\n");
1021 log_err("Equality test failed\n");
1026 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__
);
1030 if(u_strcmp(uchar2
, uchar3
)==0)
1031 log_verbose("Equality test o.k.\n");
1033 log_err("Equality test failed\n");
/* Cleanup: close the file and converter, then free the per-file strings and
   the four work buffers allocated at the top. */
1036 fclose(ucs_file_in
);
1037 ucnv_close(myConverter
);
1038 if (uchar1
!= 0) free(uchar1
);
1039 if (uchar2
!= 0) free(uchar2
);
1040 if (uchar3
!= 0) free(uchar3
);
1043 free((void*)mytarget
);
1044 free((void*)output_cp_buffer
);
1045 free((void*)ucs_file_buffer
);
1046 free((void*)my_ucs_file_buffer
);
1050 #if !UCONFIG_NO_LEGACY_CONVERSION
1051 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
)
1053 return (MIA
==(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
)?(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_SUBSTITUTE
:(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
;
1056 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
)
1058 return (MIA
==(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
)?(UConverterToUCallback
)UCNV_TO_U_CALLBACK_SUBSTITUTE
:(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
;
/*
 * Checks the count returned by ucnv_flushCache() as converters are closed.
 * Five converters are opened with ucnv_open(): three handles on "ibm-1047",
 * one on "gb18030" and one on "ibm-954". The test then expects
 * ucnv_flushCache() to return
 *   - 0 while every handle is still open,
 *   - 0 after two of the three "ibm-1047" handles are closed,
 *   - 2 after the last "ibm-1047" handle and "gb18030" are closed,
 *   - 1 after "ibm-954" is closed.
 * Open failures and unexpected counts are reported through log_data_err.
 * The body is guarded by #if !UCONFIG_NO_LEGACY_CONVERSION.
 */
1062 static void TestFlushCache(void) {
1063 #if !UCONFIG_NO_LEGACY_CONVERSION
1064 UErrorCode err
= U_ZERO_ERROR
;
1065 UConverter
* someConverters
[5];
1068 /* flush the converter cache to get a consistent state before the flushing is tested */
1071 /*Testing ucnv_open()*/
1072 /* Note: These converters have been chosen because they do NOT
1073 encode the Latin characters (U+0041, ...), and therefore are
1074 highly unlikely to be chosen as system default codepages */
/* Slots 0-2 are three separate handles on the same converter name. */
1076 someConverters
[0] = ucnv_open("ibm-1047", &err
);
1077 if (U_FAILURE(err
)) {
1078 log_data_err("FAILURE! %s\n", myErrorName(err
));
1081 someConverters
[1] = ucnv_open("ibm-1047", &err
);
1082 if (U_FAILURE(err
)) {
1083 log_data_err("FAILURE! %s\n", myErrorName(err
));
1086 someConverters
[2] = ucnv_open("ibm-1047", &err
);
1087 if (U_FAILURE(err
)) {
1088 log_data_err("FAILURE! %s\n", myErrorName(err
));
/* Slots 3 and 4 use two further, different converter names. */
1091 someConverters
[3] = ucnv_open("gb18030", &err
);
1092 if (U_FAILURE(err
)) {
1093 log_data_err("FAILURE! %s\n", myErrorName(err
));
1096 someConverters
[4] = ucnv_open("ibm-954", &err
);
1097 if (U_FAILURE(err
)) {
1098 log_data_err("FAILURE! %s\n", myErrorName(err
));
1102 /* Testing ucnv_flushCache() */
/* All five handles are open here, so the expected flush count is 0. */
1103 log_verbose("\n---Testing ucnv_flushCache...\n");
1104 if ((flushCount
=ucnv_flushCache())==0)
1105 log_verbose("Flush cache ok\n");
1107 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1109 /*testing ucnv_close() and ucnv_flushCache() */
/* Closing two of the three "ibm-1047" handles must still flush nothing. */
1110 ucnv_close(someConverters
[0]);
1111 ucnv_close(someConverters
[1]);
1113 if ((flushCount
=ucnv_flushCache())==0)
1114 log_verbose("Flush cache ok\n");
1116 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
/* With the last "ibm-1047" handle and "gb18030" closed, 2 entries are expected to be flushed. */
1118 ucnv_close(someConverters
[2]);
1119 ucnv_close(someConverters
[3]);
1121 if ((flushCount
=ucnv_flushCache())==2)
1122 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1124 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
/* Closing the remaining "ibm-954" handle should make exactly 1 more entry flushable. */
1128 ucnv_close(someConverters
[4]);
1129 if ( (flushCount
=ucnv_flushCache())==1)
1130 log_verbose("Flush cache ok\n");
1132 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__
, flushCount
);
1137 * Test the converter alias API, specifically the fuzzy matching of
1138 * alias names and the alias table integrity. Make sure each
1139 * converter has at least one alias (itself), and that its listed
1140 * aliases map back to itself. Check some hard-coded UTF-8 and
1141 * ISO_2022 aliases to make sure they work.
/*
 * Tests the converter alias API.
 * Pass 1 walks every name from ucnv_getAvailableName(): the converter must
 * have at least one alias, must be openable, ucnv_getName() must return the
 * same name (names containing "PlatformEndian"/"OppositeEndian" are exempt),
 * and alias 0 of every listed alias must map back to alias 0 of the
 * converter unless the alias is reported as ambiguous and the other
 * converter lists it too.
 * Pass 2 checks three hard-coded tables (ISO_2022_NAMES, UTF8_NAMES,
 * CONVERTERS_NAMES) whose entries are expected to resolve to a fixed
 * canonical name.
 */
1143 static void TestAlias() {
1145 UErrorCode status
= U_ZERO_ERROR
;
1147 /* Predetermined aliases that we expect to map back to ISO_2022
1148 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1149 const char* ISO_2022_NAMES
[] =
1150 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1151 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1152 int32_t ISO_2022_NAMES_LENGTH
= UPRV_LENGTHOF(ISO_2022_NAMES
);
1153 const char *UTF8_NAMES
[] =
1154 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1155 "utf_8", "ibm1208", "cp1208" };
1156 int32_t UTF8_NAMES_LENGTH
= UPRV_LENGTHOF(UTF8_NAMES
);
/* Pairs of expected canonical name (.name) and the alias to look up (.alias). */
1161 } CONVERTERS_NAMES
[] = {
1162 { "UTF-32BE", "UTF32_BigEndian" },
1163 { "UTF-32LE", "UTF32_LittleEndian" },
1164 { "UTF-32", "ISO-10646-UCS-4" },
1165 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1166 { "UTF-32", "ucs-4" }
1168 int32_t CONVERTERS_NAMES_LENGTH
= UPRV_LENGTHOF(CONVERTERS_NAMES
);
1170 /* When there are bugs in gencnval or in ucnv_io, converters can
1171 appear to have no aliases. */
1172 ncnv
= ucnv_countAvailable();
1173 log_verbose("%d converters\n", ncnv
);
1174 for (i
=0; i
<ncnv
; ++i
) {
1175 const char *name
= ucnv_getAvailableName(i
);
1177 uint16_t na
= ucnv_countAliases(name
, &status
);
1182 log_err("FAIL: Converter \"%s\" (i=%d)"
1183 " has no aliases; expect at least one\n",
1187 cnv
= ucnv_open(name
, &status
);
1188 if (U_FAILURE(status
)) {
1189 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1190 " can't be opened.\n",
/* The opened converter must report the name it was opened with, except for the endian-relative names. */
1194 if (strcmp(ucnv_getName(cnv
, &status
), name
) != 0
1195 && (strstr(name
, "PlatformEndian") == 0 && strstr(name
, "OppositeEndian") == 0)) {
1196 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1197 "They should be the same\n",
1198 name
, ucnv_getName(cnv
, &status
));
/* alias0 is the reference value that every other alias of this converter must map back to. */
1203 status
= U_ZERO_ERROR
;
1204 alias0
= ucnv_getAlias(name
, 0, &status
);
1205 for (j
=1; j
<na
; ++j
) {
1207 /* Make sure each alias maps back to the same list of
1208 aliases. Assume that if alias 0 is the same, the whole
1209 list is the same (this should always be true). */
1210 const char *mapBack
;
1212 status
= U_ZERO_ERROR
;
1213 alias
= ucnv_getAlias(name
, j
, &status
);
1214 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1215 log_err("FAIL: Converter \"%s\"is ambiguous\n", name
);
1218 if (alias
== NULL
) {
1219 log_err("FAIL: Converter \"%s\" -> "
1225 mapBack
= ucnv_getAlias(alias
, 0, &status
);
1227 if (mapBack
== NULL
) {
1228 log_err("FAIL: Converter \"%s\" -> "
1229 "alias[%d]=\"%s\" -> "
1230 "alias[0]=NULL, exp. \"%s\"\n",
1231 name
, j
, alias
, alias0
);
1235 if (0 != strcmp(alias0
, mapBack
)) {
1237 UBool foundAlias
= FALSE
;
1238 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1239 /* Make sure that we only get this mismapping when there is
1240 an ambiguous alias, and the other converter has this alias too. */
1241 for (idx
= 0; idx
< ucnv_countAliases(mapBack
, &status
); idx
++) {
1242 if (strcmp(ucnv_getAlias(mapBack
, (uint16_t)idx
, &status
), alias
) == 0) {
1248 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1251 log_err("FAIL: Converter \"%s\" -> "
1252 "alias[%d]=\"%s\" -> "
1253 "alias[0]=\"%s\", exp. \"%s\"\n",
1254 name
, j
, alias
, mapBack
, alias0
);
1261 /* Check a list of predetermined aliases that we expect to map
1262 * back to ISO_2022 and UTF-8. */
/* Both loops start at index 1 because entry 0 of each table is the expected result. */
1263 for (i
=1; i
<ISO_2022_NAMES_LENGTH
; ++i
) {
1264 const char* mapBack
= ucnv_getAlias(ISO_2022_NAMES
[i
], 0, &status
);
1266 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES
[i
]);
1269 if (0 != strcmp(mapBack
, ISO_2022_NAMES
[0])) {
1270 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1271 ISO_2022_NAMES
[i
], mapBack
);
1276 for (i
=1; i
<UTF8_NAMES_LENGTH
; ++i
) {
1277 const char* mapBack
= ucnv_getAlias(UTF8_NAMES
[i
], 0, &status
);
1279 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES
[i
]);
1282 if (mapBack
&& 0 != strcmp(mapBack
, UTF8_NAMES
[0])) {
1283 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1284 UTF8_NAMES
[i
], mapBack
);
1289 * Check a list of predetermined aliases that we expect to map
1290 * back to predermined converter names.
/* NOTE(review): the lookup below uses CONVERTERS_NAMES[i].alias, but the "Couldn't get alias" message prints CONVERTERS_NAMES[i].name - probably should print .alias; confirm. */
1293 for (i
= 0; i
< CONVERTERS_NAMES_LENGTH
; ++i
) {
1294 const char* mapBack
= ucnv_getAlias(CONVERTERS_NAMES
[i
].alias
, 0, &status
);
1296 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES
[i
].name
);
1299 if (0 != strcmp(mapBack
, CONVERTERS_NAMES
[i
].name
)) {
1300 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1301 CONVERTERS_NAMES
[i
].alias
, mapBack
, CONVERTERS_NAMES
[i
].name
);
1307 static void TestDuplicateAlias(void) {
1309 UErrorCode status
= U_ZERO_ERROR
;
1311 status
= U_ZERO_ERROR
;
1312 alias
= ucnv_getStandardName("Shift_JIS", "IBM", &status
);
1313 if (alias
== NULL
|| strcmp(alias
, "ibm-943") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1314 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias
);
1316 status
= U_ZERO_ERROR
;
1317 alias
= ucnv_getStandardName("ibm-943", "IANA", &status
);
1318 if (alias
== NULL
|| strcmp(alias
, "Shift_JIS") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1319 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias
);
1321 status
= U_ZERO_ERROR
;
1322 alias
= ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status
);
1323 if (alias
!= NULL
|| status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1324 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias
);
1329 /* Test safe clone callback */
/*
 * Serial-number source for TSCCContext objects (used by TSCC_clone and
 * TSCC_init). Keeps a function-local static counter that starts at 1.
 * NOTE(review): presumably returns the current value and then increments
 * it, so serials are 1, 2, 3, ... - confirm against the return statement.
 * The counter is a plain static, so there is no synchronization.
 */
1331 static uint32_t TSCC_nextSerial()
1333 static uint32_t n
= 1;
/*
 * Fields of TSCCContext, the context object handed to the TSCC_fromU and
 * TSCC_toU callbacks. The callbacks check magic on every call and set
 * wasClosed to TRUE when they are invoked with reason UCNV_CLOSE.
 */
1340 uint32_t magic
; /* 0xC0FFEE to identify that the object is OK */
1341 uint32_t serial
; /* minted from nextSerial, above */
1342 UBool wasClosed
; /* close happened on the object */
1345 static TSCCContext
*TSCC_clone(TSCCContext
*ctx
)
1347 TSCCContext
*newCtx
= (TSCCContext
*)malloc(sizeof(TSCCContext
));
1349 newCtx
->serial
= TSCC_nextSerial();
1350 newCtx
->wasClosed
= 0;
1351 newCtx
->magic
= 0xC0FFEE;
1353 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx
, ctx
->serial
, newCtx
, newCtx
->serial
);
1358 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode callback used by the safe-clone callback test.
 * context is treated as a TSCCContext. Every invocation is logged and the
 * context magic value is verified (a mismatch is reported with log_err).
 *  - reason == UCNV_CLONE: a new context is created with TSCC_clone() and
 *    installed on fromUArgs->converter through ucnv_setFromUCallBack(),
 *    re-using the callback function currently registered on that converter.
 *  - reason == UCNV_CLOSE: the context is marked with wasClosed = TRUE.
 */
1359 static void TSCC_fromU(const void *context
,
1360 UConverterFromUnicodeArgs
*fromUArgs
,
1361 const UChar
* codeUnits
,
1364 UConverterCallbackReason reason
,
1367 TSCCContext
*ctx
= (TSCCContext
*)context
;
1368 UConverterFromUCallback junkFrom
;
1370 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, fromUArgs
->converter
);
/* Sanity check: a wrong magic value means the context pointer is not a live TSCCContext. */
1372 if(ctx
->magic
!= 0xC0FFEE) {
1373 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1377 if(reason
== UCNV_CLONE
) {
1378 UErrorCode subErr
= U_ZERO_ERROR
;
1379 TSCCContext
*newCtx
;
1380 TSCCContext
*junkCtx
;
1381 TSCCContext
**pjunkCtx
= &junkCtx
;
1384 log_verbose("TSCC_fromU: cloning..\n");
1385 newCtx
= TSCC_clone(ctx
);
1387 if(newCtx
== NULL
) {
1388 log_err("TSCC_fromU: internal clone failed on %p\n", ctx
);
/* Fetch the currently registered callback function (the old context is discarded into junkCtx), then re-register it with the new context. */
1392 ucnv_getFromUCallBack(fromUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1393 ucnv_setFromUCallBack(fromUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1395 if(U_FAILURE(subErr
)) {
1400 if(reason
== UCNV_CLOSE
) {
1401 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1402 ctx
->wasClosed
= TRUE
;
/*
 * To-Unicode callback used by the safe-clone callback test; the mirror image
 * of TSCC_fromU. context is treated as a TSCCContext. Every invocation is
 * logged and the context magic value is verified.
 *  - reason == UCNV_CLONE: a new context is created with TSCC_clone() and
 *    installed on toUArgs->converter through ucnv_setToUCallBack(), re-using
 *    the callback function currently registered on that converter.
 *  - reason == UCNV_CLOSE: the context is marked with wasClosed = TRUE.
 * Note that the local named junkFrom holds a to-Unicode callback here.
 */
1406 static void TSCC_toU(const void *context
,
1407 UConverterToUnicodeArgs
*toUArgs
,
1408 const char* codeUnits
,
1410 UConverterCallbackReason reason
,
1413 TSCCContext
*ctx
= (TSCCContext
*)context
;
1414 UConverterToUCallback junkFrom
;
1416 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, toUArgs
->converter
);
/* Sanity check: a wrong magic value means the context pointer is not a live TSCCContext. */
1418 if(ctx
->magic
!= 0xC0FFEE) {
1419 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1423 if(reason
== UCNV_CLONE
) {
1424 UErrorCode subErr
= U_ZERO_ERROR
;
1425 TSCCContext
*newCtx
;
1426 TSCCContext
*junkCtx
;
1427 TSCCContext
**pjunkCtx
= &junkCtx
;
1430 log_verbose("TSCC_toU: cloning..\n");
1431 newCtx
= TSCC_clone(ctx
);
1433 if(newCtx
== NULL
) {
1434 log_err("TSCC_toU: internal clone failed on %p\n", ctx
);
/* Fetch the currently registered callback function (the old context is discarded into junkCtx), then re-register it with the new context. */
1438 ucnv_getToUCallBack(toUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1439 ucnv_setToUCallBack(toUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1441 if(U_FAILURE(subErr
)) {
1446 if(reason
== UCNV_CLOSE
) {
1447 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1448 ctx
->wasClosed
= TRUE
;
/*
 * Initializes a caller-provided TSCCContext (the test uses it for its
 * stack-based contexts): stamps the 0xC0FFEE magic value and assigns a
 * fresh serial number from TSCC_nextSerial().
 * NOTE(review): later checks read q->wasClosed; confirm that it is also
 * initialized here.
 */
1452 static void TSCC_init(TSCCContext
*q
)
1454 q
->magic
= 0xC0FFEE;
1455 q
->serial
= TSCC_nextSerial();
/*
 * Logs the state of context q, labelled with name.
 * A NULL context is reported with a verbose message; a context whose magic
 * value is not 0xC0FFEE is reported with log_err; the final verbose line
 * prints pointer, serial, label, magic and whether the context is flagged
 * CLOSED or open.
 * NOTE(review): the NULL message is presumably guarded by a q == NULL test
 * that keeps the later q-> accesses from running - confirm.
 */
1459 static void TSCC_print_log(TSCCContext
*q
, const char *name
)
1462 log_verbose("TSCContext: %s is NULL!!\n", name
);
1464 if(q
->magic
!= 0xC0FFEE) {
1465 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1466 q
,q
->serial
, q
->magic
);
1468 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1469 q
, q
->serial
, name
, q
->magic
, q
->wasClosed
?"CLOSED":"open");
/*
 * Verifies that ucnv_safeClone() gives the clone its own callback contexts
 * and that closing a converter notifies only its own contexts.
 * Steps:
 *  1. Open "iso-8859-3" as conv1 and register TSCC_fromU / TSCC_toU with the
 *     stack-based contexts from1 / to1.
 *  2. Clone conv1 into conv2 with ucnv_safeClone() (hunkSize starts at 8192).
 *  3. Read the contexts back from both converters: conv1 must still hold
 *     from1 / to1, conv2 must hold different contexts, and nothing may be
 *     flagged closed yet.
 *  4. After conv1 is closed, from1 / to1 must be flagged closed while the
 *     contexts of conv2 must not be.
 *  5. After conv2 is closed, its contexts must be flagged closed too.
 *  6. The contexts created for the clone are heap objects and are freed at
 *     the end; from1 / to1 live on the stack.
 */
1473 static void TestConvertSafeCloneCallback()
1475 UErrorCode err
= U_ZERO_ERROR
;
1476 TSCCContext from1
, to1
;
1477 TSCCContext
*from2
, *from3
, *to2
, *to3
;
1478 TSCCContext
**pfrom2
= &from2
, **pfrom3
= &from3
, **pto2
= &to2
, **pto3
= &to3
;
1480 int32_t hunkSize
= 8192;
1481 UConverterFromUCallback junkFrom
;
1482 UConverterToUCallback junkTo
;
1483 UConverter
*conv1
, *conv2
= NULL
;
1485 conv1
= ucnv_open("iso-8859-3", &err
);
1487 if(U_FAILURE(err
)) {
1488 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err
));
1492 log_verbose("Opened conv1=%p\n", conv1
);
1497 TSCC_print_log(&from1
, "from1");
1498 TSCC_print_log(&to1
, "to1");
/* Register the test callbacks on conv1 with the stack-based contexts. */
1500 ucnv_setFromUCallBack(conv1
, TSCC_fromU
, &from1
, NULL
, NULL
, &err
);
1501 log_verbose("Set from1 on conv1\n");
1502 TSCC_print_log(&from1
, "from1");
1504 ucnv_setToUCallBack(conv1
, TSCC_toU
, &to1
, NULL
, NULL
, &err
);
1505 log_verbose("Set to1 on conv1\n");
1506 TSCC_print_log(&to1
, "to1");
/* Cloning is what triggers the UCNV_CLONE path in the callbacks. */
1508 conv2
= ucnv_safeClone(conv1
, hunk
, &hunkSize
, &err
);
1509 if(U_FAILURE(err
)) {
1510 log_err("safeClone failed: %s\n", u_errorName(err
));
1513 log_verbose("Cloned to conv2=%p.\n", conv2
);
1515 /********** from *********************/
/* from2 = context now held by the clone, from3 = context still held by the original. */
1516 ucnv_getFromUCallBack(conv2
, &junkFrom
, (const void**)pfrom2
);
1517 ucnv_getFromUCallBack(conv1
, &junkFrom
, (const void**)pfrom3
);
1519 TSCC_print_log(from2
, "from2");
1520 TSCC_print_log(from3
, "from3(==from1)");
1523 log_err("FAIL! from2 is null \n");
1528 log_err("FAIL! from3 is null \n");
1532 if(from3
!= (&from1
) ) {
1533 log_err("FAIL! conv1's FROM context changed!\n");
1536 if(from2
== (&from1
) ) {
1537 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1540 if(from1
.wasClosed
) {
1541 log_err("FAIL! from1 is closed \n");
1544 if(from2
->wasClosed
) {
1545 log_err("FAIL! from2 was closed\n");
1548 /********** to *********************/
/* Same checks for the to-Unicode side: to2 belongs to the clone, to3 to the original. */
1549 ucnv_getToUCallBack(conv2
, &junkTo
, (const void**)pto2
);
1550 ucnv_getToUCallBack(conv1
, &junkTo
, (const void**)pto3
);
1552 TSCC_print_log(to2
, "to2");
1553 TSCC_print_log(to3
, "to3(==to1)");
1556 log_err("FAIL! to2 is null \n");
1561 log_err("FAIL! to3 is null \n");
1565 if(to3
!= (&to1
) ) {
1566 log_err("FAIL! conv1's TO context changed!\n");
1569 if(to2
== (&to1
) ) {
1570 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1574 log_err("FAIL! to1 is closed \n");
1577 if(to2
->wasClosed
) {
1578 log_err("FAIL! to2 was closed\n");
1581 /*************************************/
/* State expected once conv1 is closed: only the contexts of conv1 are flagged. */
1584 log_verbose("ucnv_closed (conv1)\n");
1585 TSCC_print_log(&from1
, "from1");
1586 TSCC_print_log(from2
, "from2");
1587 TSCC_print_log(&to1
, "to1");
1588 TSCC_print_log(to2
, "to2");
1590 if(from1
.wasClosed
== FALSE
) {
1591 log_err("FAIL! from1 is NOT closed \n");
1594 if(from2
->wasClosed
) {
1595 log_err("FAIL! from2 was closed\n");
1598 if(to1
.wasClosed
== FALSE
) {
1599 log_err("FAIL! to1 is NOT closed \n");
1602 if(to2
->wasClosed
) {
1603 log_err("FAIL! to2 was closed\n");
/* State expected once conv2 is closed as well: every context is flagged. */
1607 log_verbose("ucnv_closed (conv2)\n");
1609 TSCC_print_log(&from1
, "from1");
1610 TSCC_print_log(from2
, "from2");
1612 if(from1
.wasClosed
== FALSE
) {
1613 log_err("FAIL! from1 is NOT closed \n");
1616 if(from2
->wasClosed
== FALSE
) {
1617 log_err("FAIL! from2 was NOT closed\n");
1620 TSCC_print_log(&to1
, "to1");
1621 TSCC_print_log(to2
, "to2");
1623 if(to1
.wasClosed
== FALSE
) {
1624 log_err("FAIL! to1 is NOT closed \n");
1627 if(to2
->wasClosed
== FALSE
) {
1628 log_err("FAIL! to2 was NOT closed\n");
/* Release the heap contexts that were created for the clone. */
1632 free(to2
); /* to1 is stack based */
1634 if(from2
!= (&from1
)) {
1635 free(from2
); /* from1 is stack based */
/*
 * Helper for TestConvertSafeClone, which calls it on regions of a buffer
 * that was filled with 0xaa and treats a true result as "bytes were
 * overwritten".
 * NOTE(review): presumably returns nonzero when any of the length bytes
 * starting at p differs from b - confirm against the body.
 */
1641 containsAnyOtherByte(uint8_t *p
, int32_t length
, uint8_t b
) {
/*
 * Tests ucnv_safeClone().
 * Part 1 (first entry of names[] only) covers the error and informational
 * paths: NULL status pointer, incoming error status, NULL bufferSize
 * pointer, preflighting the required size, cloning with that run-time size,
 * falling back to a heap clone (U_SAFECLONE_ALLOCATED_WARNING) and a NULL
 * source converter (U_ILLEGAL_ARGUMENT_ERROR).
 * Part 2 runs for every entry of bufferSizes[] and every entry of names[]:
 * the converter is cloned into buffer[1], the middle row of a three-row
 * buffer pre-filled with 0xaa, the rows around the clone are checked for
 * stray writes, and the clone is then used to convert "XYZ" to bytes and
 * back.
 * Finally the largest preflighted clone size is logged and must not exceed
 * U_CNV_SAFECLONE_BUFFERSIZE.
 */
1652 static void TestConvertSafeClone()
1654 /* one 'regular' & all the 'private stateful' converters */
1655 static const char *const names
[] = {
1656 #if !UCONFIG_NO_LEGACY_CONVERSION
1658 "ISO_2022,locale=zh,version=1",
1661 #if !UCONFIG_NO_LEGACY_CONVERSION
1665 "ISO_2022,locale=kr,version=1",
1666 "ISO_2022,locale=jp,version=2",
1670 #if !UCONFIG_NO_LEGACY_CONVERSION
1671 "IMAP-mailbox-name",
1678 /* store the actual sizes of each converter */
1679 int32_t actualSizes
[UPRV_LENGTHOF(names
)];
/* Stack-buffer sizes tried in part 2: the documented size, 1.5x and 0.5x sizeof(UConverter). */
1681 static const int32_t bufferSizes
[] = {
1682 U_CNV_SAFECLONE_BUFFERSIZE
,
1683 (int32_t)(3*sizeof(UConverter
))/2, /* 1.5*sizeof(UConverter) */
1684 (int32_t)sizeof(UConverter
)/2 /* 0.5*sizeof(UConverter) */
1687 char charBuffer
[21]; /* Leave at an odd number for alignment testing */
1688 uint8_t buffer
[3] [U_CNV_SAFECLONE_BUFFERSIZE
];
1689 int32_t bufferSize
, maxBufferSize
;
1690 const char *maxName
;
1691 UConverter
* cnv
, *cnv2
;
1695 const char *pConstCharBuffer
;
1696 const char *charBufferLimit
= charBuffer
+ UPRV_LENGTHOF(charBuffer
);
1697 UChar uniBuffer
[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1698 UChar uniCharBuffer
[20];
1699 char charSourceBuffer
[] = { 0x1b, 0x24, 0x42 };
1700 const char *pCharSource
= charSourceBuffer
;
1701 const char *pCharSourceLimit
= charSourceBuffer
+ sizeof(charSourceBuffer
);
1702 UChar
*pUCharTarget
= uniCharBuffer
;
1703 UChar
*pUCharTargetLimit
= uniCharBuffer
+ UPRV_LENGTHOF(uniCharBuffer
);
1704 const UChar
* pUniBuffer
;
1705 const UChar
*uniBufferLimit
= uniBuffer
+ UPRV_LENGTHOF(uniBuffer
);
/* Part 1: argument and status handling, using the first converter name only. */
1709 cnv
= ucnv_open(names
[0], &err
);
1710 if(U_SUCCESS(err
)) {
1711 /* Check the various error & informational states: */
1713 /* Null status - just returns NULL */
1714 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1715 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, NULL
))
1717 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1719 /* error status - should return 0 & keep error the same */
1720 err
= U_MEMORY_ALLOCATION_ERROR
;
1721 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
1723 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1727 /* Null buffer size pointer is ok */
1728 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, buffer
[0], NULL
, &err
)) || U_FAILURE(err
))
1730 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1737 if (NULL
!= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || U_FAILURE(err
) || bufferSize
<= 0)
1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1741 /* Verify our define is large enough */
1742 if (U_CNV_SAFECLONE_BUFFERSIZE
< bufferSize
)
1744 log_err("FAIL: Pre-calculated buffer size is too small\n");
1746 /* Verify we can use this run-time calculated size */
1747 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
1749 log_err("FAIL: Converter can't be cloned with run-time size\n");
1755 /* size one byte too small - should allocate & let us know */
1757 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, NULL
, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1766 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
1769 if (NULL
== (cnv2
= ucnv_safeClone(cnv
, NULL
, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1780 if (NULL
!= ucnv_safeClone(NULL
, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
/* Part 2: every stack-buffer size (outer loop) against every converter name (inner loop). */
1793 for(j
= 0; j
< UPRV_LENGTHOF(bufferSizes
); ++j
) {
1794 for (idx
= 0; idx
< UPRV_LENGTHOF(names
); idx
++)
1797 cnv
= ucnv_open(names
[idx
], &err
);
1798 if(U_FAILURE(err
)) {
1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names
[idx
], u_errorName(err
));
1804 /* preflight to get maxBufferSize */
1805 actualSizes
[idx
] = 0;
1806 ucnv_safeClone(cnv
, NULL
, &actualSizes
[idx
], &err
);
1807 if(actualSizes
[idx
] > maxBufferSize
) {
1808 maxBufferSize
= actualSizes
[idx
];
1809 maxName
= names
[idx
];
/* Fill all three rows with a marker byte so that stray writes outside the clone can be detected. */
1813 memset(buffer
, 0xaa, sizeof(buffer
));
1815 bufferSize
= bufferSizes
[j
];
1816 cnv2
= ucnv_safeClone(cnv
, buffer
[1], &bufferSize
, &err
);
1818 /* close the original immediately to make sure that the clone works by itself */
1821 if( actualSizes
[idx
] <= (bufferSizes
[j
] - (int32_t)sizeof(UAlignedMemory
)) &&
1822 err
== U_SAFECLONE_ALLOCATED_WARNING
1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names
[idx
]);
1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1828 if(bufferSize
<= bufferSizes
[j
]) {
1829 /* used the stack buffer */
1830 if( containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
[0]), 0xaa) ||
1831 containsAnyOtherByte(buffer
[1]+bufferSize
, (int32_t)(sizeof(buffer
)-(sizeof(buffer
[0])+bufferSize
)), 0xaa)
1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1834 names
[idx
], bufferSize
, bufferSizes
[j
]);
1837 /* heap-allocated the clone */
1838 if(containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
), 0xaa)) {
1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1840 names
[idx
], bufferSize
, bufferSizes
[j
]);
/* Functional check: the clone must be able to convert in both directions on its own. */
1844 pCharBuffer
= charBuffer
;
1845 pUniBuffer
= uniBuffer
;
1847 ucnv_fromUnicode(cnv2
,
1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err
));
1858 ucnv_toUnicode(cnv2
,
1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err
));
1872 pConstCharBuffer
= charBuffer
;
1873 if (uniBuffer
[0] != ucnv_getNextUChar(cnv2
, &pConstCharBuffer
, pCharBuffer
, &err
))
1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err
));
/* NOTE(review): sizeof(UConverter) is a size_t but is printed with %lu below; these differ where size_t is not unsigned long - confirm. */
1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1882 sizeof(UConverter
), maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1883 if(maxBufferSize
> U_CNV_SAFECLONE_BUFFERSIZE
) {
1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1885 maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
/*
 * Round-trips IBM CCSID numbers through the converter API.
 * For every value in ccsids[] a converter is opened with
 * ucnv_openCCSID(ccsid, UCNV_IBM); ucnv_getCCSID() must then return the same
 * number and ucnv_getPlatform() must return UCNV_IBM. The platform check is
 * skipped for CCSID 1392 (gb18030).
 * The body is guarded by #if !UCONFIG_NO_LEGACY_CONVERSION.
 * NOTE(review): the messages format ccsid and the API results with %ld
 * while the table is int32_t - confirm that the argument types match the
 * format on all platforms.
 */
1889 static void TestCCSID() {
1890 #if !UCONFIG_NO_LEGACY_CONVERSION
1892 UErrorCode errorCode
;
1893 int32_t ccsids
[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1896 for(i
=0; i
<UPRV_LENGTHOF(ccsids
); ++i
) {
1899 errorCode
=U_ZERO_ERROR
;
1900 cnv
=ucnv_openCCSID(ccsid
, UCNV_IBM
, &errorCode
);
1901 if(U_FAILURE(errorCode
)) {
1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid
, u_errorName(errorCode
));
/* The converter must report the CCSID it was opened with. */
1906 if(ccsid
!=ucnv_getCCSID(cnv
, &errorCode
)) {
1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid
, ucnv_getCCSID(cnv
, &errorCode
));
1910 /* skip gb18030(ccsid 1392) */
1911 if(ccsid
!= 1392 && UCNV_IBM
!=ucnv_getPlatform(cnv
, &errorCode
)) {
1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid
, ucnv_getPlatform(cnv
, &errorCode
));
1920 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1922 /* CHUNK_SIZE defined in common\ucnv.c: */
1923 #define CHUNK_SIZE 1024
/*
 * Jitterbug 932 regression cases for ucnv_convert(): forward declarations of
 * the three cases, followed by the calls that run them in order.
 */
1925 static void bug1(void);
1926 static void bug2(void);
1927 static void bug3(void);
1932 bug1(); /* Unicode intermediate buffer straddle bug */
1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1938 * jitterbug 932: test chunking boundary conditions in
1940 int32_t ucnv_convert(const char *toConverterName,
1941 const char *fromConverterName,
1948 * See discussions on the icu mailing list in
1949 * 2001-April with the subject "converter 'flush' question".
1951 * Bug report and test code provided by Edward J. Batutis.
/*
 * Jitterbug 932, case 1: a character that straddles the internal chunk
 * boundary of ucnv_convert().
 * The input buffer (CHUNK_SIZE + 32 bytes) is filled with 0x61 and the
 * four-byte GB 18030 sequence for U+10000 is copied to offset
 * CHUNK_SIZE - i, for i counting down from the sequence length to 0, so the
 * sequence slides across the CHUNK_SIZE boundary. Each layout is converted
 * to "us-ascii"; ending with U_TRUNCATED_CHAR_FOUND is the failure that is
 * reported.
 * The body is guarded by #if !UCONFIG_NO_LEGACY_CONVERSION.
 */
1955 #if !UCONFIG_NO_LEGACY_CONVERSION
1956 char char_in
[CHUNK_SIZE
+32];
1957 char char_out
[CHUNK_SIZE
*2];
1959 /* GB 18030 equivalent of U+10000 is 90308130 */
1960 static const char test_seq
[]={ (char)0x90u
, 0x30, (char)0x81u
, 0x30 };
1962 UErrorCode err
= U_ZERO_ERROR
;
1963 int32_t i
, test_seq_len
= sizeof(test_seq
);
1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1968 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1970 * other Unicode sequences cause a bug since combining sequences are not supported by the
1974 for (i
= test_seq_len
; i
>= 0; i
--) {
1975 /* put character sequence into input buffer */
1976 memset(char_in
, 0x61, sizeof(char_in
)); /* GB 18030 'a' */
1977 memcpy(char_in
+ (CHUNK_SIZE
- i
), test_seq
, test_seq_len
);
1979 /* do the conversion */
1980 ucnv_convert("us-ascii", /* out */
1989 if (err
== U_TRUNCATED_CHAR_FOUND
) {
1990 /* this happens when surrogate pair straddles the intermediate buffer in
1991 * T_UConverter_fromCodepageToCodepage */
1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
/*
 * Jitterbug 932, case 2: the size returned by ucnv_convert() when the
 * target buffer (5 bytes) is too small for the output.
 * Three conversions are checked; the expected sizes quoted in the failure
 * messages are 10 for us-ascii to iso-8859-1, 32 for UTF-8 to UTF-32BE and
 * 12 for UTF-32BE to UTF-8. The two UTF cases are compiled only when
 * UCONFIG_ONLY_HTML_CONVERSION is off.
 */
1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */
2001 /* US-ASCII "1234567890" */
2002 static const char source
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2003 #if !UCONFIG_ONLY_HTML_CONVERSION
2004 static const char sourceUTF8
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2005 static const char sourceUTF32
[]={ 0x00, 0x00, 0x00, 0x30,
2006 0x00, 0x00, 0x00, 0x31,
2007 0x00, 0x00, 0x00, 0x32,
2008 0x00, 0x00, 0x00, 0x33,
2009 0x00, 0x00, 0x00, 0x34,
2010 0x00, 0x00, 0x00, 0x35,
2011 0x00, 0x00, 0x00, 0x36,
2012 0x00, 0x00, 0x00, 0x37,
2013 0x00, 0x00, 0x00, 0x38,
2014 0x00, 0x00, (char)0xf0, 0x00};
/* Deliberately too small, so every conversion below overflows it. */
2017 static char target
[5];
2019 UErrorCode err
= U_ZERO_ERROR
;
2022 /* do the conversion */
2023 size
= ucnv_convert("iso-8859-1", /* out */
2024 "us-ascii", /* in */
2032 /* bug2: size is 5, should be 10 */
2033 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size
);
2036 #if !UCONFIG_ONLY_HTML_CONVERSION
2038 /* do the conversion */
2039 size
= ucnv_convert("UTF-32BE", /* out */
2048 /* bug2: size is 5, should be 32 */
2049 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size
);
2053 /* do the conversion */
2054 size
= ucnv_convert("UTF-8", /* out */
2055 "UTF-32BE", /* in */
2059 sizeof(sourceUTF32
),
2063 /* bug2: size is 5, should be 12 */
2064 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size
);
2070 * bug3: when the characters expand going from source to target codepage
2071 * you get bug3 in addition to bug2
/*
 * Jitterbug 932, case 3: the size returned by ucnv_convert() when the
 * output is larger than the input.
 * A CHUNK_SIZE*4 input is converted from us-ascii to lmbcs twice: filled
 * with 0x61 the returned size must equal sizeof(char_in); filled with 0x08
 * (which expands to two bytes in lmbcs) it must be twice that.
 * Compiled only when both UCONFIG_NO_LEGACY_CONVERSION and
 * UCONFIG_ONLY_HTML_CONVERSION are off.
 * NOTE(review): both failure messages pass sizeof(char_in), a size_t, to a
 * %x conversion; a cast to the type the format expects would be needed for
 * this to be well defined - confirm.
 */
2075 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
2076 char char_in
[CHUNK_SIZE
*4];
2078 UErrorCode err
= U_ZERO_ERROR
;
2082 * first get the buggy size from bug2 then
2083 * compare it to buggy size with an expansion
2085 memset(char_in
, 0x61, sizeof(char_in
)); /* US-ASCII 'a' */
2087 /* do the conversion */
2088 size
= ucnv_convert("lmbcs", /* out */
2089 "us-ascii", /* in */
2096 if ( size
!= sizeof(char_in
) ) {
2098 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2099 * in the converter?), should be CHUNK_SIZE*4
2101 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2103 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in
), size
);
2107 * now do the conversion with expansion
2108 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2110 memset(char_in
, 8, sizeof(char_in
));
2113 /* do the conversion */
2114 size
= ucnv_convert("lmbcs", /* out */
2115 "us-ascii", /* in */
2122 /* expect 2X expansion */
2123 if ( size
!= sizeof(char_in
) * 2 ) {
2126 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2128 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in
) * 2, size
);
/*
 * Drives ucnv_convertEx() in streaming fashion and compares the result with
 * an expected byte sequence.
 * src (srcLength bytes) is converted from srcCnv to targetCnv while source,
 * pivot and target windows are each limited to chunkSize units (chunkSize is
 * capped at CHUNK_SIZE). Both converters are reset first.
 * U_BUFFER_OVERFLOW_ERROR is treated as "continue": the error is cleared and
 * the target window is moved forward, never beyond targetBuffer.
 * At the end the final error code must equal expectCode
 * (U_STRING_NOT_TERMINATED_WARNING is accepted when U_ZERO_ERROR is
 * expected), the number of bytes written must equal expectTargetLength and
 * the bytes must match expectTarget. Mismatches are reported with log_err,
 * tagged with testName and chunkSize.
 */
2134 convertExStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2135 const char *src
, int32_t srcLength
,
2136 const char *expectTarget
, int32_t expectTargetLength
,
2138 const char *testName
,
2139 UErrorCode expectCode
) {
2140 UChar pivotBuffer
[CHUNK_SIZE
];
2141 UChar
*pivotSource
, *pivotTarget
;
2142 const UChar
*pivotLimit
;
2144 char targetBuffer
[CHUNK_SIZE
];
2146 const char *srcLimit
, *finalSrcLimit
, *targetLimit
;
2148 int32_t targetLength
;
2152 UErrorCode errorCode
;
/* The local buffers hold CHUNK_SIZE units, so larger chunk sizes are clamped. */
2155 if(chunkSize
>CHUNK_SIZE
) {
2156 chunkSize
=CHUNK_SIZE
;
2159 pivotSource
=pivotTarget
=pivotBuffer
;
2160 pivotLimit
=pivotBuffer
+chunkSize
;
2162 finalSrcLimit
=src
+srcLength
;
2163 target
=targetBuffer
;
2164 targetLimit
=targetBuffer
+chunkSize
;
2166 ucnv_resetToUnicode(srcCnv
);
2167 ucnv_resetFromUnicode(targetCnv
);
2169 errorCode
=U_ZERO_ERROR
;
2172 /* convert, streaming-style (both converters and pivot keep state) */
2174 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2175 if(src
+chunkSize
<=finalSrcLimit
) {
2176 srcLimit
=src
+chunkSize
;
2178 srcLimit
=finalSrcLimit
;
2180 ucnv_convertEx(targetCnv
, srcCnv
,
2181 &target
, targetLimit
,
2183 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
2184 FALSE
, flush
, &errorCode
);
2185 targetLength
=(int32_t)(target
-targetBuffer
);
/* A target pointer beyond the limit would mean the converter wrote past the window it was given. */
2186 if(target
>targetLimit
) {
2187 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2188 testName
, chunkSize
, target
, targetLimit
);
2189 break; /* TODO: major problem! */
2191 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2192 /* continue converting another chunk */
2193 errorCode
=U_ZERO_ERROR
;
2194 if(targetLength
+chunkSize
<=sizeof(targetBuffer
)) {
2195 targetLimit
=target
+chunkSize
;
2197 targetLimit
=targetBuffer
+sizeof(targetBuffer
);
2199 } else if(U_FAILURE(errorCode
)) {
2205 } else if(src
==finalSrcLimit
&& pivotSource
==pivotTarget
) {
2206 /* all consumed, now flush without input (separate from conversion for testing) */
/* Verdict: error code first, then output length, then output bytes. */
2211 if(!(errorCode
==expectCode
|| (expectCode
==U_ZERO_ERROR
&& errorCode
==U_STRING_NOT_TERMINATED_WARNING
))) {
2212 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2213 testName
, chunkSize
, u_errorName(errorCode
), u_errorName(expectCode
));
2214 } else if(targetLength
!=expectTargetLength
) {
2215 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2216 testName
, chunkSize
, targetLength
, expectTargetLength
);
2217 } else if(memcmp(targetBuffer
, expectTarget
, targetLength
)!=0) {
2218 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2219 testName
, chunkSize
);
/*
 * convertExMultiStreaming: runs convertExStreaming() three times with the same
 * converters, expected output, testName and expectCode. The only argument that
 * differs between the calls is the value passed just before testName: 1, 3
 * and 7 (presumably the chunkSize parameter - its declaration is not visible
 * in this excerpt).
 */
2224 convertExMultiStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2225 const char *src
, int32_t srcLength
,
2226 const char *expectTarget
, int32_t expectTargetLength
,
2227 const char *testName
,
2228 UErrorCode expectCode
) {
2229 convertExStreaming(srcCnv
, targetCnv
,
2231 expectTarget
, expectTargetLength
,
2232 1, testName
, expectCode
);
2233 convertExStreaming(srcCnv
, targetCnv
,
2235 expectTarget
, expectTargetLength
,
2236 3, testName
, expectCode
);
2237 convertExStreaming(srcCnv
, targetCnv
,
2239 expectTarget
, expectTargetLength
,
2240 7, testName
, expectCode
);
/*
 * TestConvertEx: exercises ucnv_convertEx() between a UTF-8 converter (cnv1)
 * and a Shift-JIS converter (cnv2).
 *   1. streaming conversions in both directions via convertExMultiStreaming(),
 *      plus one run that feeds the Shift-JIS bytes to the UTF-8 converter and
 *      expects the errorTarget bytes with U_ZERO_ERROR;
 *   2. simple conversions of a NUL-terminated source, checking that a warning
 *      left in errorCode is replaced and that a failure code is left alone;
 *   3. calls with inconsistent pivot/source arguments, each expected to set
 *      U_ILLEGAL_ARGUMENT_ERROR.
 * The body is compiled only when !UCONFIG_NO_LEGACY_CONVERSION.
 */
2243 static void TestConvertEx() {
2244 #if !UCONFIG_NO_LEGACY_CONVERSION
2245 static const uint8_t
2247 /* 4e00 30a1 ff61 0410 */
2248 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2251 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2255 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2256 * SUB, SUB, 0x40, SUB, SUB, 0x40
2258 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2261 char srcBuffer
[100], targetBuffer
[100];
2266 UChar pivotBuffer
[100];
2267 UChar
*pivotSource
, *pivotTarget
;
2269 UConverter
*cnv1
, *cnv2
;
2270 UErrorCode errorCode
;
2272 errorCode
=U_ZERO_ERROR
;
2273 cnv1
=ucnv_open("UTF-8", &errorCode
);
2274 if(U_FAILURE(errorCode
)) {
2275 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode
));
2279 cnv2
=ucnv_open("Shift-JIS", &errorCode
);
2280 if(U_FAILURE(errorCode
)) {
2281 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2286 /* test ucnv_convertEx() with streaming conversion style */
2287 convertExMultiStreaming(cnv1
, cnv2
,
2288 (const char *)utf8
, sizeof(utf8
), (const char *)shiftJIS
, sizeof(shiftJIS
),
2289 "UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2291 convertExMultiStreaming(cnv2
, cnv1
,
2292 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
),
2293 "Shift-JIS -> UTF-8", U_ZERO_ERROR
);
2295 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2296 convertExMultiStreaming(cnv1
, cnv2
,
2297 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)errorTarget
, sizeof(errorTarget
),
2298 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2300 /* test some simple conversions */
2302 /* NUL-terminated source and target */
/* start from a warning code; the check below expects it to become U_ZERO_ERROR */
2303 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2304 memcpy(srcBuffer
, utf8
, sizeof(utf8
));
2305 srcBuffer
[sizeof(utf8
)]=0;
2307 target
=targetBuffer
;
2308 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2309 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2310 if( errorCode
!=U_ZERO_ERROR
||
2311 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2313 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
/* NOTE(review): %d is given a pointer difference and a sizeof() result here - format/argument type mismatch */
2315 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2316 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2319 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2320 errorCode
=U_AMBIGUOUS_ALIAS_WARNING
;
/* fill with 0xff so the check below can see that nothing was written at *target */
2321 memset(targetBuffer
, 0xff, sizeof(targetBuffer
));
2323 target
=targetBuffer
;
/* target capacity is exactly sizeof(shiftJIS): no room for a terminator */
2324 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(shiftJIS
), &src
, NULL
,
2325 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2326 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2327 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2328 *target
!=(char)0xff ||
2329 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
/* NOTE(review): same %d format/argument type mismatch as in the previous log_err() */
2331 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2332 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
/* a failure code passed in must come back unchanged */
2336 errorCode
=U_MESSAGE_PARSE_ERROR
;
2338 target
=targetBuffer
;
2339 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2340 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2341 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2342 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2345 /* pivotLimit==pivotStart */
2346 errorCode
=U_ZERO_ERROR
;
2347 pivotSource
=pivotTarget
=pivotBuffer
;
2348 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2349 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
, TRUE
, TRUE
, &errorCode
);
2350 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2351 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode
));
2354 /* *pivotSource==NULL */
2355 errorCode
=U_ZERO_ERROR
;
2357 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2358 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2359 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2360 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode
));
2364 errorCode
=U_ZERO_ERROR
;
2366 pivotSource
=pivotBuffer
;
2367 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2368 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2369 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2370 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode
));
2373 /* streaming conversion without a pivot buffer */
2374 errorCode
=U_ZERO_ERROR
;
2376 pivotSource
=pivotBuffer
;
/* pivotStart is NULL and flush is FALSE in this call */
2377 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2378 NULL
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, FALSE
, &errorCode
);
2379 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2380 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode
));
2388 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * badUTF8: table of NUL-terminated byte strings, each holding one ill-formed
 * UTF-8 sequence. It is iterated with UPRV_LENGTHOF(badUTF8) and measured with
 * strlen() by testFromTruncatedUTF8() and testFromBadUTF8().
 */
2389 static const char *const badUTF8
[]={
2393 /* truncated multi-byte sequences */
2430 "\xfc\x80\x80\x80\x80",
2432 /* complete sequences but non-shortest forms or out of range etc. */
2438 "\xf8\x80\x80\x80\x80",
2439 "\xfc\x80\x80\x80\x80\x80",
/* size of the char0/char1 output arrays filled by getTestChar() */
2444 #define ARG_CHAR_ARR_SIZE 8
2446 /* get some character that can be converted and convert it */
/*
 * getTestChar: takes the code point in the middle of cnv's roundtrip set
 * (uset_charAt(set, uset_size(set)/2)), writes its UTF-8 form to charUTF8 and
 * converts its UTF-16 form twice with ucnv_fromUnicode(), once into char0 and
 * once into char1. The byte counts are returned through pCharUTF8Length,
 * pChar0Length and pChar1Length. A failure is reported with log_err() using
 * converterName.
 * NOTE(review): the return statements and the release of "set" are not
 * visible in this excerpt.
 */
2447 static UBool
getTestChar(UConverter
*cnv
, const char *converterName
,
2448 char charUTF8
[4], int32_t *pCharUTF8Length
,
2449 char char0
[ARG_CHAR_ARR_SIZE
], int32_t *pChar0Length
,
2450 char char1
[ARG_CHAR_ARR_SIZE
], int32_t *pChar1Length
) {
2451 UChar utf16
[U16_MAX_LENGTH
];
2452 int32_t utf16Length
;
2454 const UChar
*utf16Source
;
2459 UErrorCode errorCode
;
2461 errorCode
=U_ZERO_ERROR
;
2462 set
=uset_open(1, 0);
2463 ucnv_getUnicodeSet(cnv
, set
, UCNV_ROUNDTRIP_SET
, &errorCode
);
2464 c
=uset_charAt(set
, uset_size(set
)/2);
2468 U16_APPEND_UNSAFE(utf16
, utf16Length
, c
);
2470 U8_APPEND_UNSAFE(charUTF8
, *pCharUTF8Length
, c
);
/* first conversion: output goes to char0 */
2474 ucnv_fromUnicode(cnv
,
2475 &target
, char0
+ARG_CHAR_ARR_SIZE
,
2476 &utf16Source
, utf16
+utf16Length
,
2477 NULL
, FALSE
, &errorCode
);
2478 *pChar0Length
=(int32_t)(target
-char0
);
/* second conversion with the same converter: output goes to char1 */
2482 ucnv_fromUnicode(cnv
,
2483 &target
, char1
+ARG_CHAR_ARR_SIZE
,
2484 &utf16Source
, utf16
+utf16Length
,
2485 NULL
, FALSE
, &errorCode
);
2486 *pChar1Length
=(int32_t)(target
-char1
);
2488 if(U_FAILURE(errorCode
)) {
2489 log_err("unable to get test character for %s - %s\n", converterName
, u_errorName(errorCode
));
/*
 * testFromTruncatedUTF8: sets the STOP to-Unicode callback on utf8Cnv, then
 * for entries of badUTF8[] builds "test character + bad sequence" and converts
 * it to cnv with ucnv_convertEx() (reset and flush both TRUE). The call is
 * expected to end with U_TRUNCATED_CHAR_FOUND and an untouched pivotSource,
 * and ucnv_getInvalidChars(utf8Cnv) is expected to return exactly the bad
 * sequence. Failures are logged with converterName and the table index.
 * NOTE(review): the statement guarded by the "truncated sequence?" test is not
 * visible in this excerpt, so which entries are skipped cannot be confirmed
 * from here.
 */
2495 static void testFromTruncatedUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2496 char charUTF8
[4], int32_t charUTF8Length
,
2497 char char0
[8], int32_t char0Length
,
2498 char char1
[8], int32_t char1Length
) {
2503 int32_t outputLength
;
2505 char invalidChars
[8];
2506 int8_t invalidLength
;
2511 UChar pivotBuffer
[8];
2512 UChar
*pivotSource
, *pivotTarget
;
2514 UErrorCode errorCode
;
2517 /* test truncated sequences */
2518 errorCode
=U_ZERO_ERROR
;
2519 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
/* the test character stays at the start of utf8[] for every iteration */
2521 memcpy(utf8
, charUTF8
, charUTF8Length
);
2523 for(i
=0; i
<UPRV_LENGTHOF(badUTF8
); ++i
) {
2524 /* truncated sequence? */
/* NOTE(review): strlen() returns size_t; it is narrowed to int32_t here */
2525 int32_t length
=strlen(badUTF8
[i
]);
2526 if(length
>=(1+U8_COUNT_TRAIL_BYTES(badUTF8
[i
][0]))) {
2530 /* assemble a string with the test character and the truncated sequence */
2531 memcpy(utf8
+charUTF8Length
, badUTF8
[i
], length
);
2532 utf8Length
=charUTF8Length
+length
;
2534 /* convert and check the invalidChars */
2537 pivotSource
=pivotTarget
=pivotBuffer
;
2538 errorCode
=U_ZERO_ERROR
;
2539 ucnv_convertEx(cnv
, utf8Cnv
,
2540 &target
, output
+sizeof(output
),
2541 &source
, utf8
+utf8Length
,
2542 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+UPRV_LENGTHOF(pivotBuffer
),
2543 TRUE
, TRUE
, /* reset & flush */
2545 outputLength
=(int32_t)(target
-output
);
2546 (void)outputLength
; /* Suppress set but not used warning. */
2547 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
|| pivotSource
!=pivotBuffer
) {
2548 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode
), converterName
, (long)i
);
/* the bytes reported as invalid must be the bad sequence itself */
2552 errorCode
=U_ZERO_ERROR
;
2553 invalidLength
=(int8_t)sizeof(invalidChars
);
2554 ucnv_getInvalidChars(utf8Cnv
, invalidChars
, &invalidLength
, &errorCode
);
2555 if(invalidLength
!=length
|| 0!=memcmp(invalidChars
, badUTF8
[i
], length
)) {
2556 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName
, (long)i
);
/*
 * testFromBadUTF8: sets the SKIP to-Unicode callback on utf8Cnv and builds two
 * buffers:
 *   utf8[]   - the test character, then for every badUTF8[] entry the bad
 *              sequence followed by the test character again;
 *   expect[] - char0 once, then char1 once per badUTF8[] entry.
 * The pair is handed to convertExMultiStreaming() under the test name
 * "from bad UTF-8 to <converterName>".
 */
2561 static void testFromBadUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2562 char charUTF8
[4], int32_t charUTF8Length
,
2563 char char0
[8], int32_t char0Length
,
2564 char char1
[8], int32_t char1Length
) {
2565 char utf8
[600], expect
[600];
2566 int32_t utf8Length
, expectLength
;
2570 UErrorCode errorCode
;
2573 errorCode
=U_ZERO_ERROR
;
2574 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, NULL
, &errorCode
);
2577 * assemble an input string with the test character between each
2579 * and an expected string with repeated test character output
2581 memcpy(utf8
, charUTF8
, charUTF8Length
);
2582 utf8Length
=charUTF8Length
;
/* the first output character uses the char0 form */
2584 memcpy(expect
, char0
, char0Length
);
2585 expectLength
=char0Length
;
2587 for(i
=0; i
<UPRV_LENGTHOF(badUTF8
); ++i
) {
2588 int32_t length
=strlen(badUTF8
[i
]);
2589 memcpy(utf8
+utf8Length
, badUTF8
[i
], length
);
2592 memcpy(utf8
+utf8Length
, charUTF8
, charUTF8Length
);
2593 utf8Length
+=charUTF8Length
;
/* every later output character uses the char1 form */
2595 memcpy(expect
+expectLength
, char1
, char1Length
);
2596 expectLength
+=char1Length
;
2599 /* expect that each bad UTF-8 sequence is detected and skipped */
/* NOTE(review): unbounded strcpy/strcat - the size of testName is not visible in this excerpt; confirm it fits */
2600 strcpy(testName
, "from bad UTF-8 to ");
2601 strcat(testName
, converterName
);
2603 convertExMultiStreaming(utf8Cnv
, cnv
,
2605 expect
, expectLength
,
2610 /* Test illegal UTF-8 input. */
/*
 * TestConvertExFromUTF8: opens a UTF-8 converter once, then for every name in
 * converterNames[] opens that converter, obtains a test character with
 * getTestChar() and runs testFromTruncatedUTF8() and testFromBadUTF8() with
 * it. Open failures are reported with log_data_err(). The UTF-8 converter is
 * closed at the end.
 */
2611 static void TestConvertExFromUTF8() {
2612 static const char *const converterNames
[]={
2613 #if !UCONFIG_NO_LEGACY_CONVERSION
2622 UConverter
*utf8Cnv
, *cnv
;
2623 UErrorCode errorCode
;
2626 /* fromUnicode versions of some character, from initial state and later */
2627 char charUTF8
[4], char0
[8], char1
[8];
2628 int32_t charUTF8Length
, char0Length
, char1Length
;
2630 errorCode
=U_ZERO_ERROR
;
2631 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2632 if(U_FAILURE(errorCode
)) {
2633 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2637 for(i
=0; i
<UPRV_LENGTHOF(converterNames
); ++i
) {
2638 errorCode
=U_ZERO_ERROR
;
2639 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2640 if(U_FAILURE(errorCode
)) {
2641 log_data_err("unable to open %s converter - %s\n", converterNames
[i
], u_errorName(errorCode
));
/* the two sub-tests below run only when a test character is available */
2644 if(!getTestChar(cnv
, converterNames
[i
], charUTF8
, &charUTF8Length
, char0
, &char0Length
, char1
, &char1Length
)) {
2647 testFromTruncatedUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2648 testFromBadUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2651 ucnv_close(utf8Cnv
);
/*
 * TestConvertExFromUTF8_C5F0: converts the two ill-formed bytes C5 F0 from
 * UTF-8 to each converter in converterNames[], with the target converter's
 * from-Unicode callback set to UCNV_FROM_U_CALLBACK_ESCAPE with
 * UCNV_ESCAPE_XML_DEC. The last table entry is treated as the UTF-8 target
 * case (per the comment in the branch below); the other branch is for
 * non-Unicode charsets. The target limit is dest+expectedLength, so the call
 * must finish with U_STRING_NOT_TERMINATED_WARNING, consume both source bytes,
 * fill the target exactly and leave the following guard byte (9) untouched.
 */
2654 static void TestConvertExFromUTF8_C5F0() {
2655 static const char *const converterNames
[]={
2656 #if !UCONFIG_NO_LEGACY_CONVERSION
2665 UConverter
*utf8Cnv
, *cnv
;
2666 UErrorCode errorCode
;
2669 static const char bad_utf8
[2]={ (char)0xC5, (char)0xF0 };
2670 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2671 static const char twoNCRs
[16]={
2672 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2673 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
/* U+FFFD encoded in UTF-8, twice */
2675 static const char twoFFFD
[6]={
2676 (char)0xef, (char)0xbf, (char)0xbd,
2677 (char)0xef, (char)0xbf, (char)0xbd
2679 const char *expected
;
2680 int32_t expectedLength
;
2681 char dest
[20]; /* longer than longest expectedLength */
2686 UChar pivotBuffer
[128];
2687 UChar
*pivotSource
, *pivotTarget
;
2689 errorCode
=U_ZERO_ERROR
;
2690 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2691 if(U_FAILURE(errorCode
)) {
2692 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2696 for(i
=0; i
<UPRV_LENGTHOF(converterNames
); ++i
) {
2697 errorCode
=U_ZERO_ERROR
;
2698 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2699 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_ESCAPE
, UCNV_ESCAPE_XML_DEC
,
2700 NULL
, NULL
, &errorCode
);
/* a single check covers both the open and the callback setup above */
2701 if(U_FAILURE(errorCode
)) {
2702 log_data_err("unable to open %s converter - %s\n",
2703 converterNames
[i
], u_errorName(errorCode
));
/* guard value: bytes that the conversion does not write keep the value 9 */
2708 uprv_memset(dest
, 9, sizeof(dest
));
2709 if(i
==UPRV_LENGTHOF(converterNames
)-1) {
2710 /* conversion to UTF-8 yields two U+FFFD directly */
2714 /* conversion to a non-Unicode charset yields two NCRs */
2721 pivotSource
=pivotTarget
=pivotBuffer
;
2724 &target
, dest
+expectedLength
,
2725 &src
, bad_utf8
+sizeof(bad_utf8
),
2726 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+UPRV_LENGTHOF(pivotBuffer
),
2727 TRUE
, TRUE
, &errorCode
);
2728 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| src
!=bad_utf8
+2 ||
2729 target
!=dest
+expectedLength
|| 0!=uprv_memcmp(dest
, expected
, expectedLength
) ||
2730 dest
[expectedLength
]!=9
2732 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames
[i
]);
2736 ucnv_close(utf8Cnv
);
/*
 * TestConvertAlgorithmic: exercises ucnv_fromAlgorithmic() and
 * ucnv_toAlgorithmic() with a Shift-JIS converter and the algorithmic types
 * UCNV_UTF8, UCNV_UTF16 and UCNV_UTF32.
 * Covered cases: explicit source length with an exactly-sized target
 * (U_STRING_NOT_TERMINATED_WARNING), NUL-terminated source (length -1),
 * empty source, BOM-only source with a zero-capacity target, a failure code
 * passed in, a NULL source and an invalid UConverterType value.
 * The body is compiled only when !UCONFIG_NO_LEGACY_CONVERSION.
 */
2740 TestConvertAlgorithmic() {
2741 #if !UCONFIG_NO_LEGACY_CONVERSION
2742 static const uint8_t
2744 /* 4e00 30a1 ff61 0410 */
2745 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2748 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2752 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2753 * SUB, SUB, 0x40, SUB, SUB, 0x40
2755 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2758 0xfe, 0xff /* BOM only, no text */
2760 #if !UCONFIG_ONLY_HTML_CONVERSION
2761 static const uint8_t utf32
[]={
2762 0xff, 0xfe, 0, 0 /* BOM only, no text */
2766 char target
[100], utf8NUL
[100], shiftJISNUL
[100];
2769 UErrorCode errorCode
;
2773 errorCode
=U_ZERO_ERROR
;
2774 cnv
=ucnv_open("Shift-JIS", &errorCode
);
2775 if(U_FAILURE(errorCode
)) {
2776 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
/* NUL-terminated copies of the two byte arrays */
2781 memcpy(utf8NUL
, utf8
, sizeof(utf8
));
2782 utf8NUL
[sizeof(utf8
)]=0;
2783 memcpy(shiftJISNUL
, shiftJIS
, sizeof(shiftJIS
));
2784 shiftJISNUL
[sizeof(shiftJIS
)]=0;
2787 * The to/from algorithmic convenience functions share a common implementation,
2788 * so we need not test all permutations of them.
2791 /* length in, not terminated out */
2792 errorCode
=U_ZERO_ERROR
;
/* target capacity equals the expected output length, leaving no room for a NUL */
2793 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF8
, target
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
), &errorCode
);
2794 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2795 length
!=sizeof(shiftJIS
) ||
2796 memcmp(target
, shiftJIS
, length
)!=0
2798 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2799 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2802 /* terminated in and out */
2803 memset(target
, 0x55, sizeof(target
));
2804 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2805 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, -1, &errorCode
);
2806 if( errorCode
!=U_ZERO_ERROR
||
2807 length
!=sizeof(utf8
) ||
2808 memcmp(target
, utf8
, length
)!=0
/* NOTE(review): the check above compares against sizeof(utf8), but the message prints sizeof(shiftJIS) as the expected value - looks like a copy/paste slip */
2810 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2811 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2814 /* empty string, some target buffer */
2815 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2816 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, 0, &errorCode
);
2817 if( errorCode
!=U_ZERO_ERROR
||
2820 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2821 u_errorName(errorCode
), length
);
2824 /* pseudo-empty string, no target buffer */
2825 errorCode
=U_ZERO_ERROR
;
2826 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2827 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2830 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2831 u_errorName(errorCode
), length
);
2834 #if !UCONFIG_ONLY_HTML_CONVERSION
2835 errorCode
=U_ZERO_ERROR
;
2836 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF32
, target
, 0, (const char *)utf32
, 4, &errorCode
);
2837 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2840 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2841 u_errorName(errorCode
), length
);
/* a failure code passed in must come back unchanged */
2846 errorCode
=U_MESSAGE_PARSE_ERROR
;
2847 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2848 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2849 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
/* NULL source with a non-zero length */
2853 errorCode
=U_ZERO_ERROR
;
2854 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, NULL
, 2, &errorCode
);
2855 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2856 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode
));
2859 /* illegal alg. type */
2860 errorCode
=U_ZERO_ERROR
;
2861 length
=ucnv_fromAlgorithmic(cnv
, (UConverterType
)99, target
, 0, (const char *)utf16
, 2, &errorCode
);
2862 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2863 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode
));
2869 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestLMBCSMaxChar: walks the converter[] table of { maxSize, name } pairs,
 * opens each name with cnv_open() and reports an error when
 * ucnv_getMaxCharSize() differs from the tabulated maxSize. Afterwards it
 * evaluates UCNV_GET_MAX_BYTES_FOR_STRING(1, 2) once, mainly to make sure the
 * macro compiles.
 */
2870 static void TestLMBCSMaxChar(void) {
2871 static const struct {
2875 /* some non-LMBCS converters - perfect test setup here */
2886 { 4, "IMAP-mailbox-name"},
2889 { 1, "windows-1256"},
2901 { 8, "ISO-2022-KR"},
2902 { 6, "ISO-2022-JP"},
2903 { 8, "ISO-2022-CN"},
2921 for (idx
= 0; idx
< UPRV_LENGTHOF(converter
); idx
++) {
2922 UErrorCode status
= U_ZERO_ERROR
;
2923 UConverter
*cnv
= cnv_open(converter
[idx
].name
, &status
);
2924 if (U_FAILURE(status
)) {
2927 if (converter
[idx
].maxSize
!= ucnv_getMaxCharSize(cnv
)) {
2928 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2929 converter
[idx
].name
, converter
[idx
].maxSize
, ucnv_getMaxCharSize(cnv
));
2934 /* mostly test that the macro compiles */
2935 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2936 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
/*
 * TestJ1968: checks how ucnv_openU() and ucnv_open() react to a very long
 * converter name that is cut at positions around
 * UCNV_MAX_CONVERTER_NAME_LENGTH, either with a NUL or with a ',' or ' '
 * separator. Each step expects no converter to be returned and a specific
 * error code:
 *   - U_ILLEGAL_ARGUMENT_ERROR when the name part reaches
 *     UCNV_MAX_CONVERTER_NAME_LENGTH or more characters;
 *   - U_FILE_ACCESS_ERROR when the name part is one character shorter.
 * NOTE(review): the statements that reset err between the steps are not
 * visible in this excerpt.
 */
2941 static void TestJ1968(void) {
2942 UErrorCode err
= U_ZERO_ERROR
;
2944 char myConvName
[] = "My really really really really really really really really really really really"
2945 " really really really really really really really really really really really"
2946 " really really really really really really really really long converter name";
2947 UChar myConvNameU
[sizeof(myConvName
)];
/* UChar copy of the name, including its terminator (sizeof counts the NUL) */
2949 u_charsToUChars(myConvName
, myConvNameU
, sizeof(myConvName
));
/* UChar name cut after UCNV_MAX_CONVERTER_NAME_LENGTH+1 characters */
2952 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
+1] = 0;
2953 cnv
= ucnv_openU(myConvNameU
, &err
);
2954 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2955 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2959 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2960 cnv
= ucnv_openU(myConvNameU
, &err
);
2961 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2962 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2966 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
2967 cnv
= ucnv_openU(myConvNameU
, &err
);
2968 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2969 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
/* same checks with the char* API, first on the full-length name */
2976 cnv
= ucnv_open(myConvName
, &err
);
2977 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2978 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2982 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = ',';
2983 cnv
= ucnv_open(myConvName
, &err
);
2984 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2985 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2989 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
2990 cnv
= ucnv_open(myConvName
, &err
);
2991 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2992 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
2996 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
/* strncpy with n==7 copies "locale=" without a terminator; the rest of myConvName stays in place after it */
2997 strncpy(myConvName
+ UCNV_MAX_CONVERTER_NAME_LENGTH
, "locale=", 7);
2998 cnv
= ucnv_open(myConvName
, &err
);
2999 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3000 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3003 /* The comma isn't really a part of the converter name. */
3005 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
3006 cnv
= ucnv_open(myConvName
, &err
);
3007 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3008 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3012 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ' ';
3013 cnv
= ucnv_open(myConvName
, &err
);
3014 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3015 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3019 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
3020 cnv
= ucnv_open(myConvName
, &err
);
3021 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3022 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3027 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * testSwap: opens converter "name" twice, once plain (cnv) and once with
 * UCNV_SWAP_LFNL_OPTION_STRING appended to the name (swapCnv), and compares
 * their behaviour on the fixed UChar array text[].
 *   - ucnv_getName(swapCnv) must contain the option string exactly when
 *     "swap" is true;
 *   - text[] is converted with both converters and the two byte outputs are
 *     compared (lengths first, then byte by byte);
 *   - bytes are converted back with both converters and the two UChar outputs
 *     are compared (lengths first, then unit by unit).
 * All failures are reported through log_err()/log_data_err().
 */
3029 testSwap(const char *name
, UBool swap
) {
3031 * Test Unicode text.
3032 * Contains characters that are the highest for some of the
3033 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3034 * tables copies the entire tables.
3036 static const UChar text
[]={
3037 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3040 UChar uNormal
[32], uSwapped
[32];
3041 char normal
[32], swapped
[32];
3045 int32_t i
, normalLength
, swappedLength
;
3049 const char *swappedName
;
3050 UConverter
*cnv
, *swapCnv
;
3051 UErrorCode errorCode
;
3053 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3055 /* open both the normal and the LF/NL-swapping converters */
/* NOTE(review): swapped[32] first holds the converter name plus option string (unbounded strcpy/strcat) and is reused below as the output buffer */
3056 strcpy(swapped
, name
);
3057 strcat(swapped
, UCNV_SWAP_LFNL_OPTION_STRING
);
3059 errorCode
=U_ZERO_ERROR
;
3060 swapCnv
=ucnv_open(swapped
, &errorCode
);
3061 cnv
=ucnv_open(name
, &errorCode
);
3062 if(U_FAILURE(errorCode
)) {
3063 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name
, swapped
, u_errorName(errorCode
));
3067 /* the name must contain the swap option if and only if we expect the converter to swap */
3068 swappedName
=ucnv_getName(swapCnv
, &errorCode
);
3069 if(U_FAILURE(errorCode
)) {
3070 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name
, u_errorName(errorCode
));
3074 pc
=strstr(swappedName
, UCNV_SWAP_LFNL_OPTION_STRING
);
3075 if(swap
!= (pc
!=NULL
)) {
3076 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name
, swappedName
, swap
);
3080 /* convert to EBCDIC */
3083 ucnv_fromUnicode(cnv
, &pc
, normal
+UPRV_LENGTHOF(normal
), &pcu
, text
+UPRV_LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3084 normalLength
=(int32_t)(pc
-normal
);
3088 ucnv_fromUnicode(swapCnv
, &pc
, swapped
+UPRV_LENGTHOF(swapped
), &pcu
, text
+UPRV_LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3089 swappedLength
=(int32_t)(pc
-swapped
);
3091 if(U_FAILURE(errorCode
)) {
3092 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name
, u_errorName(errorCode
));
3096 /* compare EBCDIC output */
3097 if(normalLength
!=swappedLength
) {
3098 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3101 for(i
=0; i
<normalLength
; ++i
) {
3102 /* swap EBCDIC LF/NL for comparison */
3107 } else if(c
==0x25) {
3113 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name
, i
, (uint8_t)swapped
[i
]);
3118 /* convert back to Unicode (may not roundtrip) */
3121 ucnv_toUnicode(cnv
, &pu
, uNormal
+UPRV_LENGTHOF(uNormal
), (const char **)&pc
, normal
+normalLength
, NULL
, TRUE
, &errorCode
);
3122 normalLength
=(int32_t)(pu
-uNormal
);
3126 ucnv_toUnicode(swapCnv
, &pu
, uSwapped
+UPRV_LENGTHOF(uSwapped
), (const char **)&pc
, normal
+swappedLength
, NULL
, TRUE
, &errorCode
);
3127 swappedLength
=(int32_t)(pu
-uSwapped
);
3129 if(U_FAILURE(errorCode
)) {
3130 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name
, u_errorName(errorCode
));
3134 /* compare Unicode output */
3135 if(normalLength
!=swappedLength
) {
3136 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3139 for(i
=0; i
<normalLength
; ++i
) {
3140 /* swap LF/NL in the Unicode output for comparison */
3145 } else if(u
==0x85) {
3150 if(u
!=uSwapped
[i
]) {
3151 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name
, i
, uSwapped
[i
]);
3159 ucnv_close(swapCnv
);
/*
 * TestEBCDICSwapLFNL: runs testSwap() for every { name, swap } pair in the
 * tests[] table; the ibm-* entries are flagged TRUE and iso-8859-3 is flagged
 * FALSE. A second definition further down is an empty stub for the other
 * branch of the surrounding preprocessor conditional.
 */
3163 TestEBCDICSwapLFNL() {
3164 static const struct {
3169 { "ibm-1047", TRUE
},
3170 { "ibm-1140", TRUE
},
3171 { "ibm-930", TRUE
},
3172 { "iso-8859-3", FALSE
}
3177 for(i
=0; i
<UPRV_LENGTHOF(tests
); ++i
) {
3178 testSwap(tests
[i
].name
, tests
[i
].swap
);
/* stub variant: nothing to test in this configuration */
3183 TestEBCDICSwapLFNL() {
3184 /* test nothing... */
/*
 * TestFromUCountPending: checks ucnv_fromUCountPending() with the "test3"
 * converter loaded from the test data package.
 *   Part 1: for every entry of fromUnicodeTests[] ({ input, len, exp }) the
 *           input is converted with flush==FALSE and the pending count must
 *           equal exp.
 *   Part 2: head[], middle[] and tail[] are fed one after another into a
 *           target limited to 2 bytes; the pending count is queried after
 *           each step and the tail step must end in U_BUFFER_OVERFLOW_ERROR.
 * The body is compiled only when !UCONFIG_NO_LEGACY_CONVERSION.
 * NOTE(review): several messages below name ucnv_fromUInputConsumed or
 * ucnv_fromUInputHeld although the function called is
 * ucnv_fromUCountPending(); one message also spells "expeced".
 */
3188 static void TestFromUCountPending(){
3189 #if !UCONFIG_NO_LEGACY_CONVERSION
3190 UErrorCode status
= U_ZERO_ERROR
;
3191 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3192 static const struct {
3196 }fromUnicodeTests
[] = {
3199 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3200 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3203 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3204 if(U_FAILURE(status
)){
3205 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3208 for(i
=0; i
<UPRV_LENGTHOF(fromUnicodeTests
); ++i
) {
3211 char* targetLimit
= target
+ 10;
3212 const UChar
* source
= fromUnicodeTests
[i
].input
;
3213 const UChar
* sourceLimit
= source
+ fromUnicodeTests
[i
].len
;
/* flush is FALSE so that incomplete input can stay pending in the converter */
3216 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3217 len
= ucnv_fromUCountPending(cnv
, &status
);
3218 if(U_FAILURE(status
)){
3219 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3220 status
= U_ZERO_ERROR
;
3223 if(len
!= fromUnicodeTests
[i
].exp
){
3224 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3227 status
= U_ZERO_ERROR
;
3230 * The converter has to read the tail before it knows that
3231 * only head alone matches.
3232 * At the end, the output for head will overflow the target,
3233 * middle will be pending, and tail will not have been consumed.
3236 \U00101234 -> x (<U101234> \x07 |0)
3237 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3238 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3239 \U00060007 -> unassigned
3241 static const UChar head
[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3242 static const UChar middle
[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3243 static const UChar tail
[] = {0xDC07,0x0000};/* second half of \U00060007 */
3246 char* targetLimit
= target
+ 2; /* expect overflow from converting \U00101234\U00050005 */
3247 const UChar
* source
= head
;
3248 const UChar
* sourceLimit
= source
+ u_strlen(head
);
/* step 1: head */
3251 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3252 len
= ucnv_fromUCountPending(cnv
, &status
);
3253 if(U_FAILURE(status
)){
3254 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3255 status
= U_ZERO_ERROR
;
3258 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
/* step 2: middle */
3261 sourceLimit
= source
+ u_strlen(middle
);
3262 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3263 len
= ucnv_fromUCountPending(cnv
, &status
);
3264 if(U_FAILURE(status
)){
3265 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3266 status
= U_ZERO_ERROR
;
3269 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
/* step 3: tail - here a buffer overflow is the expected result */
3272 sourceLimit
= source
+ u_strlen(tail
);
3273 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3274 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3275 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3277 status
= U_ZERO_ERROR
;
3278 len
= ucnv_fromUCountPending(cnv
, &status
);
3279 /* middle[1] is pending, tail has not been consumed */
3280 if(U_FAILURE(status
)){
3281 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status
));
3284 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * Exercises ucnv_toUCountPending().
 *
 * Part 1 opens the "test3" converter from the test data package and, for each
 * row of toUnicodeTests, converts the row's input with flush=FALSE and
 * compares the pending count reported afterwards with the row's exp value.
 *
 * Part 2 opens the "test4" converter and calls ucnv_toUnicode() three times
 * (source limits derived from head, mid and tail), querying
 * ucnv_toUCountPending() after each call. The last call is made with
 * targetLimit == target and must report U_BUFFER_OVERFLOW_ERROR.
 *
 * Takes no parameters and returns nothing; failures are reported through
 * log_err()/log_data_err().
 */
3292 TestToUCountPending(){
3293 #if !UCONFIG_NO_LEGACY_CONVERSION
3294 UErrorCode status
= U_ZERO_ERROR
;
/* Each row: input bytes, number of input bytes to feed (len), expected pending count (exp). */
3295 static const struct {
3299 }toUnicodeTests
[] = {
3301 {{0x05, 0x01, 0x02},3,3},
3303 {{0x07, 0x00, 0x01, 0x02},4,4},
/* oldToUAction stays NULL: the previous callback is not retrieved. */
3307 UConverterToUCallback
*oldToUAction
= NULL
;
3308 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3309 if(U_FAILURE(status
)){
3310 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
/* Install UCNV_TO_U_CALLBACK_STOP as the to-Unicode callback for this converter. */
3313 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3314 for(i
=0; i
<UPRV_LENGTHOF(toUnicodeTests
); ++i
) {
3316 UChar
* target
= tgt
;
3317 UChar
* targetLimit
= target
+ 20;
3318 const char* source
= toUnicodeTests
[i
].input
;
3319 const char* sourceLimit
= source
+ toUnicodeTests
[i
].len
;
/* flush=FALSE so that input may remain pending inside the converter. */
3322 ucnv_toUnicode(cnv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3323 len
= ucnv_toUCountPending(cnv
,&status
);
3324 if(U_FAILURE(status
)){
3325 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3326 status
= U_ZERO_ERROR
;
3329 if(len
!= toUnicodeTests
[i
].exp
){
3330 log_err("Did not get the expected output for ucnv_toUCountPending.\n");
3333 status
= U_ZERO_ERROR
;
3338 * The converter has to read the tail before it knows that
3339 * only head alone matches.
3340 * At the end, the output for head will overflow the target,
3341 * mid will be pending, and tail will not have been consumed.
3343 char head
[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3344 char mid
[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3345 char tail
[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3347 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3348 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3349 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3350 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3353 UChar
* target
= tgt
;
3354 UChar
* targetLimit
= target
+ 1; /* expect overflow from converting */
3355 const char* source
= head
;
/* head/mid/tail end in a 0x00 byte, so strlen() yields their 4 payload bytes. */
3356 const char* sourceLimit
= source
+ strlen(head
);
3358 cnv
= ucnv_openPackage(loadTestData(&status
), "test4", &status
);
3359 if(U_FAILURE(status
)){
3360 log_err("Could not create converter for test4. Error: %s\n", u_errorName(status
));
3363 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3364 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3365 len
= ucnv_toUCountPending(cnv
,&status
);
3366 if(U_FAILURE(status
)){
3367 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3370 log_err("Did not get the expected len for head.\n");
3373 sourceLimit
= source
+strlen(mid
);
3374 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3375 len
= ucnv_toUCountPending(cnv
,&status
);
3376 if(U_FAILURE(status
)){
3377 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3380 log_err("Did not get the expected len for mid.\n");
3384 sourceLimit
= source
+strlen(tail
);
/* Zero-capacity target: the next conversion call is required to overflow. */
3385 targetLimit
= target
;
3386 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3387 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3388 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
/* Clear the expected overflow status before querying the pending count. */
3390 status
= U_ZERO_ERROR
;
3391 len
= ucnv_toUCountPending(cnv
,&status
);
3392 /* mid[4] is pending, tail has not been consumed */
3393 if(U_FAILURE(status
)){
3394 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status
));
3397 log_err("Did not get the expected len for tail.\n");
/*
 * Sets the default converter name to `name` and verifies the result twice:
 * once through ucnv_getDefaultName() and once through ucnv_getName() on a
 * converter opened with a NULL name.
 *
 * name:     converter name handed to ucnv_setDefaultName().
 * expected: string both reported names are compared against with strcmp().
 *
 * Outcomes are reported with log_verbose() (match) or log_err() (mismatch or
 * open failure).
 */
3404 static void TestOneDefaultNameChange(const char *name
, const char *expected
) {
3405 UErrorCode status
= U_ZERO_ERROR
;
3407 ucnv_setDefaultName(name
);
/* First check: the default name reported right after setting it. */
3408 if(strcmp(ucnv_getDefaultName(), expected
)==0)
3409 log_verbose("setDefaultName of %s works.\n", name
);
3411 log_err("setDefaultName of %s failed\n", name
);
/* Second check: open with a NULL name and inspect the converter's own name. */
3412 cnv
=ucnv_open(NULL
, &status
);
3413 if (U_FAILURE(status
) || cnv
== NULL
) {
3414 log_err("opening the default converter of %s failed\n", name
);
3417 if(strcmp(ucnv_getName(cnv
, &status
), expected
)==0)
3418 log_verbose("ucnv_getName of %s works.\n", name
);
3420 log_err("ucnv_getName of %s failed\n", name
);
/*
 * Drives TestOneDefaultNameChange() over several converter names and then
 * restores the default name that was in effect on entry.
 */
3424 static void TestDefaultName(void) {
3425 /*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
/* Saved copy of the current default name; used to restore it at the end. */
3426 static char defaultName
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1];
3427 strcpy(defaultName
, ucnv_getDefaultName());
3429 log_verbose("getDefaultName returned %s\n", defaultName
);
3431 /*change the default name by setting it */
3432 TestOneDefaultNameChange("UTF-8", "UTF-8");
/* With U_CHARSET_IS_UTF8 the expected name is "UTF-8" whatever name is requested. */
3433 #if U_CHARSET_IS_UTF8
3434 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3435 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3436 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
/* ISCII cases are compiled only when legacy conversion is available and not HTML-only. */
3438 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
3439 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3440 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3442 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3445 /*set the default name back*/
3446 ucnv_setDefaultName(defaultName
);
3449 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * Checks ucnv_compareNames() over a NULL-terminated list of strings.
 *
 * names: list walked with *names++ until a NULL entry is reached. The tables
 *        passed by TestCompareNames() start with a relation marker ("=" or
 *        "<") followed by converter names.
 *
 * Each name2 taken from the list is compared against name1, and an error is
 * logged when sign(result) differs from rel.
 * NOTE(review): the statements that assign relation, name1 and rel are not
 * shown here - confirm how they are derived from the list.
 */
3463 compareNames(const char **names
) {
3464 const char *relation
, *name1
, *name2
;
/* The first character of relation selects the expected relation. */
3468 if(*relation
=='=') {
3470 } else if(*relation
=='<') {
/* Consume the remaining entries up to the NULL terminator. */
3480 while((name2
=*names
++)!=NULL
) {
3481 result
=ucnv_compareNames(name1
, name2
);
/* Only the sign of the result is checked, not its magnitude. */
3482 if(sign(result
)!=rel
) {
3483 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1
, name2
, result
, rel
);
/*
 * Runs compareNames() on four fixed tables. Every table is a NULL-terminated
 * array whose first element is the relation marker ("=" or "<") and whose
 * remaining elements are converter-name spellings.
 */
3490 TestCompareNames() {
/* Spellings that differ in case, punctuation and zero padding. */
3491 static const char *equalUTF8
[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL
};
3492 static const char *equalIBM
[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL
};
/* Tables marked "<". */
3493 static const char *lessMac
[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL
};
3494 static const char *lessUTF080
[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL
};
3496 compareNames(equalUTF8
);
3497 compareNames(equalIBM
);
3498 compareNames(lessMac
);
3499 compareNames(lessUTF080
);
/*
 * Substitution-string checks, in three parts:
 *  1. UTF-16 and UTF-32: converting a single surrogate code unit must yield
 *     output in which ucnv_detectUnicodeSignature() finds a signature.
 *  2. ISO-8859-1: after ucnv_setSubstString(sub), ucnv_getSubstChars() must
 *     return the bytes in subChars.
 *  3. HZ (only with legacy conversion): after ucnv_setSubstString(sub),
 *     ucnv_getSubstChars() must return length 0.
 */
/* One UTF-16 code unit, U+D900, passed below with length 1. */
3504 static const UChar surrogate
[1]={ 0xd900 };
/* sub and subChars hold the same five values (0x61..0x65), as UChars and as bytes. */
3507 static const UChar sub
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3508 static const char subChars
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3510 UErrorCode errorCode
;
3514 /* UTF-16/32: test that the BOM is output before the sub character */
3515 errorCode
=U_ZERO_ERROR
;
3516 cnv
=ucnv_open("UTF-16", &errorCode
);
3517 if(U_FAILURE(errorCode
)) {
3518 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode
));
3521 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
/* A NULL result from signature detection is treated as "no BOM written". */
3523 if(U_FAILURE(errorCode
) ||
3525 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3527 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
/* Same check again with the UTF-32 converter. */
3530 errorCode
=U_ZERO_ERROR
;
3531 cnv
=ucnv_open("UTF-32", &errorCode
);
3532 if(U_FAILURE(errorCode
)) {
3533 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode
));
3536 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3538 if(U_FAILURE(errorCode
) ||
3540 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3542 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3545 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3546 errorCode
=U_ZERO_ERROR
;
3547 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
3548 if(U_FAILURE(errorCode
)) {
3549 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode
));
3552 ucnv_setSubstString(cnv
, sub
, UPRV_LENGTHOF(sub
), &errorCode
);
3553 if(U_FAILURE(errorCode
)) {
3554 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode
));
/* len8 goes in as the buffer capacity and is read back below as the substitution length. */
3556 len8
= sizeof(buffer
);
3557 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3558 /* Stateless converter, we expect the string converted to charset bytes. */
3559 if(U_FAILURE(errorCode
) || len8
!=sizeof(subChars
) || 0!=uprv_memcmp(buffer
, subChars
, len8
)) {
3560 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode
));
3565 #if !UCONFIG_NO_LEGACY_CONVERSION
3566 errorCode
=U_ZERO_ERROR
;
3567 cnv
=ucnv_open("HZ", &errorCode
);
3568 if(U_FAILURE(errorCode
)) {
3569 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode
));
3572 ucnv_setSubstString(cnv
, sub
, UPRV_LENGTHOF(sub
), &errorCode
);
3573 if(U_FAILURE(errorCode
)) {
3574 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode
));
3576 len8
= sizeof(buffer
);
3577 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3578 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3579 if(U_FAILURE(errorCode
) || len8
!=0) {
3580 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode
));
3586 * Further testing of ucnv_setSubstString() is done via intltest convert.
3587 * We do not test edge cases of illegal arguments and similar because the
3588 * function implementation uses all of its parameters in calls to other
3589 * functions with UErrorCode parameters.
/*
 * Checks that ucnv_fromUnicode() and ucnv_toUnicode() on a UTF-8 converter
 * report U_ILLEGAL_ARGUMENT_ERROR for malformed pointer arguments, and that
 * neither buffer has been written to afterwards.
 *
 * Four cases are tried: a UChar limit that is one byte past the UChar start
 * (incomplete UChar) and a UChar limit that lies below the start pointer,
 * each for the from-Unicode and the to-Unicode direction.
 */
3594 InvalidArguments() {
3596 UErrorCode errorCode
;
/* Buffers pre-filled with known values (1 and 2); verified unchanged at the end. */
3597 char charBuffer
[2] = {1, 1};
3598 char ucharAsCharBuffer
[2] = {2, 2};
3599 char *charsPtr
= charBuffer
;
3600 UChar
*ucharsPtr
= (UChar
*)ucharAsCharBuffer
;
/* Deliberately one byte past ucharsPtr, i.e. not a whole number of UChars away. */
3601 UChar
*ucharsBadPtr
= (UChar
*)(ucharAsCharBuffer
+ 1);
3603 errorCode
=U_ZERO_ERROR
;
3604 cnv
=ucnv_open("UTF-8", &errorCode
);
3605 if(U_FAILURE(errorCode
)) {
3606 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode
));
3610 errorCode
=U_ZERO_ERROR
;
3611 /* This one should fail because an incomplete UChar is being passed in */
3612 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsPtr
, ucharsBadPtr
, NULL
, TRUE
, &errorCode
);
3613 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3614 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3617 errorCode
=U_ZERO_ERROR
;
3618 /* This one should fail because ucharsBadPtr is greater than ucharsPtr */
3619 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsBadPtr
, ucharsPtr
, NULL
, TRUE
, &errorCode
);
3620 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3621 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3624 errorCode
=U_ZERO_ERROR
;
3625 /* This one should fail because an incomplete UChar is being passed in */
3626 ucnv_toUnicode(cnv
, &ucharsPtr
, ucharsBadPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3627 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3628 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3631 errorCode
=U_ZERO_ERROR
;
3632 /* This one should fail because ucharsBadPtr is greater than ucharsPtr */
3633 ucnv_toUnicode(cnv
, &ucharsBadPtr
, ucharsPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3634 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3635 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
/* None of the rejected calls may have modified either buffer. */
3638 if (charBuffer
[0] != 1 || charBuffer
[1] != 1
3639 || ucharAsCharBuffer
[0] != 2 || ucharAsCharBuffer
[1] != 2)
3641 log_err("Data was incorrectly written to buffers\n");
/*
 * For every pair in names[], opens a converter using the first string and
 * checks that ucnv_getName() returns the second string. A mismatch or a
 * failing ucnv_getName() is reported with log_err().
 */
3647 static void TestGetName() {
/* Flat list of pairs: name given to ucnv_open(), then the name ucnv_getName() must return. */
3648 static const char *const names
[] = {
3649 "Unicode", "UTF-16",
3650 "UnicodeBigUnmarked", "UTF-16BE",
3651 "UnicodeBig", "UTF-16BE,version=1",
3652 "UnicodeLittleUnmarked", "UTF-16LE",
3653 "UnicodeLittle", "UTF-16LE,version=1",
3654 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two: names[i] is the input name, names[i+1] the expected result. */
3657 for(i
= 0; i
< UPRV_LENGTHOF(names
); i
+= 2) {
3658 UErrorCode errorCode
= U_ZERO_ERROR
;
3659 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
/* The name check only runs when the converter opened successfully. */
3660 if(U_SUCCESS(errorCode
)) {
3661 const char *name
= ucnv_getName(cnv
, &errorCode
);
3662 if(U_FAILURE(errorCode
) || 0 != strcmp(name
, names
[i
+1])) {
3663 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3664 names
[i
], name
, names
[i
+1], u_errorName(errorCode
));
3671 static void TestUTFBOM() {
3672 static const UChar a16
[] = { 0x61 };
3673 static const char *const names
[] = {
3681 static const uint8_t expected
[][5] = {
3683 { 4, 0xfe, 0xff, 0, 0x61 },
3684 { 4, 0xfe, 0xff, 0, 0x61 },
3686 { 4, 0xff, 0xfe, 0x61, 0 },
3687 { 4, 0xff, 0xfe, 0x61, 0 },
3691 { 4, 0xfe, 0xff, 0, 0x61 },
3694 { 4, 0xff, 0xfe, 0x61, 0 }
3700 for(i
= 0; i
< UPRV_LENGTHOF(names
); ++i
) {
3701 UErrorCode errorCode
= U_ZERO_ERROR
;
3702 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3704 const uint8_t *exp
= expected
[i
];
3705 if (U_FAILURE(errorCode
)) {
3706 log_err_status(errorCode
, "Unable to open converter: %s got error code: %s\n", names
[i
], u_errorName(errorCode
));
3709 length
= ucnv_fromUChars(cnv
, bytes
, (int32_t)sizeof(bytes
), a16
, 1, &errorCode
);
3711 if(U_FAILURE(errorCode
) || length
!= exp
[0] || 0 != memcmp(bytes
, exp
+1, length
)) {
3712 log_err("unexpected %s BOM writing behavior -- %s\n",
3713 names
[i
], u_errorName(errorCode
));