1 /********************************************************************
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
31 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
33 #define NUM_CODEPAGE 1
34 #define MAX_FILE_LEN 1024*20
35 #define UCS_FILE_NAME_SIZE 512
37 /*returns an action other than the one provided*/
38 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Forward declaration: maps a from-Unicode callback to a different one (defined near the end of this file). */
39 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
);
/* Forward declaration: maps a to-Unicode callback to a different one (defined near the end of this file). */
40 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
);
/*
 * cnv_open: open a converter by name for the tests in this file.
 *
 * name        converter name; may be NULL.
 * pErrorCode  in/out ICU error code, passed through to the open call.
 *
 * A non-NULL name whose first character is '*' is opened with
 * ucnv_openPackage() against loadTestData(pErrorCode), using the name with
 * the leading '*' skipped. Any other name (including NULL) is handed to
 * ucnv_open() unchanged. The result of the chosen open call is returned.
 */
44 cnv_open(const char *name
, UErrorCode
*pErrorCode
) {
45 if(name
!=NULL
&& name
[0]=='*') {
/* '*' prefix: load from the test data package, dropping the marker character */
46 return ucnv_openPackage(loadTestData(pErrorCode
), name
+1, pErrorCode
);
/* no '*' prefix: regular converter lookup */
48 return ucnv_open(name
, pErrorCode
);
/*
 * Forward declarations of the test functions in this suite.
 * The declarations that follow an #if line are only compiled when that
 * configuration condition holds.
 */
53 static void ListNames(void);
54 static void TestFlushCache(void);
55 static void TestDuplicateAlias(void);
56 static void TestCCSID(void);
57 static void TestJ932(void);
58 static void TestJ1968(void);
59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestLMBCSMaxChar(void);
63 #if !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestConvertSafeCloneCallback(void);
67 static void TestEBCDICSwapLFNL(void);
68 static void TestConvertEx(void);
69 static void TestConvertExFromUTF8(void);
70 static void TestConvertExFromUTF8_C5F0(void);
71 static void TestConvertAlgorithmic(void);
72 void TestDefaultConverterError(void); /* defined in cctest.c */
73 void TestDefaultConverterSet(void); /* defined in cctest.c */
74 static void TestToUCountPending(void);
75 static void TestFromUCountPending(void);
76 static void TestDefaultName(void);
77 static void TestCompareNames(void);
78 static void TestSubstString(void);
79 static void InvalidArguments(void);
80 static void TestGetName(void);
81 static void TestUTFBOM(void);
/* Non-static entry point: registers the tests of this file on a test tree. */
83 void addTestConvert(TestNode
** root
);
/*
 * addTestConvert: register every test of this file on the given test tree.
 *
 * root  test tree root that is passed to each addTest() call.
 *
 * Each test function is registered under a "tsconv/ccapitst/<name>" path.
 * Registrations that follow an #if line are conditional on that
 * configuration macro.
 */
85 void addTestConvert(TestNode
** root
)
87 addTest(root
, &ListNames
, "tsconv/ccapitst/ListNames");
88 addTest(root
, &TestConvert
, "tsconv/ccapitst/TestConvert");
89 addTest(root
, &TestFlushCache
, "tsconv/ccapitst/TestFlushCache");
90 addTest(root
, &TestAlias
, "tsconv/ccapitst/TestAlias");
91 addTest(root
, &TestDuplicateAlias
, "tsconv/ccapitst/TestDuplicateAlias");
92 addTest(root
, &TestConvertSafeClone
, "tsconv/ccapitst/TestConvertSafeClone");
/* only registered when legacy conversion is compiled in */
93 #if !UCONFIG_NO_LEGACY_CONVERSION
94 addTest(root
, &TestConvertSafeCloneCallback
,"tsconv/ccapitst/TestConvertSafeCloneCallback");
96 addTest(root
, &TestCCSID
, "tsconv/ccapitst/TestCCSID");
97 addTest(root
, &TestJ932
, "tsconv/ccapitst/TestJ932");
98 addTest(root
, &TestJ1968
, "tsconv/ccapitst/TestJ1968");
/* only registered when both file I/O and legacy conversion are compiled in */
99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root
, &TestLMBCSMaxChar
, "tsconv/ccapitst/TestLMBCSMaxChar");
102 addTest(root
, &TestEBCDICSwapLFNL
, "tsconv/ccapitst/TestEBCDICSwapLFNL");
103 addTest(root
, &TestConvertEx
, "tsconv/ccapitst/TestConvertEx");
104 addTest(root
, &TestConvertExFromUTF8
, "tsconv/ccapitst/TestConvertExFromUTF8");
105 addTest(root
, &TestConvertExFromUTF8_C5F0
, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
106 addTest(root
, &TestConvertAlgorithmic
, "tsconv/ccapitst/TestConvertAlgorithmic");
107 addTest(root
, &TestDefaultConverterError
, "tsconv/ccapitst/TestDefaultConverterError");
108 addTest(root
, &TestDefaultConverterSet
, "tsconv/ccapitst/TestDefaultConverterSet");
/* only registered when file I/O is compiled in */
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root
, &TestToUCountPending
, "tsconv/ccapitst/TestToUCountPending");
111 addTest(root
, &TestFromUCountPending
, "tsconv/ccapitst/TestFromUCountPending");
113 addTest(root
, &TestDefaultName
, "tsconv/ccapitst/TestDefaultName");
114 addTest(root
, &TestCompareNames
, "tsconv/ccapitst/TestCompareNames");
115 addTest(root
, &TestSubstString
, "tsconv/ccapitst/TestSubstString");
116 addTest(root
, &InvalidArguments
, "tsconv/ccapitst/InvalidArguments");
117 addTest(root
, &TestGetName
, "tsconv/ccapitst/TestGetName");
118 addTest(root
, &TestUTFBOM
, "tsconv/ccapitst/TestUTFBOM");
/*
 * ListNames: exercises the converter-name and alias enumeration APIs.
 *
 * In order, the code below:
 *   - opens the all-names enumeration (ucnv_openAllNames), counts it, walks
 *     it once, resets it and walks it a second time while trying to open
 *     and close each named converter;
 *   - queries ucnv_countAvailable() and ucnv_getAvailableName(), including a
 *     negative index that must yield NULL;
 *   - queries the aliases of "utf-8" one by one (ucnv_getAlias) and in bulk
 *     (ucnv_getAliases) and checks that both return the same pointers.
 *
 * Failures are reported through log_err()/log_data_err(); nothing is returned.
 */
121 static void ListNames(void) {
122 UErrorCode err
= U_ZERO_ERROR
;
123 int32_t testLong1
= 0;
124 const char* available_conv
;
125 UEnumeration
*allNamesEnum
= NULL
;
126 int32_t allNamesCount
= 0;
129 log_verbose("Testing ucnv_openAllNames()...");
130 allNamesEnum
= ucnv_openAllNames(&err
);
132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err
));
135 const char *string
= NULL
;
139 allNamesCount
= uenum_count(allNamesEnum
, &err
);
/* first pass over the enumeration: log every name and its length */
140 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
142 log_verbose("read \"%s\", length %i\n", string
, len
);
144 if (U_FAILURE(err
)) {
145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err
));
/* second pass after a reset: additionally try to open (and immediately close) each converter */
148 uenum_reset(allNamesEnum
, &err
);
149 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
151 ucnv_close(ucnv_open(string
, &err
));
152 log_verbose("read \"%s\", length %i (%s)\n", string
, len
, U_SUCCESS(err
) ? "available" : "unavailable");
/* count1/count2 are compared to verify the reset; presumably they count the two passes - their updates are not visible here */
155 if (count1
!= count2
) {
156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
159 uenum_close(allNamesEnum
);
162 /*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
164 log_verbose("Testing ucnv_countAvailable()...");
166 testLong1
=ucnv_countAvailable();
167 log_info("Number of available codepages: %d/%d\n", testLong1
, allNamesCount
);
169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
/* index passed here equals the value returned by ucnv_countAvailable(); the result is not checked in the visible lines */
171 available_conv
= ucnv_getAvailableName(testLong1
);
172 /*test ucnv_getAvailableName with err condition*/
173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
174 available_conv
= ucnv_getAvailableName(-1);
175 if(available_conv
!= NULL
){
176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
179 /* Test ucnv_countAliases() etc. */
180 count
= ucnv_countAliases("utf-8", &err
);
182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err
));
183 } else if(count
<= 0) {
184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count
);
186 /* try to get the aliases individually */
/* alias 0 is expected to compare equal to "UTF-8" */
188 alias
= ucnv_getAlias("utf-8", 0, &err
);
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err
));
191 } else if(strcmp("UTF-8", alias
) != 0) {
192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias
);
195 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
196 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
/* sanity check: an alias longer than 20 characters is treated as corrupt data */
199 } else if(strlen(alias
) > 20) {
201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum
, alias
);
203 log_verbose("alias %d for utf-8: %s\n", aliasNum
, alias
);
207 /* try to fill an array with all aliases */
208 const char **aliases
;
/* NOTE(review): no NULL check on this malloc() result is visible in this excerpt */
209 aliases
=(const char **)malloc(count
* sizeof(const char *));
211 ucnv_getAliases("utf-8", aliases
, &err
);
213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err
));
215 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
216 /* compare the pointers with the ones returned individually */
217 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
220 } else if(aliases
[aliasNum
] != alias
) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum
, aliasNum
);
/* only the pointer array is released here, not the alias strings it points at */
225 free((char **)aliases
);
/*
 * TestConvert: broad walk through the converter C API.
 *
 * Sections, in the order they appear below:
 *   1. ucnv_openU() with valid, NULL, pre-failed and unknown/over-long names.
 *   2. ucnv_open() with the name "ibm-949,Madhu".
 *   3. ucnv_convert() ibm-1363 -> ibm-1364, including preflighting and
 *      error conditions.
 *   4. ucnv_open()/ucnv_openCCSID() with an error code that is already set,
 *      then default opens and ucnv_getName().
 *   5. A loop over the NUM_CODEPAGE table entries that opens a Unicode test
 *      file and an "ibm-949" converter and checks: name, min/max char size,
 *      substitution characters, display name, from/to-Unicode callbacks,
 *      CCSID, platform, ucnv_fromUChars()/ucnv_toUChars() and
 *      ucnv_fromUnicode()/ucnv_toUnicode() round trips.
 *
 * Failures are reported through log_err()/log_data_err(); the working
 * buffers allocated at the top are freed at the bottom.
 */
233 static void TestConvert()
235 #if !UCONFIG_NO_LEGACY_CONVERSION
238 int32_t testLong1
= 0;
242 FILE* ucs_file_in
= NULL
;
244 UChar myUChar
= 0x0000;
245 char* mytarget
; /* [MAX_FILE_LEN] */
248 UChar
* consumedUni
= NULL
;
249 char* consumed
= NULL
;
250 char* output_cp_buffer
; /* [MAX_FILE_LEN] */
251 UChar
* ucs_file_buffer
; /* [MAX_FILE_LEN] */
252 UChar
* ucs_file_buffer_use
;
253 UChar
* my_ucs_file_buffer
; /* [MAX_FILE_LEN] */
254 UChar
* my_ucs_file_buffer_1
;
257 uint16_t codepage_index
= 0;
259 UErrorCode err
= U_ZERO_ERROR
;
260 char ucs_file_name
[UCS_FILE_NAME_SIZE
];
261 UConverterFromUCallback MIA1
, MIA1_2
;
262 UConverterToUCallback MIA2
, MIA2_2
;
263 const void *MIA1Context
, *MIA1Context2
, *MIA2Context
, *MIA2Context2
;
264 UConverter
* someConverters
[5];
265 UConverter
* myConverter
= 0;
266 UChar
* displayname
= 0;
273 int32_t targetcapacity2
;
274 int32_t targetcapacity
;
278 const UChar
* tmp_ucs_buf
;
279 const UChar
* tmp_consumedUni
=NULL
;
280 const char* tmp_mytarget_use
;
281 const char* tmp_consumed
;
283 /******************************************************************
284 Checking Unicode -> ksc
285 ******************************************************************/
/* Per-codepage expectation tables, each with NUM_CODEPAGE entries and indexed by codepage_index in the loop further down */
287 const char* CodePagesToTest
[NUM_CODEPAGE
] =
293 const uint16_t CodePageNumberToTest
[NUM_CODEPAGE
] =
299 const int8_t CodePagesMinChars
[NUM_CODEPAGE
] =
305 const int8_t CodePagesMaxChars
[NUM_CODEPAGE
] =
311 const uint16_t CodePagesSubstitutionChars
[NUM_CODEPAGE
] =
316 const char* CodePagesTestFiles
[NUM_CODEPAGE
] =
322 const UConverterPlatform CodePagesPlatform
[NUM_CODEPAGE
] =
328 const char* CodePagesLocale
[NUM_CODEPAGE
] =
333 UConverterFromUCallback oldFromUAction
= NULL
;
334 UConverterToUCallback oldToUAction
= NULL
;
335 const void* oldFromUContext
= NULL
;
336 const void* oldToUContext
= NULL
;
338 /* Allocate memory */
/* NOTE(review): no NULL check on these four malloc() results is visible in this excerpt */
339 mytarget
= (char*) malloc(MAX_FILE_LEN
* sizeof(mytarget
[0]));
340 output_cp_buffer
= (char*) malloc(MAX_FILE_LEN
* sizeof(output_cp_buffer
[0]));
341 ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(ucs_file_buffer
[0]));
342 my_ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(my_ucs_file_buffer
[0]));
/* working cursors that start at the beginning of the freshly allocated buffers */
344 ucs_file_buffer_use
= ucs_file_buffer
;
346 mytarget_use
= mytarget
;
347 my_ucs_file_buffer_1
=my_ucs_file_buffer
;
349 /* flush the converter cache to get a consistent state before the flushing is tested */
352 /*Testing ucnv_openU()*/
354 UChar converterName
[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
355 UChar firstSortedName
[]={ 0x0021, 0x0000}; /* ! */
356 UChar lastSortedName
[]={ 0x007E, 0x0000}; /* ~ */
357 const char *illegalNameChars
={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
358 UChar illegalName
[100];
359 UConverter
*converter
=NULL
;
361 converter
=ucnv_openU(converterName
, &err
);
363 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err
));
365 ucnv_close(converter
);
367 converter
=ucnv_openU(NULL
, &err
);
369 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err
));
371 ucnv_close(converter
);
372 /*testing with error value*/
373 err
=U_ILLEGAL_ARGUMENT_ERROR
;
374 converter
=ucnv_openU(converterName
, &err
);
375 if(!(converter
== NULL
)){
376 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
378 ucnv_close(converter
);
/* the long repeated name is expected to be rejected with U_ILLEGAL_ARGUMENT_ERROR */
380 u_uastrcpy(illegalName
, "");
381 u_uastrcpy(illegalName
, illegalNameChars
);
382 ucnv_openU(illegalName
, &err
);
383 if(!(err
==U_ILLEGAL_ARGUMENT_ERROR
)){
384 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
/* "!" and "~" are not converter names; both opens are expected to end in U_FILE_ACCESS_ERROR */
388 ucnv_openU(firstSortedName
, &err
);
389 if(err
!=U_FILE_ACCESS_ERROR
){
390 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
394 ucnv_openU(lastSortedName
, &err
);
395 if(err
!=U_FILE_ACCESS_ERROR
){
396 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
401 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
403 UConverter
*cnv
=NULL
;
405 cnv
=ucnv_open("ibm-949,Madhu", &err
);
407 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err
));
412 /*Testing ucnv_convert()*/
414 int32_t targetLimit
=0, sourceLimit
=0, i
=0, targetCapacity
=0;
415 const uint8_t source
[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
416 const uint8_t expectedTarget
[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
418 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
/* preflight call with a NULL target; on U_BUFFER_OVERFLOW_ERROR the code below allocates targetCapacity+1 bytes and converts again */
422 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", NULL
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
423 if(err
== U_BUFFER_OVERFLOW_ERROR
){
425 targetLimit
=targetCapacity
+1;
426 target
=(char*)malloc(sizeof(char) * targetLimit
);
427 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
430 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err
));
/* byte-by-byte comparison of the converted output with expectedTarget */
433 for(i
=0; i
<targetCapacity
; i
++){
434 if(target
[i
] != expectedTarget
[i
]){
435 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i
, (UChar
)expectedTarget
[i
], (uint8_t)target
[i
]);
/* sourceLimit of -1 with the source starting at source+1; a result of 7 is expected */
439 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
+1, -1, &err
);
440 if(U_FAILURE(err
) || i
!=7){
441 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
442 u_errorName(err
), i
);
445 /*Test error conditions*/
447 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, 0, &err
);
449 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
452 err
=U_ILLEGAL_ARGUMENT_ERROR
;
453 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
454 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
456 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
460 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
462 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
463 if(!(U_FAILURE(err
) && err
==U_BUFFER_OVERFLOW_ERROR
)){
464 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
471 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
472 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
473 err
=U_ILLEGAL_ARGUMENT_ERROR
;
474 if(ucnv_open(NULL
, &err
) != NULL
){
475 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
477 if(ucnv_openCCSID(1051, UCNV_IBM
, &err
) != NULL
){
478 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
482 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
483 log_verbose("\n---Testing ucnv_open default...\n");
484 someConverters
[0] = ucnv_open(NULL
,&err
);
485 someConverters
[1] = ucnv_open(NULL
,&err
);
486 someConverters
[2] = ucnv_open("utf8", &err
);
487 someConverters
[3] = ucnv_openCCSID(949,UCNV_IBM
,&err
);
488 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM
, &err
)); /* test for j350; ucnv_close(NULL) is safe */
489 if (U_FAILURE(err
)){ log_data_err("FAILURE! %s\n", myErrorName(err
));}
491 /* Testing ucnv_getName()*/
492 /*default code page */
493 ucnv_getName(someConverters
[0], &err
);
495 log_data_err("getName[0] failed\n");
497 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters
[0], &err
));
499 ucnv_getName(someConverters
[1], &err
);
501 log_data_err("getName[1] failed\n");
503 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters
[1], &err
));
/* only slots 0..3 of someConverters[5] are assigned above, and only those are closed */
506 ucnv_close(someConverters
[0]);
507 ucnv_close(someConverters
[1]);
508 ucnv_close(someConverters
[2]);
509 ucnv_close(someConverters
[3]);
/* main loop: one iteration per entry of the CodePages* tables */
512 for (codepage_index
=0; codepage_index
< NUM_CODEPAGE
; ++codepage_index
)
/* NOTE(review): the strcpy()/strcat() calls below write into ucs_file_name[UCS_FILE_NAME_SIZE] without a length check - assumes the test data path is short enough; confirm */
518 strcpy(ucs_file_name
, U_TOPSRCDIR U_FILE_SEP_STRING
"test"U_FILE_SEP_STRING
"testdata"U_FILE_SEP_STRING
);
520 strcpy(ucs_file_name
, loadTestData(&err
));
523 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err
));
/* checks whether the last file separator is also the last character of the path */
528 char* index
= strrchr(ucs_file_name
,(char)U_FILE_SEP_CHAR
);
530 if((unsigned int)(index
-ucs_file_name
) != (strlen(ucs_file_name
)-1)){
535 strcat(ucs_file_name
,".."U_FILE_SEP_STRING
);
537 strcat(ucs_file_name
, CodePagesTestFiles
[codepage_index
]);
539 ucs_file_in
= fopen(ucs_file_name
,"rb");
542 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name
);
546 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
548 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
549 /* ucnv_flushCache(); */
550 myConverter
=ucnv_open( "ibm-949", &err
);
551 if (!myConverter
|| U_FAILURE(err
))
553 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err
));
558 /*testing for ucnv_getName() */
559 log_verbose("Testing ucnv_getName()...\n");
560 ucnv_getName(myConverter
, &err
);
562 log_err("Error in getName\n");
565 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter
, &err
));
/* the converter name is compared with the table entry using uprv_stricmp() */
567 if (uprv_stricmp(ucnv_getName(myConverter
, &err
), CodePagesToTest
[codepage_index
]))
568 log_err("getName failed\n");
570 log_verbose("getName ok\n");
571 /*Test getName with error condition*/
574 err
=U_ILLEGAL_ARGUMENT_ERROR
;
575 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
576 name
=ucnv_getName(myConverter
, &err
);
578 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
584 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
586 log_verbose("Testing ucnv_getMaxCharSize()...\n");
587 if (ucnv_getMaxCharSize(myConverter
)==CodePagesMaxChars
[codepage_index
])
588 log_verbose("Max byte per character OK\n");
590 log_err("Max byte per character failed\n");
592 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
593 if (ucnv_getMinCharSize(myConverter
)==CodePagesMinChars
[codepage_index
])
594 log_verbose("Min byte per character OK\n");
596 log_err("Min byte per character failed\n");
599 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
600 log_verbose("\n---Testing ucnv_getSubstChars...\n");
602 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
604 log_err("ucnv_getSubstChars returned a negative number %d\n", ii
);
/* folds the substitution bytes into one 16-bit value; presumably x iterates over the ii returned bytes - the loop header is not visible here */
608 rest
= (uint16_t)(((unsigned char)rest
<< 8) + (unsigned char)myptr
[x
]);
609 if (rest
==CodePagesSubstitutionChars
[codepage_index
])
610 log_verbose("Substitution character ok\n");
612 log_err("Substitution character failed.\n");
614 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
615 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
618 log_err("FAILURE! %s\n", myErrorName(err
));
620 ucnv_getSubstChars(myConverter
,save
, &ii
, &err
);
623 log_err("FAILURE! %s\n", myErrorName(err
));
626 if (strncmp(save
, myptr
, ii
))
627 log_err("Saved substitution character failed\n");
629 log_verbose("Saved substitution character ok\n");
631 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
632 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
634 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
635 if(err
!= U_INDEX_OUTOFBOUNDS_ERROR
){
636 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err
));
640 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
641 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
642 ucnv_setSubstChars(myConverter
, myptr
, 0, &err
);
643 if(err
!= U_ILLEGAL_ARGUMENT_ERROR
){
644 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err
));
646 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
647 strcpy(myptr
, "abc");
648 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
650 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
651 if(strncmp(save
, myptr
, ii
) == 0){
652 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
654 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
656 strcpy(myptr
, "abc");
657 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
658 err
=U_ILLEGAL_ARGUMENT_ERROR
;
659 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
660 if(strncmp(save
, myptr
, ii
) == 0){
661 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
666 #ifdef U_ENABLE_GENERIC_ISO_2022
667 /*resetState ucnv_reset()*/
668 log_verbose("\n---Testing ucnv_reset()..\n");
669 ucnv_reset(myConverter
);
672 const uint8_t in
[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
673 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
674 UConverter
*cnv
=ucnv_open("ISO_2022", &err
);
676 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
/* the first code point read from the input is expected to be U+0031 */
678 c
=ucnv_getNextUChar(cnv
, &source
, limit
, &err
);
679 if((U_FAILURE(err
) || c
!= (UChar32
)0x0031)) {
680 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err
));
689 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
690 locale
=CodePagesLocale
[codepage_index
];
/* first call obtains the required length; on U_BUFFER_OVERFLOW_ERROR a buffer of disnamelen+1 UChars is allocated and the call is repeated */
693 disnamelen
= ucnv_getDisplayName(myConverter
, locale
, displayname
, len
, &err
);
694 if(err
==U_BUFFER_OVERFLOW_ERROR
) {
696 displayname
=(UChar
*)malloc((disnamelen
+1) * sizeof(UChar
));
697 ucnv_getDisplayName(myConverter
,locale
,displayname
,disnamelen
+1, &err
);
699 log_err("getDisplayName failed. The error is %s\n", myErrorName(err
));
702 log_verbose(" getDisplayName o.k.\n");
708 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err
));
710 /*test ucnv_getDisplayName with err != U_ZERO_ERROR*/
711 err
= U_ILLEGAL_ARGUMENT_ERROR
;
712 len
=ucnv_getDisplayName(myConverter
,locale
,NULL
,0, &err
);
714 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
716 /*test ucnv_getDisplayName with a NULL converter*/
718 len
=ucnv_getDisplayName(NULL
,locale
,NULL
,0, &err
);
719 if( len
!=0 || U_SUCCESS(err
)){
720 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
724 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
725 ucnv_getFromUCallBack(myConverter
, &MIA1
, &MIA1Context
);
727 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
/* &BOM is passed as the callback context and compared by address in the checks below */
728 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
729 if (U_FAILURE(err
) || oldFromUAction
!= MIA1
|| oldFromUContext
!= MIA1Context
)
731 log_err("FAILURE! %s\n", myErrorName(err
));
734 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
735 if (MIA1_2
!= otherUnicodeAction(MIA1
) || MIA1Context2
!= &BOM
)
736 log_err("get From UCallBack failed\n");
738 log_verbose("get From UCallBack ok\n");
740 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
/* restore the original callback and context, then verify the previous values that come back */
741 ucnv_setFromUCallBack(myConverter
,MIA1
, MIA1Context
, &oldFromUAction
, &oldFromUContext
, &err
);
742 if (U_FAILURE(err
) || oldFromUAction
!= otherUnicodeAction(MIA1
) || oldFromUContext
!= &BOM
)
744 log_err("FAILURE! %s\n", myErrorName(err
));
747 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
748 if (MIA1_2
!= MIA1
|| MIA1Context2
!= MIA1Context
)
749 log_err("get From UCallBack action failed\n");
751 log_verbose("get From UCallBack action ok\n");
753 /*testing ucnv_setFromUCallBack with error conditions*/
754 err
=U_ILLEGAL_ARGUMENT_ERROR
;
755 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
756 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
757 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
758 if(MIA1_2
== otherUnicodeAction(MIA1
) || MIA1Context2
== &BOM
){
759 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
764 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
765 ucnv_getToUCallBack(myConverter
, &MIA2
, &MIA2Context
);
767 log_verbose("\n---Testing setTo UCallBack...\n");
768 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), &BOM
, &oldToUAction
, &oldToUContext
, &err
);
769 if (U_FAILURE(err
) || oldToUAction
!= MIA2
|| oldToUContext
!= MIA2Context
)
771 log_err("FAILURE! %s\n", myErrorName(err
));
774 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
775 if (MIA2_2
!= otherCharAction(MIA2
) || MIA2Context2
!= &BOM
)
776 log_err("To UCallBack failed\n");
778 log_verbose("To UCallBack ok\n");
780 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
781 ucnv_setToUCallBack(myConverter
,MIA2
, MIA2Context
, &oldToUAction
, &oldToUContext
, &err
);
782 if (U_FAILURE(err
) || oldToUAction
!= otherCharAction(MIA2
) || oldToUContext
!= &BOM
)
783 { log_err("FAILURE! %s\n", myErrorName(err
)); }
785 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
786 if (MIA2_2
!= MIA2
|| MIA2Context2
!= MIA2Context
)
787 log_err("To UCallBack failed\n");
789 log_verbose("To UCallBack ok\n");
791 /*testing ucnv_setToUCallBack with error conditions*/
792 err
=U_ILLEGAL_ARGUMENT_ERROR
;
793 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
794 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), NULL
, &oldToUAction
, &oldToUContext
, &err
);
795 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
796 if (MIA2_2
== otherCharAction(MIA2
) || MIA2Context2
== &BOM
){
797 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
802 /*getcodepageid testing ucnv_getCCSID() */
803 log_verbose("\n----Testing getCCSID....\n");
804 cp
= ucnv_getCCSID(myConverter
,&err
);
807 log_err("FAILURE!..... %s\n", myErrorName(err
));
809 if (cp
!= CodePageNumberToTest
[codepage_index
])
810 log_err("Codepage number test failed\n");
812 log_verbose("Codepage number test OK\n");
814 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
815 err
=U_ILLEGAL_ARGUMENT_ERROR
;
816 if( ucnv_getCCSID(myConverter
,&err
) != -1){
817 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
821 /*getCodepagePlatform testing ucnv_getPlatform()*/
822 log_verbose("\n---Testing getCodepagePlatform ..\n");
823 if (CodePagesPlatform
[codepage_index
]!=ucnv_getPlatform(myConverter
, &err
))
824 log_err("Platform codepage test failed\n");
826 log_verbose("Platform codepage test ok\n");
830 log_err("FAILURE! %s\n", myErrorName(err
));
832 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
833 err
= U_ILLEGAL_ARGUMENT_ERROR
;
834 if(ucnv_getPlatform(myConverter
, &err
) != UCNV_UNKNOWN
){
835 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
/* the first UChar of the file is read as the byte order mark; NOTE(review): the fread() result is not checked here */
841 fread(&BOM
, sizeof(UChar
), 1, ucs_file_in
);
842 if (BOM
!=0xFEFF && BOM
!=0xFFFE)
844 log_err("File Missing BOM...Bailing!\n");
850 /*Reads in the file*/
/* one UChar per fread(); each unit is byte-swapped unless BOM was read as 0xFEFF */
851 while(!feof(ucs_file_in
)&&(i
+=fread(ucs_file_buffer
+i
, sizeof(UChar
), 1, ucs_file_in
)))
853 myUChar
= ucs_file_buffer
[i
-1];
855 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN*/
858 myUChar
= ucs_file_buffer
[i
-1];
859 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN Corner Case*/
862 /*testing ucnv_fromUChars() and ucnv_toUChars() */
863 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
/* uchar1 and uchar3 are separate copies of the first i UChars of the file buffer, each allocated with room for i+1 UChars */
865 uchar1
=(UChar
*)malloc(sizeof(UChar
) * (i
+1));
866 u_uastrcpy(uchar1
,"");
867 u_strncpy(uchar1
,ucs_file_buffer
,i
);
870 uchar3
=(UChar
*)malloc(sizeof(UChar
)*(i
+1));
871 u_uastrcpy(uchar3
,"");
872 u_strncpy(uchar3
,ucs_file_buffer
,i
);
875 /*Calls the Conversion Routine */
876 testLong1
= MAX_FILE_LEN
;
877 log_verbose("\n---Testing ucnv_fromUChars()\n");
878 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
881 log_err("\nFAILURE...%s\n", myErrorName(err
));
884 log_verbose(" ucnv_fromUChars() o.k.\n");
886 /*test the conversion routine */
887 log_verbose("\n---Testing ucnv_toUChars()\n");
888 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
890 targetsize
= ucnv_toUChars(myConverter
,
894 strlen(output_cp_buffer
),
896 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
898 if(err
==U_BUFFER_OVERFLOW_ERROR
)
901 uchar2
=(UChar
*)malloc((targetsize
+1) * sizeof(UChar
));
902 targetsize
= ucnv_toUChars(myConverter
,
906 strlen(output_cp_buffer
),
910 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err
));
912 log_verbose(" ucnv_toUChars() o.k.\n");
914 if(u_strcmp(uchar1
,uchar2
)!=0)
915 log_err("equality test failed with conversion routine\n");
919 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
921 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
922 err
=U_ILLEGAL_ARGUMENT_ERROR
;
923 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
924 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
925 if (targetcapacity
!=0) {
926 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
929 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
930 targetcapacity
= ucnv_fromUChars(NULL
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
931 if (targetcapacity
!=0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
932 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
935 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
936 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, 0, &err
);
937 if (targetcapacity
!=0) {
938 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
940 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
941 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, 0, uchar1
, -1, &err
);
942 if (err
!= U_BUFFER_OVERFLOW_ERROR
) {
943 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
945 /*toUChars with error conditions*/
946 targetsize
= ucnv_toUChars(myConverter
, uchar2
, targetsize
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
948 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
951 targetsize
= ucnv_toUChars(myConverter
, uchar2
, -1, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
952 if(targetsize
!= 0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
){
953 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
956 targetsize
= ucnv_toUChars(myConverter
, uchar2
, 0, output_cp_buffer
, 0, &err
);
957 if (targetsize
!=0) {
958 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
961 targetsize
= ucnv_toUChars(myConverter
, NULL
, targetcapacity2
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
962 if (err
!= U_STRING_NOT_TERMINATED_WARNING
) {
963 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
970 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
971 /*Clean up re-usable vars*/
973 log_verbose("Testing ucnv_fromUnicode().....\n");
974 tmp_ucs_buf
=ucs_file_buffer_use
;
/* target window is [mytarget_1, mytarget + MAX_FILE_LEN); source limit is ucs_file_buffer_use + i */
975 ucnv_fromUnicode(myConverter
, &mytarget_1
,
976 mytarget
+ MAX_FILE_LEN
,
978 ucs_file_buffer_use
+i
,
982 consumedUni
= (UChar
*)tmp_consumedUni
;
986 log_err("FAILURE! %s\n", myErrorName(err
));
989 log_verbose("ucnv_fromUnicode() o.k.\n");
991 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
992 log_verbose("Testing ucnv_toUnicode().....\n");
993 tmp_mytarget_use
=mytarget_use
;
994 tmp_consumed
= consumed
;
/* source limit is the number of bytes ucnv_fromUnicode() advanced mytarget_1 past mytarget */
995 ucnv_toUnicode(myConverter
, &my_ucs_file_buffer_1
,
996 my_ucs_file_buffer
+ MAX_FILE_LEN
,
998 mytarget_use
+ (mytarget_1
- mytarget
),
1002 consumed
= (char*)tmp_consumed
;
1005 log_err("FAILURE! %s\n", myErrorName(err
));
1008 log_verbose("ucnv_toUnicode() o.k.\n");
1011 log_verbose("\n---Testing RoundTrip ...\n");
/* uchar3 is overwritten with the round-tripped text and compared with the original copy in uchar1 */
1014 u_strncpy(uchar3
, my_ucs_file_buffer
,i
);
1017 if(u_strcmp(uchar1
,uchar3
)==0)
1018 log_verbose("Equality test o.k.\n");
1020 log_err("Equality test failed\n");
1025 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__
);
1029 if(u_strcmp(uchar2
, uchar3
)==0)
1030 log_verbose("Equality test o.k.\n");
1032 log_err("Equality test failed\n");
/* cleanup: input file, converter and the UChar copies, then the four working buffers allocated at the top */
1035 fclose(ucs_file_in
);
1036 ucnv_close(myConverter
);
1037 if (uchar1
!= 0) free(uchar1
);
1038 if (uchar2
!= 0) free(uchar2
);
1039 if (uchar3
!= 0) free(uchar3
);
1042 free((void*)mytarget
);
1043 free((void*)output_cp_buffer
);
1044 free((void*)ucs_file_buffer
);
1045 free((void*)my_ucs_file_buffer
);
1049 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that differs from the one passed in:
 * UCNV_FROM_U_CALLBACK_SUBSTITUTE when MIA is UCNV_FROM_U_CALLBACK_STOP,
 * and UCNV_FROM_U_CALLBACK_STOP for every other value of MIA.
 */
1050 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
)
1052 return (MIA
==(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
)?(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_SUBSTITUTE
:(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
;
/*
 * Returns a to-Unicode callback that differs from the one passed in:
 * UCNV_TO_U_CALLBACK_SUBSTITUTE when MIA is UCNV_TO_U_CALLBACK_STOP,
 * and UCNV_TO_U_CALLBACK_STOP for every other value of MIA.
 */
1055 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
)
1057 return (MIA
==(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
)?(UConverterToUCallback
)UCNV_TO_U_CALLBACK_SUBSTITUTE
:(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
;
/*
 * TestFlushCache: checks the value returned by ucnv_flushCache() while
 * converters are progressively closed.
 *
 * Five converters are opened: slots 0-2 on "ibm-1047", slot 3 on "gb18030"
 * and slot 4 on "ibm-954".  The flush count is then expected to be
 *   - 0 while all five are open,
 *   - 0 after closing slots 0 and 1 (slot 2 still holds "ibm-1047"),
 *   - 2 after closing slots 2 and 3,
 *   - 1 after closing slot 4.
 * Open failures and unexpected counts are reported through log_data_err.
 * NOTE(review): the declaration of flushCount and some control-flow lines
 * are not visible in this listing.
 */
1061 static void TestFlushCache(void) {
1062 #if !UCONFIG_NO_LEGACY_CONVERSION
1063 UErrorCode err
= U_ZERO_ERROR
;
1064 UConverter
* someConverters
[5];
1067 /* flush the converter cache to get a consistent state before the flushing is tested */
1070 /*Testing ucnv_open()*/
1071 /* Note: These converters have been chosen because they do NOT
1072 encode the Latin characters (U+0041, ...), and therefore are
1073 highly unlikely to be chosen as system default codepages */
/* Slots 0, 1 and 2 deliberately open the same converter name. */
1075 someConverters
[0] = ucnv_open("ibm-1047", &err
);
1076 if (U_FAILURE(err
)) {
1077 log_data_err("FAILURE! %s\n", myErrorName(err
));
1080 someConverters
[1] = ucnv_open("ibm-1047", &err
);
1081 if (U_FAILURE(err
)) {
1082 log_data_err("FAILURE! %s\n", myErrorName(err
));
1085 someConverters
[2] = ucnv_open("ibm-1047", &err
);
1086 if (U_FAILURE(err
)) {
1087 log_data_err("FAILURE! %s\n", myErrorName(err
));
1090 someConverters
[3] = ucnv_open("gb18030", &err
);
1091 if (U_FAILURE(err
)) {
1092 log_data_err("FAILURE! %s\n", myErrorName(err
));
1095 someConverters
[4] = ucnv_open("ibm-954", &err
);
1096 if (U_FAILURE(err
)) {
1097 log_data_err("FAILURE! %s\n", myErrorName(err
));
1101 /* Testing ucnv_flushCache() */
1102 log_verbose("\n---Testing ucnv_flushCache...\n");
/* All five converters are still open: the expected flush count is 0. */
1103 if ((flushCount
=ucnv_flushCache())==0)
1104 log_verbose("Flush cache ok\n");
1106 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1108 /*testing ucnv_close() and ucnv_flushCache() */
/* Two of the three "ibm-1047" handles are closed; the expected count stays 0. */
1109 ucnv_close(someConverters
[0]);
1110 ucnv_close(someConverters
[1]);
1112 if ((flushCount
=ucnv_flushCache())==0)
1113 log_verbose("Flush cache ok\n");
1115 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
/* Closing the last "ibm-1047" handle and "gb18030": the expected count is 2. */
1117 ucnv_close(someConverters
[2]);
1118 ucnv_close(someConverters
[3]);
1120 if ((flushCount
=ucnv_flushCache())==2)
1121 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1123 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
/* Closing "ibm-954": the expected count is 1. */
1127 ucnv_close(someConverters
[4]);
1128 if ( (flushCount
=ucnv_flushCache())==1)
1129 log_verbose("Flush cache ok\n");
1131 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__
, flushCount
);
1136 * Test the converter alias API, specifically the fuzzy matching of
1137 * alias names and the alias table integrity. Make sure each
1138 * converter has at least one alias (itself), and that its listed
1139 * aliases map back to itself. Check some hard-coded UTF-8 and
1140 * ISO_2022 aliases to make sure they work.
/*
 * TestAlias: cross-checks the converter alias API.
 *
 * Part 1 walks every name returned by ucnv_getAvailableName() and, for each,
 * checks ucnv_countAliases(), opens the converter, compares ucnv_getName()
 * with the name, and verifies that each alias j >= 1 maps back (through
 * ucnv_getAlias(alias, 0, ...)) to alias 0 of the same converter.
 * Part 2 checks three hard-coded tables (ISO_2022_NAMES, UTF8_NAMES,
 * CONVERTERS_NAMES) against ucnv_getAlias(..., 0, ...).
 *
 * NOTE(review): several declarations (i, j, idx, ncnv, cnv, alias, alias0,
 * the struct behind CONVERTERS_NAMES) and some control-flow lines are not
 * visible in this listing.
 */
1142 static void TestAlias() {
1144 UErrorCode status
= U_ZERO_ERROR
;
1146 /* Predetermined aliases that we expect to map back to ISO_2022
1147 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1148 const char* ISO_2022_NAMES
[] =
1149 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1150 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1151 int32_t ISO_2022_NAMES_LENGTH
=
1152 sizeof(ISO_2022_NAMES
) / sizeof(ISO_2022_NAMES
[0]);
1153 const char *UTF8_NAMES
[] =
1154 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1155 "utf_8", "ibm1208", "cp1208" };
1156 int32_t UTF8_NAMES_LENGTH
=
1157 sizeof(UTF8_NAMES
) / sizeof(UTF8_NAMES
[0]);
1162 } CONVERTERS_NAMES
[] = {
1163 { "UTF-32BE", "UTF32_BigEndian" },
1164 { "UTF-32LE", "UTF32_LittleEndian" },
1165 { "UTF-32", "ISO-10646-UCS-4" },
1166 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1167 { "UTF-32", "ucs-4" }
1169 int32_t CONVERTERS_NAMES_LENGTH
= sizeof(CONVERTERS_NAMES
) / sizeof(*CONVERTERS_NAMES
);
1171 /* When there are bugs in gencnval or in ucnv_io, converters can
1172 appear to have no aliases. */
1173 ncnv
= ucnv_countAvailable();
1174 log_verbose("%d converters\n", ncnv
);
1175 for (i
=0; i
<ncnv
; ++i
) {
1176 const char *name
= ucnv_getAvailableName(i
);
1178 uint16_t na
= ucnv_countAliases(name
, &status
);
1183 log_err("FAIL: Converter \"%s\" (i=%d)"
1184 " has no aliases; expect at least one\n",
1188 cnv
= ucnv_open(name
, &status
);
1189 if (U_FAILURE(status
)) {
1190 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1191 " can't be opened.\n",
/* Names containing "PlatformEndian" or "OppositeEndian" are exempt from
   the requirement that ucnv_getName() equals the available name. */
1195 if (strcmp(ucnv_getName(cnv
, &status
), name
) != 0
1196 && (strstr(name
, "PlatformEndian") == 0 && strstr(name
, "OppositeEndian") == 0)) {
1197 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1198 "The should be the same\n",
1199 name
, ucnv_getName(cnv
, &status
));
1204 status
= U_ZERO_ERROR
;
1205 alias0
= ucnv_getAlias(name
, 0, &status
);
1206 for (j
=1; j
<na
; ++j
) {
1208 /* Make sure each alias maps back to the same list of
1209 aliases. Assume that if alias 0 is the same, the whole
1210 list is the same (this should always be true). */
1211 const char *mapBack
;
1213 status
= U_ZERO_ERROR
;
1214 alias
= ucnv_getAlias(name
, j
, &status
);
1215 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1216 log_err("FAIL: Converter \"%s\"is ambiguous\n", name
);
1219 if (alias
== NULL
) {
1220 log_err("FAIL: Converter \"%s\" -> "
1226 mapBack
= ucnv_getAlias(alias
, 0, &status
);
1228 if (mapBack
== NULL
) {
1229 log_err("FAIL: Converter \"%s\" -> "
1230 "alias[%d]=\"%s\" -> "
1231 "alias[0]=NULL, exp. \"%s\"\n",
1232 name
, j
, alias
, alias0
);
1236 if (0 != strcmp(alias0
, mapBack
)) {
1238 UBool foundAlias
= FALSE
;
1239 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1240 /* Make sure that we only get this mismapping when there is
1241 an ambiguous alias, and the other converter has this alias too. */
1242 for (idx
= 0; idx
< ucnv_countAliases(mapBack
, &status
); idx
++) {
1243 if (strcmp(ucnv_getAlias(mapBack
, (uint16_t)idx
, &status
), alias
) == 0) {
1249 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1252 log_err("FAIL: Converter \"%s\" -> "
1253 "alias[%d]=\"%s\" -> "
1254 "alias[0]=\"%s\", exp. \"%s\"\n",
1255 name
, j
, alias
, mapBack
, alias0
);
1262 /* Check a list of predetermined aliases that we expect to map
1263 * back to ISO_2022 and UTF-8. */
/* Entry 0 of each table is the expected result, so both loops start at 1. */
1264 for (i
=1; i
<ISO_2022_NAMES_LENGTH
; ++i
) {
1265 const char* mapBack
= ucnv_getAlias(ISO_2022_NAMES
[i
], 0, &status
);
1267 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES
[i
]);
1270 if (0 != strcmp(mapBack
, ISO_2022_NAMES
[0])) {
1271 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1272 ISO_2022_NAMES
[i
], mapBack
);
1277 for (i
=1; i
<UTF8_NAMES_LENGTH
; ++i
) {
1278 const char* mapBack
= ucnv_getAlias(UTF8_NAMES
[i
], 0, &status
);
1280 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES
[i
]);
1283 if (mapBack
&& 0 != strcmp(mapBack
, UTF8_NAMES
[0])) {
1284 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1285 UTF8_NAMES
[i
], mapBack
);
1290 * Check a list of predetermined aliases that we expect to map
1291 * back to predermined converter names.
1294 for (i
= 0; i
< CONVERTERS_NAMES_LENGTH
; ++i
) {
1295 const char* mapBack
= ucnv_getAlias(CONVERTERS_NAMES
[i
].alias
, 0, &status
);
1297 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES
[i
].name
);
1300 if (0 != strcmp(mapBack
, CONVERTERS_NAMES
[i
].name
)) {
1301 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1302 CONVERTERS_NAMES
[i
].alias
, mapBack
, CONVERTERS_NAMES
[i
].name
);
/*
 * TestDuplicateAlias: checks ucnv_getStandardName() on names that are
 * shared between converters.  Three lookups are made, each with status
 * reset to U_ZERO_ERROR first:
 *   - "Shift_JIS" under "IBM" must give "ibm-943" with
 *     U_AMBIGUOUS_ALIAS_WARNING;
 *   - "ibm-943" under "IANA" must give "Shift_JIS" with
 *     U_AMBIGUOUS_ALIAS_WARNING;
 *   - "ibm-943_P130-2000" under "IANA" must give NULL without
 *     U_AMBIGUOUS_ALIAS_WARNING.
 * Mismatches are reported through log_data_err.
 * NOTE(review): the declaration of alias is not visible in this listing.
 */
1308 static void TestDuplicateAlias(void) {
1310 UErrorCode status
= U_ZERO_ERROR
;
1312 status
= U_ZERO_ERROR
;
1313 alias
= ucnv_getStandardName("Shift_JIS", "IBM", &status
);
1314 if (alias
== NULL
|| strcmp(alias
, "ibm-943") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1315 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias
);
1317 status
= U_ZERO_ERROR
;
1318 alias
= ucnv_getStandardName("ibm-943", "IANA", &status
);
1319 if (alias
== NULL
|| strcmp(alias
, "Shift_JIS") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1320 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias
);
1322 status
= U_ZERO_ERROR
;
1323 alias
= ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status
);
1324 if (alias
!= NULL
|| status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1325 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias
);
1330 /* Test safe clone callback */
/*
 * Source of the serial numbers stored in TSCCContext.serial.  The counter n
 * is a function-local static that starts at 1.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably it returns n and advances it -- confirm.
 */
1332 static uint32_t TSCC_nextSerial()
1334 static uint32_t n
= 1;
/*
 * Fields of the callback context used by the safe-clone callback test
 * (referred to as TSCCContext by the functions below).
 * NOTE(review): the struct/typedef header and closing line are not visible
 * in this listing.
 */
1341 uint32_t magic
; /* 0xC0FFEE to identify that the object is OK */
1342 uint32_t serial
; /* minted from nextSerial, above */
1343 UBool wasClosed
; /* close happened on the object */
/*
 * Creates a new heap-allocated TSCCContext for a cloned converter.  The new
 * context gets a fresh serial from TSCC_nextSerial(), wasClosed = 0 and
 * magic = 0xC0FFEE; ctx is only read for the log message.
 * NOTE(review): no NULL check on the malloc() result is visible before the
 * field writes, although the callers test the returned pointer for NULL --
 * confirm against the full source.  The return statement is not visible in
 * this listing; presumably newCtx is returned.
 */
1346 static TSCCContext
*TSCC_clone(TSCCContext
*ctx
)
1348 TSCCContext
*newCtx
= (TSCCContext
*)malloc(sizeof(TSCCContext
));
1350 newCtx
->serial
= TSCC_nextSerial();
1351 newCtx
->wasClosed
= 0;
1352 newCtx
->magic
= 0xC0FFEE;
1354 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx
, ctx
->serial
, newCtx
, newCtx
->serial
);
1359 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode converter callback used by the safe-clone callback test.
 *
 * context is treated as a TSCCContext.  Every call is logged, and an error
 * is reported when the context's magic is not 0xC0FFEE.
 *   - reason == UCNV_CLONE: a new context is made with TSCC_clone() and
 *     installed on fromUArgs->converter, keeping the callback function that
 *     ucnv_getFromUCallBack() reports as currently registered.
 *   - reason == UCNV_CLOSE: the context's wasClosed flag is set to TRUE.
 * NOTE(review): some parameters and the bodies of the error branches are
 * not visible in this listing.
 */
1360 static void TSCC_fromU(const void *context
,
1361 UConverterFromUnicodeArgs
*fromUArgs
,
1362 const UChar
* codeUnits
,
1365 UConverterCallbackReason reason
,
1368 TSCCContext
*ctx
= (TSCCContext
*)context
;
1369 UConverterFromUCallback junkFrom
;
1371 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, fromUArgs
->converter
);
1373 if(ctx
->magic
!= 0xC0FFEE) {
1374 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1378 if(reason
== UCNV_CLONE
) {
1379 UErrorCode subErr
= U_ZERO_ERROR
;
1380 TSCCContext
*newCtx
;
1381 TSCCContext
*junkCtx
;
1382 TSCCContext
**pjunkCtx
= &junkCtx
;
1385 log_verbose("TSCC_fromU: cloning..\n");
1386 newCtx
= TSCC_clone(ctx
);
1388 if(newCtx
== NULL
) {
1389 log_err("TSCC_fromU: internal clone failed on %p\n", ctx
);
/* Read back the registered callback function (the old context lands in
   junkCtx and is not used), then re-register that function with newCtx. */
1393 ucnv_getFromUCallBack(fromUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1394 ucnv_setFromUCallBack(fromUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1396 if(U_FAILURE(subErr
)) {
1401 if(reason
== UCNV_CLOSE
) {
1402 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1403 ctx
->wasClosed
= TRUE
;
/*
 * To-Unicode converter callback used by the safe-clone callback test; the
 * counterpart of TSCC_fromU.
 *
 * context is treated as a TSCCContext.  Every call is logged, and an error
 * is reported when the context's magic is not 0xC0FFEE.
 *   - reason == UCNV_CLONE: a new context is made with TSCC_clone() and
 *     installed on toUArgs->converter, keeping the callback function that
 *     ucnv_getToUCallBack() reports as currently registered.
 *   - reason == UCNV_CLOSE: the context's wasClosed flag is set to TRUE.
 * The local named junkFrom holds a to-Unicode callback here despite its name.
 * NOTE(review): some parameters and the bodies of the error branches are
 * not visible in this listing.
 */
1407 static void TSCC_toU(const void *context
,
1408 UConverterToUnicodeArgs
*toUArgs
,
1409 const char* codeUnits
,
1411 UConverterCallbackReason reason
,
1414 TSCCContext
*ctx
= (TSCCContext
*)context
;
1415 UConverterToUCallback junkFrom
;
1417 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, toUArgs
->converter
);
1419 if(ctx
->magic
!= 0xC0FFEE) {
1420 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1424 if(reason
== UCNV_CLONE
) {
1425 UErrorCode subErr
= U_ZERO_ERROR
;
1426 TSCCContext
*newCtx
;
1427 TSCCContext
*junkCtx
;
1428 TSCCContext
**pjunkCtx
= &junkCtx
;
1431 log_verbose("TSCC_toU: cloning..\n");
1432 newCtx
= TSCC_clone(ctx
);
1434 if(newCtx
== NULL
) {
1435 log_err("TSCC_toU: internal clone failed on %p\n", ctx
);
/* Read back the registered callback function (the old context lands in
   junkCtx and is not used), then re-register that function with newCtx. */
1439 ucnv_getToUCallBack(toUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1440 ucnv_setToUCallBack(toUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1442 if(U_FAILURE(subErr
)) {
1447 if(reason
== UCNV_CLOSE
) {
1448 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1449 ctx
->wasClosed
= TRUE
;
/*
 * Initialises a caller-provided TSCCContext: sets magic to 0xC0FFEE and
 * assigns a fresh serial from TSCC_nextSerial().
 * NOTE(review): any initialisation of wasClosed is not visible in this
 * listing -- confirm against the full source.
 */
1453 static void TSCC_init(TSCCContext
*q
)
1455 q
->magic
= 0xC0FFEE;
1456 q
->serial
= TSCC_nextSerial();
/*
 * Logs the state of a TSCCContext under the label name.  Reports an error
 * when q->magic is not 0xC0FFEE, and logs the pointer, serial, magic and
 * open/CLOSED state (derived from q->wasClosed) at verbose level.
 * NOTE(review): the condition guarding the "is NULL" message and the
 * branch structure are not visible in this listing.
 */
1460 static void TSCC_print_log(TSCCContext
*q
, const char *name
)
1463 log_verbose("TSCContext: %s is NULL!!\n", name
);
1465 if(q
->magic
!= 0xC0FFEE) {
1466 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1467 q
,q
->serial
, q
->magic
);
1469 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1470 q
, q
->serial
, name
, q
->magic
, q
->wasClosed
?"CLOSED":"open");
/*
 * TestConvertSafeCloneCallback: verifies that converter callbacks are told
 * about clone and close events with the right contexts.
 *
 * Steps visible here:
 *   1. Open "iso-8859-3" as conv1 and register TSCC_fromU/TSCC_toU with the
 *      stack contexts from1/to1.
 *   2. Clone conv1 into conv2 with ucnv_safeClone().
 *   3. Fetch the contexts of both converters: conv1 must still use
 *      &from1/&to1, conv2 must use different contexts, and none may be
 *      marked closed yet.
 *   4. After conv1 is closed, from1/to1 must be marked closed while
 *      from2/to2 are not; after conv2 is closed, all four must be closed.
 *   5. The clone contexts (heap objects made by TSCC_clone) are freed.
 *
 * NOTE(review): the ucnv_close() calls, the TSCC_init() calls, the
 * declaration of hunk and several conditions are not visible in this
 * listing.
 */
1474 static void TestConvertSafeCloneCallback()
1476 UErrorCode err
= U_ZERO_ERROR
;
1477 TSCCContext from1
, to1
;
1478 TSCCContext
*from2
, *from3
, *to2
, *to3
;
1479 TSCCContext
**pfrom2
= &from2
, **pfrom3
= &from3
, **pto2
= &to2
, **pto3
= &to3
;
1481 int32_t hunkSize
= 8192;
1482 UConverterFromUCallback junkFrom
;
1483 UConverterToUCallback junkTo
;
1484 UConverter
*conv1
, *conv2
= NULL
;
1486 conv1
= ucnv_open("iso-8859-3", &err
);
1488 if(U_FAILURE(err
)) {
1489 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err
));
1493 log_verbose("Opened conv1=%p\n", conv1
);
1498 TSCC_print_log(&from1
, "from1");
1499 TSCC_print_log(&to1
, "to1");
1501 ucnv_setFromUCallBack(conv1
, TSCC_fromU
, &from1
, NULL
, NULL
, &err
);
1502 log_verbose("Set from1 on conv1\n");
1503 TSCC_print_log(&from1
, "from1");
1505 ucnv_setToUCallBack(conv1
, TSCC_toU
, &to1
, NULL
, NULL
, &err
);
1506 log_verbose("Set to1 on conv1\n");
1507 TSCC_print_log(&to1
, "to1");
1509 conv2
= ucnv_safeClone(conv1
, hunk
, &hunkSize
, &err
);
1510 if(U_FAILURE(err
)) {
1511 log_err("safeClone failed: %s\n", u_errorName(err
));
1514 log_verbose("Cloned to conv2=%p.\n", conv2
);
1516 /********** from *********************/
/* from2 receives conv2's context, from3 receives conv1's context. */
1517 ucnv_getFromUCallBack(conv2
, &junkFrom
, (const void**)pfrom2
);
1518 ucnv_getFromUCallBack(conv1
, &junkFrom
, (const void**)pfrom3
);
1520 TSCC_print_log(from2
, "from2");
1521 TSCC_print_log(from3
, "from3(==from1)");
1524 log_err("FAIL! from2 is null \n");
1529 log_err("FAIL! from3 is null \n");
1533 if(from3
!= (&from1
) ) {
1534 log_err("FAIL! conv1's FROM context changed!\n");
1537 if(from2
== (&from1
) ) {
1538 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1541 if(from1
.wasClosed
) {
1542 log_err("FAIL! from1 is closed \n");
1545 if(from2
->wasClosed
) {
1546 log_err("FAIL! from2 was closed\n");
1549 /********** to *********************/
/* to2 receives conv2's context, to3 receives conv1's context. */
1550 ucnv_getToUCallBack(conv2
, &junkTo
, (const void**)pto2
);
1551 ucnv_getToUCallBack(conv1
, &junkTo
, (const void**)pto3
);
1553 TSCC_print_log(to2
, "to2");
1554 TSCC_print_log(to3
, "to3(==to1)");
1557 log_err("FAIL! to2 is null \n");
1562 log_err("FAIL! to3 is null \n");
1566 if(to3
!= (&to1
) ) {
1567 log_err("FAIL! conv1's TO context changed!\n");
1570 if(to2
== (&to1
) ) {
1571 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1575 log_err("FAIL! to1 is closed \n");
1578 if(to2
->wasClosed
) {
1579 log_err("FAIL! to2 was closed\n");
1582 /*************************************/
1585 log_verbose("ucnv_closed (conv1)\n");
1586 TSCC_print_log(&from1
, "from1");
1587 TSCC_print_log(from2
, "from2");
1588 TSCC_print_log(&to1
, "to1");
1589 TSCC_print_log(to2
, "to2");
1591 if(from1
.wasClosed
== FALSE
) {
1592 log_err("FAIL! from1 is NOT closed \n");
1595 if(from2
->wasClosed
) {
1596 log_err("FAIL! from2 was closed\n");
1599 if(to1
.wasClosed
== FALSE
) {
1600 log_err("FAIL! to1 is NOT closed \n");
1603 if(to2
->wasClosed
) {
1604 log_err("FAIL! to2 was closed\n");
1608 log_verbose("ucnv_closed (conv2)\n");
1610 TSCC_print_log(&from1
, "from1");
1611 TSCC_print_log(from2
, "from2");
1613 if(from1
.wasClosed
== FALSE
) {
1614 log_err("FAIL! from1 is NOT closed \n");
1617 if(from2
->wasClosed
== FALSE
) {
1618 log_err("FAIL! from2 was NOT closed\n");
1621 TSCC_print_log(&to1
, "to1");
1622 TSCC_print_log(to2
, "to2");
1624 if(to1
.wasClosed
== FALSE
) {
1625 log_err("FAIL! to1 is NOT closed \n");
1628 if(to2
->wasClosed
== FALSE
) {
1629 log_err("FAIL! to2 was NOT closed\n");
1633 free(to2
); /* to1 is stack based */
1635 if(from2
!= (&from1
)) {
1636 free(from2
); /* from1 is stack based */
/*
 * Helper for TestConvertSafeClone, which calls it with 0xaa after filling
 * its buffers with 0xaa to detect writes outside the clone area.
 * NOTE(review): the return type and the body are not visible in this
 * listing; presumably it reports whether any of the length bytes at p
 * differs from b -- confirm against the full source.
 */
1642 containsAnyOtherByte(uint8_t *p
, int32_t length
, uint8_t b
) {
/*
 * TestConvertSafeClone: exercises ucnv_safeClone().
 *
 * Part 1 (on names[0]) checks the argument/error handling: NULL status,
 * incoming error status, NULL bufferSize pointer, preflighting, cloning
 * with the preflighted size, heap fallback (expects
 * U_SAFECLONE_ALLOCATED_WARNING) and a NULL converter.
 *
 * Part 2 loops over every entry of bufferSizes and every converter name:
 * it preflights the clone size (tracking the maximum in maxBufferSize /
 * maxName), fills the stack buffers with 0xaa, clones into buffer[1],
 * checks that no bytes outside the reported clone area were overwritten,
 * and then converts through the clone in both directions.
 *
 * Finally the largest preflighted size is compared with
 * U_CNV_SAFECLONE_BUFFERSIZE.
 *
 * NOTE(review): many lines (declarations of err, j, idx, pCharBuffer, part
 * of the names table, ucnv_close() calls, argument lists of the conversion
 * calls) are not visible in this listing.
 */
1653 static void TestConvertSafeClone()
1655 /* one 'regular' & all the 'private stateful' converters */
1656 static const char *const names
[] = {
1657 #if !UCONFIG_NO_LEGACY_CONVERSION
1659 "ISO_2022,locale=zh,version=1",
1662 #if !UCONFIG_NO_LEGACY_CONVERSION
1666 "ISO_2022,locale=kr,version=1",
1667 "ISO_2022,locale=jp,version=2",
1671 #if !UCONFIG_NO_LEGACY_CONVERSION
1672 "IMAP-mailbox-name",
1679 /* store the actual sizes of each converter */
1680 int32_t actualSizes
[LENGTHOF(names
)];
1682 static const int32_t bufferSizes
[] = {
1683 U_CNV_SAFECLONE_BUFFERSIZE
,
1684 (int32_t)(3*sizeof(UConverter
))/2, /* 1.5*sizeof(UConverter) */
1685 (int32_t)sizeof(UConverter
)/2 /* 0.5*sizeof(UConverter) */
1688 char charBuffer
[21]; /* Leave at an odd number for alignment testing */
1689 uint8_t buffer
[3] [U_CNV_SAFECLONE_BUFFERSIZE
];
1690 int32_t bufferSize
, maxBufferSize
;
1691 const char *maxName
;
1692 UConverter
* cnv
, *cnv2
;
1696 const char *pConstCharBuffer
;
1697 const char *charBufferLimit
= charBuffer
+ sizeof(charBuffer
)/sizeof(*charBuffer
);
1698 UChar uniBuffer
[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1699 UChar uniCharBuffer
[20];
1700 char charSourceBuffer
[] = { 0x1b, 0x24, 0x42 };
1701 const char *pCharSource
= charSourceBuffer
;
1702 const char *pCharSourceLimit
= charSourceBuffer
+ sizeof(charSourceBuffer
);
1703 UChar
*pUCharTarget
= uniCharBuffer
;
1704 UChar
*pUCharTargetLimit
= uniCharBuffer
+ sizeof(uniCharBuffer
)/sizeof(*uniCharBuffer
);
1705 const UChar
* pUniBuffer
;
1706 const UChar
*uniBufferLimit
= uniBuffer
+ sizeof(uniBuffer
)/sizeof(*uniBuffer
);
1710 cnv
= ucnv_open(names
[0], &err
);
1711 if(U_SUCCESS(err
)) {
1712 /* Check the various error & informational states: */
1714 /* Null status - just returns NULL */
1715 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1716 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, 0))
1718 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1720 /* error status - should return 0 & keep error the same */
1721 err
= U_MEMORY_ALLOCATION_ERROR
;
1722 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
1724 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1728 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
1729 if (0 != ucnv_safeClone(cnv
, buffer
[0], 0, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1731 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1737 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || U_FAILURE(err
) || bufferSize
<= 0)
1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1741 /* Verify our define is large enough */
1742 if (U_CNV_SAFECLONE_BUFFERSIZE
< bufferSize
)
1744 log_err("FAIL: Pre-calculated buffer size is too small\n");
1746 /* Verify we can use this run-time calculated size */
1747 if (0 == (cnv2
= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
1749 log_err("FAIL: Converter can't be cloned with run-time size\n");
1755 /* size one byte too small - should allocate & let us know */
1757 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1766 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
1769 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1780 if (0 != ucnv_safeClone(0, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1793 for(j
= 0; j
< LENGTHOF(bufferSizes
); ++j
) {
1794 for (idx
= 0; idx
< LENGTHOF(names
); idx
++)
1797 cnv
= ucnv_open(names
[idx
], &err
);
1798 if(U_FAILURE(err
)) {
1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names
[idx
], u_errorName(err
));
1804 /* preflight to get maxBufferSize */
1805 actualSizes
[idx
] = 0;
1806 ucnv_safeClone(cnv
, NULL
, &actualSizes
[idx
], &err
);
1807 if(actualSizes
[idx
] > maxBufferSize
) {
1808 maxBufferSize
= actualSizes
[idx
];
1809 maxName
= names
[idx
];
/* Fill all three buffers with 0xaa so stray writes can be detected; the
   clone itself is placed in the middle buffer, buffer[1]. */
1813 memset(buffer
, 0xaa, sizeof(buffer
));
1815 bufferSize
= bufferSizes
[j
];
1816 cnv2
= ucnv_safeClone(cnv
, buffer
[1], &bufferSize
, &err
);
1818 /* close the original immediately to make sure that the clone works by itself */
1821 if( actualSizes
[idx
] <= (bufferSizes
[j
] - (int32_t)sizeof(UAlignedMemory
)) &&
1822 err
== U_SAFECLONE_ALLOCATED_WARNING
1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names
[idx
]);
1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1828 if(bufferSize
<= bufferSizes
[j
]) {
1829 /* used the stack buffer */
1830 if( containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
[0]), 0xaa) ||
1831 containsAnyOtherByte(buffer
[1]+bufferSize
, (int32_t)(sizeof(buffer
)-(sizeof(buffer
[0])+bufferSize
)), 0xaa)
1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1834 names
[idx
], bufferSize
, bufferSizes
[j
]);
1837 /* heap-allocated the clone */
1838 if(containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
), 0xaa)) {
1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1840 names
[idx
], bufferSize
, bufferSizes
[j
]);
1844 pCharBuffer
= charBuffer
;
1845 pUniBuffer
= uniBuffer
;
1847 ucnv_fromUnicode(cnv2
,
1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err
));
1858 ucnv_toUnicode(cnv2
,
1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err
));
1872 pConstCharBuffer
= charBuffer
;
1873 if (uniBuffer
[0] != ucnv_getNextUChar(cnv2
, &pConstCharBuffer
, pCharBuffer
, &err
))
1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err
));
1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1882 sizeof(UConverter
), maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1883 if(maxBufferSize
> U_CNV_SAFECLONE_BUFFERSIZE
) {
1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1885 maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
/*
 * TestCCSID: for each CCSID in ccsids[], opens a converter with
 * ucnv_openCCSID(ccsid, UCNV_IBM, ...) and checks that
 *   - ucnv_getCCSID() returns the same CCSID, and
 *   - ucnv_getPlatform() returns UCNV_IBM (skipped for CCSID 1392, gb18030).
 * Open failures are reported through log_data_err, mismatches through
 * log_err.
 * NOTE(review): the declarations of i, cnv and ccsid, the assignment of
 * ccsid inside the loop and the ucnv_close() call are not visible in this
 * listing.
 */
1889 static void TestCCSID() {
1890 #if !UCONFIG_NO_LEGACY_CONVERSION
1892 UErrorCode errorCode
;
1893 int32_t ccsids
[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1896 for(i
=0; i
<(int32_t)(sizeof(ccsids
)/sizeof(int32_t)); ++i
) {
1899 errorCode
=U_ZERO_ERROR
;
1900 cnv
=ucnv_openCCSID(ccsid
, UCNV_IBM
, &errorCode
);
1901 if(U_FAILURE(errorCode
)) {
1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid
, u_errorName(errorCode
));
1906 if(ccsid
!=ucnv_getCCSID(cnv
, &errorCode
)) {
1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid
, ucnv_getCCSID(cnv
, &errorCode
));
1910 /* skip gb18030(ccsid 1392) */
1911 if(ccsid
!= 1392 && UCNV_IBM
!=ucnv_getPlatform(cnv
, &errorCode
)) {
1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid
, ucnv_getPlatform(cnv
, &errorCode
));
1920 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1922 /* CHUNK_SIZE defined in common\ucnv.c: */
1923 #define CHUNK_SIZE 1024
/*
 * Forward declarations of the three jitterbug-932 sub-tests, followed by
 * the calls that run them in order.
 * NOTE(review): the header of the function containing the three calls is
 * not visible in this listing; presumably it is TestJ932, which is declared
 * near the top of the file -- confirm.
 */
1925 static void bug1(void);
1926 static void bug2(void);
1927 static void bug3(void);
1932 bug1(); /* Unicode intermediate buffer straddle bug */
1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1938 * jitterbug 932: test chunking boundary conditions in
1940 int32_t ucnv_convert(const char *toConverterName,
1941 const char *fromConverterName,
1948 * See discussions on the icu mailing list in
1949 * 2001-April with the subject "converter 'flush' question".
1951 * Bug report and test code provided by Edward J. Batutis.
/*
 * Body of the first jitterbug-932 sub-test (presumably bug1; the function
 * header is not visible in this listing).
 *
 * For i from test_seq_len down to 0, the input buffer is filled with 0x61
 * bytes, the 4-byte GB 18030 sequence test_seq is copied to offset
 * CHUNK_SIZE - i, and ucnv_convert() is called with "us-ascii" as the
 * output converter.  An error is reported when the call ends with
 * U_TRUNCATED_CHAR_FOUND.
 * NOTE(review): the remaining ucnv_convert() arguments are not visible in
 * this listing.
 */
1955 #if !UCONFIG_NO_LEGACY_CONVERSION
1956 char char_in
[CHUNK_SIZE
+32];
1957 char char_out
[CHUNK_SIZE
*2];
1959 /* GB 18030 equivalent of U+10000 is 90308130 */
1960 static const char test_seq
[]={ (char)0x90u
, 0x30, (char)0x81u
, 0x30 };
1962 UErrorCode err
= U_ZERO_ERROR
;
1963 int32_t i
, test_seq_len
= sizeof(test_seq
);
1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1968 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1970 * other Unicode sequences cause a bug since combining sequences are not supported by the
1974 for (i
= test_seq_len
; i
>= 0; i
--) {
1975 /* put character sequence into input buffer */
1976 memset(char_in
, 0x61, sizeof(char_in
)); /* GB 18030 'a' */
1977 memcpy(char_in
+ (CHUNK_SIZE
- i
), test_seq
, test_seq_len
);
1979 /* do the conversion */
1980 ucnv_convert("us-ascii", /* out */
1989 if (err
== U_TRUNCATED_CHAR_FOUND
) {
1990 /* this happens when surrogate pair straddles the intermediate buffer in
1991 * T_UConverter_fromCodepageToCodepage */
1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
/*
 * Body of the second jitterbug-932 sub-test (presumably bug2; the function
 * header is not visible in this listing).
 *
 * Calls ucnv_convert() three times with the 5-byte target[] and checks the
 * returned preflighting size.  Per the log messages the expected sizes are
 * 10 for us-ascii -> iso-8859-1, 32 for UTF-8 -> UTF-32BE and 12 for
 * UTF-32BE -> UTF-8.
 * NOTE(review): the declaration of size, most ucnv_convert() arguments and
 * the comparison conditions are not visible in this listing.
 */
1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */
2001 /* US-ASCII "1234567890" */
2002 static const char source
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2003 static const char sourceUTF8
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2004 static const char sourceUTF32
[]={ 0x00, 0x00, 0x00, 0x30,
2005 0x00, 0x00, 0x00, 0x31,
2006 0x00, 0x00, 0x00, 0x32,
2007 0x00, 0x00, 0x00, 0x33,
2008 0x00, 0x00, 0x00, 0x34,
2009 0x00, 0x00, 0x00, 0x35,
2010 0x00, 0x00, 0x00, 0x36,
2011 0x00, 0x00, 0x00, 0x37,
2012 0x00, 0x00, 0x00, 0x38,
2013 0x00, 0x00, (char)0xf0, 0x00};
2014 static char target
[5];
2016 UErrorCode err
= U_ZERO_ERROR
;
2019 /* do the conversion */
2020 size
= ucnv_convert("iso-8859-1", /* out */
2021 "us-ascii", /* in */
2029 /* bug2: size is 5, should be 10 */
2030 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size
);
2034 /* do the conversion */
2035 size
= ucnv_convert("UTF-32BE", /* out */
2044 /* bug2: size is 5, should be 32 */
2045 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size
);
2049 /* do the conversion */
2050 size
= ucnv_convert("UTF-8", /* out */
2051 "UTF-32BE", /* in */
2055 sizeof(sourceUTF32
),
2059 /* bug2: size is 5, should be 12 */
2060 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size
);
2065 * bug3: when the characters expand going from source to target codepage
2066 * you get bug3 in addition to bug2
/*
 * Body of the third jitterbug-932 sub-test (presumably bug3; the function
 * header is not visible in this listing).
 *
 * Converts a CHUNK_SIZE*4 byte buffer from "us-ascii" to "lmbcs" twice and
 * checks the returned size: first with the buffer filled with 0x61, where
 * the size must equal sizeof(char_in); then with the buffer filled with 8,
 * where the size must equal sizeof(char_in) * 2.
 * NOTE(review): the declaration of size, the target buffer and the
 * remaining ucnv_convert() arguments are not visible in this listing.
 */
2070 #if !UCONFIG_NO_LEGACY_CONVERSION
2071 char char_in
[CHUNK_SIZE
*4];
2073 UErrorCode err
= U_ZERO_ERROR
;
2077 * first get the buggy size from bug2 then
2078 * compare it to buggy size with an expansion
2080 memset(char_in
, 0x61, sizeof(char_in
)); /* US-ASCII 'a' */
2082 /* do the conversion */
2083 size
= ucnv_convert("lmbcs", /* out */
2084 "us-ascii", /* in */
2091 if ( size
!= sizeof(char_in
) ) {
2093 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2094 * in the converter?), should be CHUNK_SIZE*4
2096 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2098 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in
), size
);
2102 * now do the conversion with expansion
2103 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2105 memset(char_in
, 8, sizeof(char_in
));
2108 /* do the conversion */
2109 size
= ucnv_convert("lmbcs", /* out */
2110 "us-ascii", /* in */
2117 /* expect 2X expansion */
2118 if ( size
!= sizeof(char_in
) * 2 ) {
2121 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2123 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in
) * 2, size
);
/*
 * convertExStreaming: drives ucnv_convertEx() in small chunks and compares
 * the result with an expected byte sequence.
 *
 *   srcCnv, targetCnv     converters for the input and the output side;
 *                         both are reset before the conversion starts
 *   src, srcLength        input bytes
 *   expectTarget,
 *   expectTargetLength    expected output bytes
 *   testName              label used in the failure messages
 *   expectCode            expected final UErrorCode; when it is
 *                         U_ZERO_ERROR, U_STRING_NOT_TERMINATED_WARNING is
 *                         accepted as well
 *
 * chunkSize (capped at CHUNK_SIZE) limits how many source, pivot and target
 * units are offered to ucnv_convertEx() per call.  On
 * U_BUFFER_OVERFLOW_ERROR the error is cleared and the target limit is
 * advanced by another chunk, bounded by the end of targetBuffer.
 * NOTE(review): the return type, the chunkSize parameter, the declarations
 * of target and flush, and the loop construct are not visible in this
 * listing.
 */
2129 convertExStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2130 const char *src
, int32_t srcLength
,
2131 const char *expectTarget
, int32_t expectTargetLength
,
2133 const char *testName
,
2134 UErrorCode expectCode
) {
2135 UChar pivotBuffer
[CHUNK_SIZE
];
2136 UChar
*pivotSource
, *pivotTarget
;
2137 const UChar
*pivotLimit
;
2139 char targetBuffer
[CHUNK_SIZE
];
2141 const char *srcLimit
, *finalSrcLimit
, *targetLimit
;
2143 int32_t targetLength
;
2147 UErrorCode errorCode
;
2150 if(chunkSize
>CHUNK_SIZE
) {
2151 chunkSize
=CHUNK_SIZE
;
2154 pivotSource
=pivotTarget
=pivotBuffer
;
2155 pivotLimit
=pivotBuffer
+chunkSize
;
2157 finalSrcLimit
=src
+srcLength
;
2158 target
=targetBuffer
;
2159 targetLimit
=targetBuffer
+chunkSize
;
2161 ucnv_resetToUnicode(srcCnv
);
2162 ucnv_resetFromUnicode(targetCnv
);
2164 errorCode
=U_ZERO_ERROR
;
2167 /* convert, streaming-style (both converters and pivot keep state) */
2169 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2170 if(src
+chunkSize
<=finalSrcLimit
) {
2171 srcLimit
=src
+chunkSize
;
2173 srcLimit
=finalSrcLimit
;
2175 ucnv_convertEx(targetCnv
, srcCnv
,
2176 &target
, targetLimit
,
2178 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
2179 FALSE
, flush
, &errorCode
);
2180 targetLength
=(int32_t)(target
-targetBuffer
);
2181 if(target
>targetLimit
) {
2182 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2183 testName
, chunkSize
, target
, targetLimit
);
2184 break; /* TODO: major problem! */
2186 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2187 /* continue converting another chunk */
2188 errorCode
=U_ZERO_ERROR
;
2189 if(targetLength
+chunkSize
<=sizeof(targetBuffer
)) {
2190 targetLimit
=target
+chunkSize
;
2192 targetLimit
=targetBuffer
+sizeof(targetBuffer
);
2194 } else if(U_FAILURE(errorCode
)) {
2200 } else if(src
==finalSrcLimit
&& pivotSource
==pivotTarget
) {
2201 /* all consumed, now flush without input (separate from conversion for testing) */
/* Final verdict: error code first, then output length, then output bytes. */
2206 if(!(errorCode
==expectCode
|| (expectCode
==U_ZERO_ERROR
&& errorCode
==U_STRING_NOT_TERMINATED_WARNING
))) {
2207 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2208 testName
, chunkSize
, u_errorName(errorCode
), u_errorName(expectCode
));
2209 } else if(targetLength
!=expectTargetLength
) {
2210 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2211 testName
, chunkSize
, targetLength
, expectTargetLength
);
2212 } else if(memcmp(targetBuffer
, expectTarget
, targetLength
)!=0) {
2213 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2214 testName
, chunkSize
);
/*
 * Runs convertExStreaming() three times over the same converter pair and the
 * same expected output. The three calls differ only in the integer argument
 * that precedes testName: 1, 3 and 7.
 * NOTE(review): that integer is presumably the chunk size used by
 * convertExStreaming() - confirm against its parameter list.
 */
2219 convertExMultiStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2220 const char *src
, int32_t srcLength
,
2221 const char *expectTarget
, int32_t expectTargetLength
,
2222 const char *testName
,
2223 UErrorCode expectCode
) {
2224 convertExStreaming(srcCnv
, targetCnv
,
2226 expectTarget
, expectTargetLength
,
2227 1, testName
, expectCode
);
2228 convertExStreaming(srcCnv
, targetCnv
,
2230 expectTarget
, expectTargetLength
,
2231 3, testName
, expectCode
);
2232 convertExStreaming(srcCnv
, targetCnv
,
2234 expectTarget
, expectTargetLength
,
2235 7, testName
, expectCode
);
/*
 * Tests ucnv_convertEx() with a UTF-8 converter (cnv1) and a Shift-JIS
 * converter (cnv2):
 *  - streaming conversions in both directions via convertExMultiStreaming(),
 *  - one-shot conversions that check the resulting error/warning code and
 *    the number of bytes written,
 *  - calls with bad arguments that must report U_ILLEGAL_ARGUMENT_ERROR.
 * Only compiled in when legacy conversion is enabled.
 */
2238 static void TestConvertEx() {
2239 #if !UCONFIG_NO_LEGACY_CONVERSION
2240 static const uint8_t
2242 /* 4e00 30a1 ff61 0410 */
2243 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2246 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2250 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2251 * SUB, SUB, 0x40, SUB, SUB, 0x40
2253 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2256 char srcBuffer
[100], targetBuffer
[100];
2261 UChar pivotBuffer
[100];
2262 UChar
*pivotSource
, *pivotTarget
;
2264 UConverter
*cnv1
, *cnv2
;
2265 UErrorCode errorCode
;
2267 errorCode
=U_ZERO_ERROR
;
2268 cnv1
=ucnv_open("UTF-8", &errorCode
);
2269 if(U_FAILURE(errorCode
)) {
2270 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode
));
2274 cnv2
=ucnv_open("Shift-JIS", &errorCode
);
2275 if(U_FAILURE(errorCode
)) {
2276 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2281 /* test ucnv_convertEx() with streaming conversion style */
2282 convertExMultiStreaming(cnv1
, cnv2
,
2283 (const char *)utf8
, sizeof(utf8
), (const char *)shiftJIS
, sizeof(shiftJIS
),
2284 "UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2286 convertExMultiStreaming(cnv2
, cnv1
,
2287 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
),
2288 "Shift-JIS -> UTF-8", U_ZERO_ERROR
);
2290 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2291 convertExMultiStreaming(cnv1
, cnv2
,
2292 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)errorTarget
, sizeof(errorTarget
),
2293 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2295 /* test some simple conversions */
2297 /* NUL-terminated source and target */
/* errorCode starts as a warning; the check below requires U_ZERO_ERROR afterwards */
2298 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2299 memcpy(srcBuffer
, utf8
, sizeof(utf8
));
2300 srcBuffer
[sizeof(utf8
)]=0;
2302 target
=targetBuffer
;
/* source limit is NULL here; the input was NUL-terminated just above */
2303 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2304 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2305 if( errorCode
!=U_ZERO_ERROR
||
2306 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2308 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
/*
 * NOTE(review): the two %d conversions below receive a pointer difference and
 * a sizeof value, neither of which has type int. Assuming log_err follows
 * printf conventions, these arguments should be cast - confirm.
 */
2310 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2311 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2314 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2315 errorCode
=U_AMBIGUOUS_ALIAS_WARNING
;
/* fill with 0xff so the check below can verify that *target was not overwritten */
2316 memset(targetBuffer
, 0xff, sizeof(targetBuffer
));
2318 target
=targetBuffer
;
/* target limit is exactly sizeof(shiftJIS) bytes past the start of targetBuffer */
2319 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(shiftJIS
), &src
, NULL
,
2320 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2321 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2322 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2323 *target
!=(char)0xff ||
2324 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
/* NOTE(review): same %d versus pointer-difference/sizeof mismatch as in the previous log_err call */
2326 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2327 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
/* errorCode already holds a failure on entry; the check below requires it to come back unchanged */
2331 errorCode
=U_MESSAGE_PARSE_ERROR
;
2333 target
=targetBuffer
;
2334 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2335 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2336 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2337 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2340 /* pivotLimit==pivotStart */
2341 errorCode
=U_ZERO_ERROR
;
2342 pivotSource
=pivotTarget
=pivotBuffer
;
2343 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2344 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
, TRUE
, TRUE
, &errorCode
);
2345 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2346 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode
));
2349 /* *pivotSource==NULL */
2350 errorCode
=U_ZERO_ERROR
;
2352 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2353 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2354 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2355 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode
));
2359 errorCode
=U_ZERO_ERROR
;
2361 pivotSource
=pivotBuffer
;
2362 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2363 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2364 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2365 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode
));
2368 /* streaming conversion without a pivot buffer */
2369 errorCode
=U_ZERO_ERROR
;
2371 pivotSource
=pivotBuffer
;
/* pivotStart is NULL and the flush argument is FALSE in this call */
2372 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2373 NULL
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, FALSE
, &errorCode
);
2374 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2375 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode
));
2383 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * Table of ill-formed UTF-8 byte sequences, stored as C string literals.
 * The test functions in this file iterate over it with LENGTHOF(badUTF8)
 * and measure each entry with strlen().
 */
2384 static const char *const badUTF8
[]={
2388 /* truncated multi-byte sequences */
2425 "\xfc\x80\x80\x80\x80",
2427 /* complete sequences but non-shortest forms or out of range etc. */
2433 "\xf8\x80\x80\x80\x80",
2434 "\xfc\x80\x80\x80\x80\x80",
/* capacity of the char0/char1 output arrays that getTestChar() fills */
2439 #define ARG_CHAR_ARR_SIZE 8
2441 /* get some character that can be converted and convert it */
/*
 * Picks a test code point for converter cnv: the element at index
 * uset_size(set)/2 of the set filled by ucnv_getUnicodeSet() with
 * UCNV_ROUNDTRIP_SET. The code point is written as UTF-16 into a local
 * buffer and as UTF-8 into charUTF8 (length in *pCharUTF8Length).
 * It is then converted with ucnv_fromUnicode() twice: first into char0,
 * then into char1, with the byte counts stored in *pChar0Length and
 * *pChar1Length. Both outputs are limited to ARG_CHAR_ARR_SIZE bytes.
 * A failure is logged with converterName.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably FALSE on failure and TRUE on success - confirm.
 */
2442 static UBool
getTestChar(UConverter
*cnv
, const char *converterName
,
2443 char charUTF8
[4], int32_t *pCharUTF8Length
,
2444 char char0
[ARG_CHAR_ARR_SIZE
], int32_t *pChar0Length
,
2445 char char1
[ARG_CHAR_ARR_SIZE
], int32_t *pChar1Length
) {
2446 UChar utf16
[U16_MAX_LENGTH
];
2447 int32_t utf16Length
;
2449 const UChar
*utf16Source
;
2454 UErrorCode errorCode
;
2456 errorCode
=U_ZERO_ERROR
;
2457 set
=uset_open(1, 0);
2458 ucnv_getUnicodeSet(cnv
, set
, UCNV_ROUNDTRIP_SET
, &errorCode
);
/* take the element in the middle of the roundtrip set */
2459 c
=uset_charAt(set
, uset_size(set
)/2);
2463 U16_APPEND_UNSAFE(utf16
, utf16Length
, c
);
2465 U8_APPEND_UNSAFE(charUTF8
, *pCharUTF8Length
, c
);
/* first conversion of the character: output goes to char0 */
2469 ucnv_fromUnicode(cnv
,
2470 &target
, char0
+ARG_CHAR_ARR_SIZE
,
2471 &utf16Source
, utf16
+utf16Length
,
2472 NULL
, FALSE
, &errorCode
);
2473 *pChar0Length
=(int32_t)(target
-char0
);
/* second conversion of the same character: output goes to char1 */
2477 ucnv_fromUnicode(cnv
,
2478 &target
, char1
+ARG_CHAR_ARR_SIZE
,
2479 &utf16Source
, utf16
+utf16Length
,
2480 NULL
, FALSE
, &errorCode
);
2481 *pChar1Length
=(int32_t)(target
-char1
);
2483 if(U_FAILURE(errorCode
)) {
2484 log_err("unable to get test character for %s - %s\n", converterName
, u_errorName(errorCode
));
/*
 * For entries of badUTF8[], builds an input consisting of the test character
 * (charUTF8) followed by the bad sequence and converts it from UTF-8 to cnv
 * with ucnv_convertEx() (reset and flush both TRUE). The UTF-8 converter uses
 * UCNV_TO_U_CALLBACK_STOP.
 * Each conversion is expected to end with U_TRUNCATED_CHAR_FOUND and with
 * pivotSource still equal to pivotBuffer. ucnv_getInvalidChars() must then
 * return exactly the bytes of the bad sequence.
 * NOTE(review): the body of the length test against
 * 1+U8_COUNT_TRAIL_BYTES(...) is not visible here; presumably it skips
 * entries that are not truncated - confirm.
 */
2490 static void testFromTruncatedUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2491 char charUTF8
[4], int32_t charUTF8Length
,
2492 char char0
[8], int32_t char0Length
,
2493 char char1
[8], int32_t char1Length
) {
2498 int32_t outputLength
;
2500 char invalidChars
[8];
2501 int8_t invalidLength
;
2506 UChar pivotBuffer
[8];
2507 UChar
*pivotSource
, *pivotTarget
;
2509 UErrorCode errorCode
;
2512 /* test truncated sequences */
2513 errorCode
=U_ZERO_ERROR
;
2514 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
/* the test character is copied once; each bad sequence is placed right after it */
2516 memcpy(utf8
, charUTF8
, charUTF8Length
);
2518 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2519 /* truncated sequence? */
/* NOTE(review): strlen() returns size_t, narrowed here to int32_t without a cast */
2520 int32_t length
=strlen(badUTF8
[i
]);
2521 if(length
>=(1+U8_COUNT_TRAIL_BYTES(badUTF8
[i
][0]))) {
2525 /* assemble a string with the test character and the truncated sequence */
2526 memcpy(utf8
+charUTF8Length
, badUTF8
[i
], length
);
2527 utf8Length
=charUTF8Length
+length
;
2529 /* convert and check the invalidChars */
2532 pivotSource
=pivotTarget
=pivotBuffer
;
2533 errorCode
=U_ZERO_ERROR
;
2534 ucnv_convertEx(cnv
, utf8Cnv
,
2535 &target
, output
+sizeof(output
),
2536 &source
, utf8
+utf8Length
,
2537 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2538 TRUE
, TRUE
, /* reset & flush */
2540 outputLength
=(int32_t)(target
-output
);
2541 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
|| pivotSource
!=pivotBuffer
) {
2542 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode
), converterName
, (long)i
);
2546 errorCode
=U_ZERO_ERROR
;
/* invalidLength is passed in as the capacity of invalidChars and compared afterwards with the bad sequence length */
2547 invalidLength
=(int8_t)sizeof(invalidChars
);
2548 ucnv_getInvalidChars(utf8Cnv
, invalidChars
, &invalidLength
, &errorCode
);
2549 if(invalidLength
!=length
|| 0!=memcmp(invalidChars
, badUTF8
[i
], length
)) {
2550 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName
, (long)i
);
/*
 * Builds one long UTF-8 input in utf8[]: the test character, then each
 * badUTF8[] entry followed by the test character again. In parallel it builds
 * the expected output in expect[]: char0 once, then char1 for each later
 * occurrence of the test character.
 * The UTF-8 converter uses UCNV_TO_U_CALLBACK_SKIP, and the result is checked
 * through convertExMultiStreaming() under the test name
 * "from bad UTF-8 to <converterName>".
 */
2555 static void testFromBadUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2556 char charUTF8
[4], int32_t charUTF8Length
,
2557 char char0
[8], int32_t char0Length
,
2558 char char1
[8], int32_t char1Length
) {
2559 char utf8
[600], expect
[600];
2560 int32_t utf8Length
, expectLength
;
2564 UErrorCode errorCode
;
2567 errorCode
=U_ZERO_ERROR
;
2568 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, NULL
, &errorCode
);
2571 * assemble an input string with the test character between each
2573 * and an expected string with repeated test character output
2575 memcpy(utf8
, charUTF8
, charUTF8Length
);
2576 utf8Length
=charUTF8Length
;
/* the first output of the test character is expected in its char0 form */
2578 memcpy(expect
, char0
, char0Length
);
2579 expectLength
=char0Length
;
2581 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2582 int32_t length
=strlen(badUTF8
[i
]);
2583 memcpy(utf8
+utf8Length
, badUTF8
[i
], length
);
2586 memcpy(utf8
+utf8Length
, charUTF8
, charUTF8Length
);
2587 utf8Length
+=charUTF8Length
;
/* every later output of the test character is expected in its char1 form */
2589 memcpy(expect
+expectLength
, char1
, char1Length
);
2590 expectLength
+=char1Length
;
2593 /* expect that each bad UTF-8 sequence is detected and skipped */
/*
 * NOTE(review): testName is filled with unbounded strcpy/strcat and its
 * declaration is not visible here - confirm that the buffer is large enough
 * for the prefix plus the longest converter name.
 */
2594 strcpy(testName
, "from bad UTF-8 to ");
2595 strcat(testName
, converterName
);
2597 convertExMultiStreaming(utf8Cnv
, cnv
,
2599 expect
, expectLength
,
2604 /* Test illegal UTF-8 input. */
/*
 * Opens a UTF-8 converter and then, for each name in converterNames[], opens
 * that converter, gets a test character for it with getTestChar(), and runs
 * testFromTruncatedUTF8() and testFromBadUTF8() on the pair.
 * Failures to open a converter are reported with log_data_err(). The UTF-8
 * converter is closed at the end.
 */
2605 static void TestConvertExFromUTF8() {
2606 static const char *const converterNames
[]={
2607 #if !UCONFIG_NO_LEGACY_CONVERSION
2616 UConverter
*utf8Cnv
, *cnv
;
2617 UErrorCode errorCode
;
2620 /* fromUnicode versions of some character, from initial state and later */
2621 char charUTF8
[4], char0
[8], char1
[8];
2622 int32_t charUTF8Length
, char0Length
, char1Length
;
2624 errorCode
=U_ZERO_ERROR
;
2625 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2626 if(U_FAILURE(errorCode
)) {
2627 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2631 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
/* reset the error code for each converter */
2632 errorCode
=U_ZERO_ERROR
;
2633 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2634 if(U_FAILURE(errorCode
)) {
2635 log_data_err("unable to open %s converter - %s\n", converterNames
[i
], u_errorName(errorCode
));
2638 if(!getTestChar(cnv
, converterNames
[i
], charUTF8
, &charUTF8Length
, char0
, &char0Length
, char1
, &char1Length
)) {
2641 testFromTruncatedUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2642 testFromBadUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2645 ucnv_close(utf8Cnv
);
/*
 * Converts the two-byte ill-formed UTF-8 input C5 F0 to each converter in
 * converterNames[] with ucnv_convertEx(). Each target converter gets the
 * from-Unicode callback UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_XML_DEC.
 * Per the comments inside the loop, the last converter in the list is expected
 * to produce two U+FFFD directly (twoFFFD) and all others two decimal NCRs
 * (twoNCRs).
 * The target limit is exactly dest+expectedLength, so the test expects
 * U_STRING_NOT_TERMINATED_WARNING, both source bytes consumed, the full
 * expected output, and the fill byte 9 untouched at dest[expectedLength].
 */
2648 static void TestConvertExFromUTF8_C5F0() {
2649 static const char *const converterNames
[]={
2650 #if !UCONFIG_NO_LEGACY_CONVERSION
2659 UConverter
*utf8Cnv
, *cnv
;
2660 UErrorCode errorCode
;
2663 static const char bad_utf8
[2]={ (char)0xC5, (char)0xF0 };
2664 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2665 static const char twoNCRs
[16]={
2666 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2667 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
/* U+FFFD encoded in UTF-8 (EF BF BD), twice */
2669 static const char twoFFFD
[6]={
2670 (char)0xef, (char)0xbf, (char)0xbd,
2671 (char)0xef, (char)0xbf, (char)0xbd
2673 const char *expected
;
2674 int32_t expectedLength
;
2675 char dest
[20]; /* longer than longest expectedLength */
2680 UChar pivotBuffer
[128];
2681 UChar
*pivotSource
, *pivotTarget
;
2683 errorCode
=U_ZERO_ERROR
;
2684 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2685 if(U_FAILURE(errorCode
)) {
2686 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2690 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2691 errorCode
=U_ZERO_ERROR
;
2692 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
/* the callback is set before errorCode is checked, so one check covers both the open and the callback setup */
2693 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_ESCAPE
, UCNV_ESCAPE_XML_DEC
,
2694 NULL
, NULL
, &errorCode
);
2695 if(U_FAILURE(errorCode
)) {
2696 log_data_err("unable to open %s converter - %s\n",
2697 converterNames
[i
], u_errorName(errorCode
));
/* fill dest with 9 so the check below can detect a write past the expected output */
2702 uprv_memset(dest
, 9, sizeof(dest
));
2703 if(i
==LENGTHOF(converterNames
)-1) {
2704 /* conversion to UTF-8 yields two U+FFFD directly */
2708 /* conversion to a non-Unicode charset yields two NCRs */
2715 pivotSource
=pivotTarget
=pivotBuffer
;
2718 &target
, dest
+expectedLength
,
2719 &src
, bad_utf8
+sizeof(bad_utf8
),
2720 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2721 TRUE
, TRUE
, &errorCode
);
2722 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| src
!=bad_utf8
+2 ||
2723 target
!=dest
+expectedLength
|| 0!=uprv_memcmp(dest
, expected
, expectedLength
) ||
2724 dest
[expectedLength
]!=9
2726 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames
[i
]);
2730 ucnv_close(utf8Cnv
);
/*
 * Tests the convenience functions ucnv_fromAlgorithmic() and
 * ucnv_toAlgorithmic() with a Shift-JIS converter and the algorithmic types
 * UCNV_UTF8, UCNV_UTF16 and UCNV_UTF32. Covered cases:
 *  - explicit length in, output exactly filling the target
 *    (expects U_STRING_NOT_TERMINATED_WARNING),
 *  - NUL-terminated input (length -1) into a larger target,
 *  - empty input and BOM-only input,
 *  - a pre-set failure code that must be left unchanged,
 *  - a NULL source and an illegal converter type, both expecting
 *    U_ILLEGAL_ARGUMENT_ERROR.
 * Only compiled in when legacy conversion is enabled.
 */
2734 TestConvertAlgorithmic() {
2735 #if !UCONFIG_NO_LEGACY_CONVERSION
2736 static const uint8_t
2738 /* 4e00 30a1 ff61 0410 */
2739 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2742 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2746 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2747 * SUB, SUB, 0x40, SUB, SUB, 0x40
2749 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2752 0xfe, 0xff /* BOM only, no text */
2755 0xff, 0xfe, 0, 0 /* BOM only, no text */
2758 char target
[100], utf8NUL
[100], shiftJISNUL
[100];
2761 UErrorCode errorCode
;
2765 errorCode
=U_ZERO_ERROR
;
2766 cnv
=ucnv_open("Shift-JIS", &errorCode
);
2767 if(U_FAILURE(errorCode
)) {
2768 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
/* make NUL-terminated copies of both test strings */
2773 memcpy(utf8NUL
, utf8
, sizeof(utf8
));
2774 utf8NUL
[sizeof(utf8
)]=0;
2775 memcpy(shiftJISNUL
, shiftJIS
, sizeof(shiftJIS
));
2776 shiftJISNUL
[sizeof(shiftJIS
)]=0;
2779 * The to/from algorithmic convenience functions share a common implementation,
2780 * so we need not test all permutations of them.
2783 /* length in, not terminated out */
2784 errorCode
=U_ZERO_ERROR
;
/* target capacity is exactly sizeof(shiftJIS), the expected output length */
2785 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF8
, target
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
), &errorCode
);
2786 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2787 length
!=sizeof(shiftJIS
) ||
2788 memcmp(target
, shiftJIS
, length
)!=0
/* NOTE(review): %d receives a sizeof value, which is not an int - assuming printf conventions, confirm */
2790 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2791 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2794 /* terminated in and out */
2795 memset(target
, 0x55, sizeof(target
));
/* errorCode starts as a warning; the check below requires U_ZERO_ERROR afterwards */
2796 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2797 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, -1, &errorCode
);
2798 if( errorCode
!=U_ZERO_ERROR
||
2799 length
!=sizeof(utf8
) ||
2800 memcmp(target
, utf8
, length
)!=0
/*
 * NOTE(review): the condition above compares length with sizeof(utf8), but the
 * message below prints sizeof(shiftJIS) as the expected value - looks like a
 * copy-paste slip, confirm.
 */
2802 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2803 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2806 /* empty string, some target buffer */
2807 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2808 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, 0, &errorCode
);
2809 if( errorCode
!=U_ZERO_ERROR
||
2812 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2813 u_errorName(errorCode
), length
);
2816 /* pseudo-empty string, no target buffer */
2817 errorCode
=U_ZERO_ERROR
;
/* the source is only a BOM and the target capacity is 0 */
2818 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2819 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2822 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2823 u_errorName(errorCode
), length
);
2826 errorCode
=U_ZERO_ERROR
;
2827 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF32
, target
, 0, (const char *)utf32
, 4, &errorCode
);
2828 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2831 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2832 u_errorName(errorCode
), length
);
/* errorCode already holds a failure on entry; the check below requires it to come back unchanged */
2836 errorCode
=U_MESSAGE_PARSE_ERROR
;
2837 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2838 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2839 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2843 errorCode
=U_ZERO_ERROR
;
/* NULL source with a non-zero length */
2844 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, NULL
, 2, &errorCode
);
2845 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2846 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode
));
2849 /* illegal alg. type */
2850 errorCode
=U_ZERO_ERROR
;
2851 length
=ucnv_fromAlgorithmic(cnv
, (UConverterType
)99, target
, 0, (const char *)utf16
, 2, &errorCode
);
2852 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2853 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode
));
2859 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Table-driven check of ucnv_getMaxCharSize(): each table entry pairs an
 * expected maximum character size (maxSize) with a converter name (name).
 * Every converter is opened through cnv_open() and its reported maximum is
 * compared with the table value. Afterwards the macro
 * UCNV_GET_MAX_BYTES_FOR_STRING is evaluated once, mainly to prove that it
 * compiles.
 */
2860 static void TestLMBCSMaxChar(void) {
2861 static const struct {
2865 /* some non-LMBCS converters - perfect test setup here */
2876 { 4, "IMAP-mailbox-name"},
2879 { 1, "windows-1256"},
2891 { 3, "ISO-2022-KR"},
2892 { 6, "ISO-2022-JP"},
2893 { 8, "ISO-2022-CN"},
2911 for (idx
= 0; idx
< LENGTHOF(converter
); idx
++) {
/* a fresh status for each converter */
2912 UErrorCode status
= U_ZERO_ERROR
;
2913 UConverter
*cnv
= cnv_open(converter
[idx
].name
, &status
);
2914 if (U_FAILURE(status
)) {
2917 if (converter
[idx
].maxSize
!= ucnv_getMaxCharSize(cnv
)) {
2918 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2919 converter
[idx
].name
, converter
[idx
].maxSize
, ucnv_getMaxCharSize(cnv
));
2924 /* mostly test that the macro compiles */
2925 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2926 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
/*
 * Checks how ucnv_openU() and ucnv_open() react to converter names around the
 * UCNV_MAX_CONVERTER_NAME_LENGTH limit. A very long name is cut or modified in
 * place at positions MAX+1, MAX and MAX-1, using a NUL, a comma, a space or
 * the text "locale=".
 * In every case the open call must return no converter. Per the checks below,
 * the expected code is U_ILLEGAL_ARGUMENT_ERROR in some cases and
 * U_FILE_ACCESS_ERROR in the others.
 */
2931 static void TestJ1968(void) {
2932 UErrorCode err
= U_ZERO_ERROR
;
2934 char myConvName
[] = "My really really really really really really really really really really really"
2935 " really really really really really really really really really really really"
2936 " really really really really really really really really long converter name";
2937 UChar myConvNameU
[sizeof(myConvName
)];
2939 u_charsToUChars(myConvName
, myConvNameU
, sizeof(myConvName
));
/* UChar name cut at MAX+1 */
2942 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
+1] = 0;
2943 cnv
= ucnv_openU(myConvNameU
, &err
);
2944 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2945 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* UChar name cut at MAX */
2949 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2950 cnv
= ucnv_openU(myConvNameU
, &err
);
2951 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2952 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* UChar name cut at MAX-1 */
2956 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
2957 cnv
= ucnv_openU(myConvNameU
, &err
);
2958 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2959 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
/* char name, unmodified full-length string */
2966 cnv
= ucnv_open(myConvName
, &err
);
2967 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2968 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* comma written at index MAX */
2972 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = ',';
2973 cnv
= ucnv_open(myConvName
, &err
);
2974 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2975 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* comma written at index MAX-1 */
2979 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
2980 cnv
= ucnv_open(myConvName
, &err
);
2981 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2982 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
2986 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
/*
 * strncpy with a count of 7 copies the 7 characters of the literal and no
 * terminator, so the text is overwritten in place and the rest of the long
 * name stays attached.
 */
2987 strncpy(myConvName
+ UCNV_MAX_CONVERTER_NAME_LENGTH
, "locale=", 7);
2988 cnv
= ucnv_open(myConvName
, &err
);
2989 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2990 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2993 /* The comma isn't really a part of the converter name. */
2995 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2996 cnv
= ucnv_open(myConvName
, &err
);
2997 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2998 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
/* space written at index MAX-1 */
3002 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ' ';
3003 cnv
= ucnv_open(myConvName
, &err
);
3004 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3005 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* name cut at MAX-1 */
3009 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
3010 cnv
= ucnv_open(myConvName
, &err
);
3011 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3012 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3017 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Compares a converter opened as name with the same converter opened with
 * UCNV_SWAP_LFNL_OPTION_STRING appended to the name.
 *   name - converter name
 *   swap - whether the name reported by ucnv_getName() for the option
 *          converter is expected to contain the swap option string
 * Steps: open both converters, check the reported name, convert text[] from
 * Unicode with both and compare the byte outputs, then convert bytes back to
 * Unicode with both and compare the UChar outputs. Each mismatch is logged
 * with the prefix "TestEBCDICSwapLFNL error".
 */
3019 testSwap(const char *name
, UBool swap
) {
3021 * Test Unicode text.
3022 * Contains characters that are the highest for some of the
3023 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3024 * tables copies the entire tables.
3026 static const UChar text
[]={
3027 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3030 UChar uNormal
[32], uSwapped
[32];
3031 char normal
[32], swapped
[32];
3035 int32_t i
, normalLength
, swappedLength
;
3039 const char *swappedName
;
3040 UConverter
*cnv
, *swapCnv
;
3041 UErrorCode errorCode
;
3043 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3045 /* open both the normal and the LF/NL-swapping converters */
/*
 * swapped[] doubles as the buffer for the option converter name.
 * NOTE(review): strcpy/strcat into the 32-byte array are unbounded, so this
 * relies on callers passing short converter names.
 */
3046 strcpy(swapped
, name
);
3047 strcat(swapped
, UCNV_SWAP_LFNL_OPTION_STRING
);
3049 errorCode
=U_ZERO_ERROR
;
3050 swapCnv
=ucnv_open(swapped
, &errorCode
);
3051 cnv
=ucnv_open(name
, &errorCode
);
3052 if(U_FAILURE(errorCode
)) {
3053 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name
, swapped
, u_errorName(errorCode
));
3057 /* the name must contain the swap option if and only if we expect the converter to swap */
3058 swappedName
=ucnv_getName(swapCnv
, &errorCode
);
3059 if(U_FAILURE(errorCode
)) {
3060 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name
, u_errorName(errorCode
));
3064 pc
=strstr(swappedName
, UCNV_SWAP_LFNL_OPTION_STRING
);
3065 if(swap
!= (pc
!=NULL
)) {
3066 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name
, swappedName
, swap
);
3070 /* convert to EBCDIC */
3073 ucnv_fromUnicode(cnv
, &pc
, normal
+LENGTHOF(normal
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3074 normalLength
=(int32_t)(pc
-normal
);
3078 ucnv_fromUnicode(swapCnv
, &pc
, swapped
+LENGTHOF(swapped
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3079 swappedLength
=(int32_t)(pc
-swapped
);
3081 if(U_FAILURE(errorCode
)) {
3082 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name
, u_errorName(errorCode
));
3086 /* compare EBCDIC output */
3087 if(normalLength
!=swappedLength
) {
3088 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3091 for(i
=0; i
<normalLength
; ++i
) {
3092 /* swap EBCDIC LF/NL for comparison */
3097 } else if(c
==0x25) {
3103 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name
, i
, (uint8_t)swapped
[i
]);
3108 /* convert back to Unicode (may not roundtrip) */
3111 ucnv_toUnicode(cnv
, &pu
, uNormal
+LENGTHOF(uNormal
), (const char **)&pc
, normal
+normalLength
, NULL
, TRUE
, &errorCode
);
/* from here on normalLength counts UChars, not bytes */
3112 normalLength
=(int32_t)(pu
-uNormal
);
/*
 * The source limit of this call is normal+swappedLength, so the swapping
 * converter also reads the bytes in normal[].
 * NOTE(review): the assignment of pc before this call is not visible here;
 * presumably it points at normal - confirm.
 */
3116 ucnv_toUnicode(swapCnv
, &pu
, uSwapped
+LENGTHOF(uSwapped
), (const char **)&pc
, normal
+swappedLength
, NULL
, TRUE
, &errorCode
);
3117 swappedLength
=(int32_t)(pu
-uSwapped
);
3119 if(U_FAILURE(errorCode
)) {
3120 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name
, u_errorName(errorCode
));
3124 /* compare Unicode output */
3125 if(normalLength
!=swappedLength
) {
3126 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3129 for(i
=0; i
<normalLength
; ++i
) {
3130 /* swap EBCDIC LF/NL for comparison */
3135 } else if(u
==0x85) {
3140 if(u
!=uSwapped
[i
]) {
3141 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name
, i
, uSwapped
[i
]);
3149 ucnv_close(swapCnv
);
/*
 * Table-driven entry point for the LF/NL swap test: calls testSwap() for each
 * {name, swap} pair in tests[]. The three ibm-* converters are listed with
 * swap TRUE and iso-8859-3 with swap FALSE.
 */
3153 TestEBCDICSwapLFNL() {
3154 static const struct {
3159 { "ibm-1047", TRUE
},
3160 { "ibm-1140", TRUE
},
3161 { "ibm-930", TRUE
},
3162 { "iso-8859-3", FALSE
}
3167 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
3168 testSwap(tests
[i
].name
, tests
[i
].swap
);
/*
 * Second definition with the same name and an empty body.
 * NOTE(review): presumably the alternative branch of a preprocessor
 * conditional whose directives are not visible here - confirm.
 */
3173 TestEBCDICSwapLFNL() {
3174 /* test nothing... */
/* version number 3.4.0.0 as a UVersionInfo constant */
3178 static const UVersionInfo ICU_34
= {3,4,0,0};
/*
 * Tests ucnv_fromUCountPending() with the test converter "test3" loaded from
 * the test data package.
 * Part 1: for each fromUnicodeTests[] entry (fields input, len, exp), converts
 * the input with ucnv_fromUnicode() without flushing and compares the pending
 * count with exp.
 * Part 2: feeds head, middle and tail one after another into a target that is
 * too small for the output of head, checking the pending count after each
 * step. The last step must end in U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): the log messages name ucnv_fromUInputConsumed and
 * ucnv_fromUInputHeld, but the function called is ucnv_fromUCountPending -
 * the messages look stale.
 */
3180 static void TestFromUCountPending(){
3181 #if !UCONFIG_NO_LEGACY_CONVERSION
3182 UErrorCode status
= U_ZERO_ERROR
;
3183 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3184 static const struct {
3188 }fromUnicodeTests
[] = {
3191 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3192 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3195 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3196 if(U_FAILURE(status
)){
3197 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3200 for(i
=0; i
<LENGTHOF(fromUnicodeTests
); ++i
) {
3203 char* targetLimit
= target
+ 10;
3204 const UChar
* source
= fromUnicodeTests
[i
].input
;
3205 const UChar
* sourceLimit
= source
+ fromUnicodeTests
[i
].len
;
/* flush is FALSE here */
3208 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3209 len
= ucnv_fromUCountPending(cnv
, &status
);
3210 if(U_FAILURE(status
)){
3211 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3212 status
= U_ZERO_ERROR
;
3215 if(len
!= fromUnicodeTests
[i
].exp
){
3216 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3219 status
= U_ZERO_ERROR
;
3222 * The converter has to read the tail before it knows that
3223 * only head alone matches.
3224 * At the end, the output for head will overflow the target,
3225 * middle will be pending, and tail will not have been consumed.
3228 \U00101234 -> x (<U101234> \x07 |0)
3229 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3230 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3231 \U00060007 -> unassigned
/* each array ends with 0x0000 so that u_strlen() can measure it */
3233 static const UChar head
[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3234 static const UChar middle
[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3235 static const UChar tail
[] = {0xDC07,0x0000};/* second half of \U00060007 */
3238 char* targetLimit
= target
+ 2; /* expect overflow from converting \U00101234\U00050005 */
3239 const UChar
* source
= head
;
3240 const UChar
* sourceLimit
= source
+ u_strlen(head
);
3243 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3244 len
= ucnv_fromUCountPending(cnv
, &status
);
3245 if(U_FAILURE(status
)){
3246 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3247 status
= U_ZERO_ERROR
;
3250 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3253 sourceLimit
= source
+ u_strlen(middle
);
3254 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3255 len
= ucnv_fromUCountPending(cnv
, &status
);
3256 if(U_FAILURE(status
)){
3257 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3258 status
= U_ZERO_ERROR
;
3261 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3264 sourceLimit
= source
+ u_strlen(tail
);
3265 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
/* this step must end in a buffer overflow; any other status is logged */
3266 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3267 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3269 status
= U_ZERO_ERROR
;
3270 len
= ucnv_fromUCountPending(cnv
, &status
);
3271 /* middle[1] is pending, tail has not been consumed */
3272 if(U_FAILURE(status
)){
3273 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status
));
3276 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * TestToUCountPending: checks the value returned by ucnv_toUCountPending()
 * after ucnv_toUnicode() calls that are made with the flush argument FALSE.
 * Part one runs a small table against the test3 converter; part two feeds the
 * head, mid and tail byte strings to the test4 converter.
 * NOTE(review): this listing carries original line numbers and has gaps in
 * them; the struct members and the declarations of i, tgt and len are not
 * visible here - confirm against the complete file.
 */
3284 TestToUCountPending(){
3285 #if !UCONFIG_NO_LEGACY_CONVERSION
3286 UErrorCode status
= U_ZERO_ERROR
;
/* Table rows are read further down through the members input, len and exp. */
3287 static const struct {
3291 }toUnicodeTests
[] = {
3293 {{0x05, 0x01, 0x02},3,3},
3295 {{0x07, 0x00, 0x01, 0x02},4,4},
/* oldToUAction stays NULL and is passed as the old-action argument below. */
3299 UConverterToUCallback
*oldToUAction
= NULL
;
3300 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3301 if(U_FAILURE(status
)){
3302 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
/* Install the STOP to-Unicode callback on the converter. */
3305 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
/* Part one: one conversion per table row into a 20-UChar target window. */
3306 for(i
=0; i
<LENGTHOF(toUnicodeTests
); ++i
) {
3308 UChar
* target
= tgt
;
3309 UChar
* targetLimit
= target
+ 20;
3310 const char* source
= toUnicodeTests
[i
].input
;
3311 const char* sourceLimit
= source
+ toUnicodeTests
[i
].len
;
3314 ucnv_toUnicode(cnv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3315 len
= ucnv_toUCountPending(cnv
,&status
);
/* status is reset after a failure so that later rows start clean. */
3316 if(U_FAILURE(status
)){
3317 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3318 status
= U_ZERO_ERROR
;
/*
 * NOTE(review): the message below misspells expected and names
 * ucnv_toUInputConsumed, while the value checked comes from
 * ucnv_toUCountPending().
 */
3321 if(len
!= toUnicodeTests
[i
].exp
){
3322 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3325 status
= U_ZERO_ERROR
;
3330 * The converter has to read the tail before it knows that
3331 * only head alone matches.
3332 * At the end, the output for head will overflow the target,
3333 * mid will be pending, and tail will not have been consumed.
3335 char head
[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3336 char mid
[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3337 char tail
[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3339 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3340 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3341 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3342 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
/* Part two: the target window is a single UChar, the source is head. */
3345 UChar
* target
= tgt
;
3346 UChar
* targetLimit
= target
+ 1; /* expect overflow from converting */
3347 const char* source
= head
;
3348 const char* sourceLimit
= source
+ strlen(head
);
/*
 * NOTE(review): the converter opened here is test4, but the failure message
 * below still says test3.
 */
3350 cnv
= ucnv_openPackage(loadTestData(&status
), "test4", &status
);
3351 if(U_FAILURE(status
)){
3352 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3355 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
/* Step 1: convert head, then read the pending count. */
3356 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3357 len
= ucnv_toUCountPending(cnv
,&status
);
3358 if(U_FAILURE(status
)){
3359 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3362 log_err("Did not get the expected len for head.\n");
/* Step 2: convert with the limit moved by strlen(mid), then read the count. */
3365 sourceLimit
= source
+strlen(mid
);
3366 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3367 len
= ucnv_toUCountPending(cnv
,&status
);
3368 if(U_FAILURE(status
)){
3369 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3372 log_err("Did not get the expected len for mid.\n");
/*
 * Step 3: the target limit is set equal to target (no room for output), and
 * the call is required to end with U_BUFFER_OVERFLOW_ERROR.
 */
3376 sourceLimit
= source
+strlen(tail
);
3377 targetLimit
= target
;
3378 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3379 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3380 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
/* The overflow status is cleared before the pending count is queried. */
3382 status
= U_ZERO_ERROR
;
3383 len
= ucnv_toUCountPending(cnv
,&status
);
3384 /* mid[4] is pending, tail has not been consumed */
3385 if(U_FAILURE(status
)){
3386 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status
));
3389 log_err("Did not get the expected len for tail.\n");
3396 static void TestOneDefaultNameChange(const char *name
, const char *expected
) {
3397 UErrorCode status
= U_ZERO_ERROR
;
3399 ucnv_setDefaultName(name
);
3400 if(strcmp(ucnv_getDefaultName(), expected
)==0)
3401 log_verbose("setDefaultName of %s works.\n", name
);
3403 log_err("setDefaultName of %s failed\n", name
);
3404 cnv
=ucnv_open(NULL
, &status
);
3405 if (U_FAILURE(status
) || cnv
== NULL
) {
3406 log_err("opening the default converter of %s failed\n", name
);
3409 if(strcmp(ucnv_getName(cnv
, &status
), expected
)==0)
3410 log_verbose("ucnv_getName of %s works.\n", name
);
3412 log_err("ucnv_getName of %s failed\n", name
);
3416 static void TestDefaultName(void) {
3417 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3418 static char defaultName
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1];
3419 strcpy(defaultName
, ucnv_getDefaultName());
3421 log_verbose("getDefaultName returned %s\n", defaultName
);
3423 /*change the default name by setting it */
3424 TestOneDefaultNameChange("UTF-8", "UTF-8");
3425 #if U_CHARSET_IS_UTF8
3426 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3427 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3428 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3430 # if !UCONFIG_NO_LEGACY_CONVERSION
3431 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3432 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3434 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3437 /*set the default name back*/
3438 ucnv_setDefaultName(defaultName
);
3441 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * compareNames: walks a NULL-terminated list of converter names and checks
 * ucnv_compareNames() results against a required relation.
 * The first character of relation selects a branch (equals sign or less-than
 * sign). For each name2 read from the list until NULL, the sign of
 * ucnv_compareNames(name1, name2) is compared with rel and a mismatch is
 * logged together with both names, the raw result and rel.
 * NOTE(review): the statements that assign relation, rel and name1 are not
 * visible in this listing - confirm against the complete file.
 */
3455 compareNames(const char **names
) {
3456 const char *relation
, *name1
, *name2
;
/* The relation marker decides which value rel takes. */
3460 if(*relation
=='=') {
3462 } else if(*relation
=='<') {
/* The assignment inside the loop condition also advances the list pointer. */
3472 while((name2
=*names
++)!=NULL
) {
3473 result
=ucnv_compareNames(name1
, name2
);
3474 if(sign(result
)!=rel
) {
3475 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1
, name2
, result
, rel
);
3482 TestCompareNames() {
3483 static const char *equalUTF8
[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL
};
3484 static const char *equalIBM
[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL
};
3485 static const char *lessMac
[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL
};
3486 static const char *lessUTF080
[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL
};
3488 compareNames(equalUTF8
);
3489 compareNames(equalIBM
);
3490 compareNames(lessMac
);
3491 compareNames(lessUTF080
);
/*
 * Body of a substitution-string test; the function header is not visible in
 * this listing.
 * Covered here:
 *  - UTF-16 and UTF-32: converting one unpaired surrogate with
 *    ucnv_fromUChars() must produce output in which
 *    ucnv_detectUnicodeSignature() finds a signature.
 *  - ISO-8859-1: ucnv_setSubstString() followed by ucnv_getSubstChars().
 *  - HZ (only when legacy conversion is compiled in): the same two calls,
 *    with a zero length expected back.
 * NOTE(review): declarations of cnv, buffer, length and len8 are not visible
 * in this listing.
 */
/* Input for the BOM checks: the single code unit 0xd900. */
3496 static const UChar surrogate
[1]={ 0xd900 };
/* sub and subChars hold the same five values, as UChar and as char. */
3499 static const UChar sub
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3500 static const char subChars
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3502 UErrorCode errorCode
;
3506 /* UTF-16/32: test that the BOM is output before the sub character */
3507 errorCode
=U_ZERO_ERROR
;
3508 cnv
=ucnv_open("UTF-16", &errorCode
);
3509 if(U_FAILURE(errorCode
)) {
3510 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode
));
3513 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
/* Fails on a conversion error or when no signature is detected. */
3515 if(U_FAILURE(errorCode
) ||
3517 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3519 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
/* Same check again with the UTF-32 converter. */
3522 errorCode
=U_ZERO_ERROR
;
3523 cnv
=ucnv_open("UTF-32", &errorCode
);
3524 if(U_FAILURE(errorCode
)) {
3525 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode
));
3528 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3530 if(U_FAILURE(errorCode
) ||
3532 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3534 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3537 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3538 errorCode
=U_ZERO_ERROR
;
3539 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
3540 if(U_FAILURE(errorCode
)) {
3541 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode
));
3544 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3545 if(U_FAILURE(errorCode
)) {
3546 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode
));
/* len8 goes in as the buffer size and is compared with sizeof(subChars) after the call. */
3548 len8
= sizeof(buffer
);
3549 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3550 /* Stateless converter, we expect the string converted to charset bytes. */
3551 if(U_FAILURE(errorCode
) || len8
!=sizeof(subChars
) || 0!=uprv_memcmp(buffer
, subChars
, len8
)) {
3552 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode
));
/* The HZ part is compiled only when legacy conversion is available. */
3557 #if !UCONFIG_NO_LEGACY_CONVERSION
3558 errorCode
=U_ZERO_ERROR
;
3559 cnv
=ucnv_open("HZ", &errorCode
);
3560 if(U_FAILURE(errorCode
)) {
3561 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode
));
3564 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3565 if(U_FAILURE(errorCode
)) {
3566 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode
));
3568 len8
= sizeof(buffer
);
3569 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3570 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3571 if(U_FAILURE(errorCode
) || len8
!=0) {
3572 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode
));
3578 * Further testing of ucnv_setSubstString() is done via intltest convert.
3579 * We do not test edge cases of illegal arguments and similar because the
3580 * function implementation uses all of its parameters in calls to other
3581 * functions with UErrorCode parameters.
3586 InvalidArguments() {
3588 UErrorCode errorCode
;
3589 char charBuffer
[2] = {1, 1};
3590 char ucharAsCharBuffer
[2] = {2, 2};
3591 char *charsPtr
= charBuffer
;
3592 UChar
*ucharsPtr
= (UChar
*)ucharAsCharBuffer
;
3593 UChar
*ucharsBadPtr
= (UChar
*)(ucharAsCharBuffer
+ 1);
3595 errorCode
=U_ZERO_ERROR
;
3596 cnv
=ucnv_open("UTF-8", &errorCode
);
3597 if(U_FAILURE(errorCode
)) {
3598 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode
));
3602 errorCode
=U_ZERO_ERROR
;
3603 /* This one should fail because an incomplete UChar is being passed in */
3604 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsPtr
, ucharsBadPtr
, NULL
, TRUE
, &errorCode
);
3605 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3606 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3609 errorCode
=U_ZERO_ERROR
;
3610 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3611 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsBadPtr
, ucharsPtr
, NULL
, TRUE
, &errorCode
);
3612 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3613 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3616 errorCode
=U_ZERO_ERROR
;
3617 /* This one should fail because an incomplete UChar is being passed in */
3618 ucnv_toUnicode(cnv
, &ucharsPtr
, ucharsBadPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3619 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3620 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3623 errorCode
=U_ZERO_ERROR
;
3624 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3625 ucnv_toUnicode(cnv
, &ucharsBadPtr
, ucharsPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3626 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3627 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3630 if (charBuffer
[0] != 1 || charBuffer
[1] != 1
3631 || ucharAsCharBuffer
[0] != 2 || ucharAsCharBuffer
[1] != 2)
3633 log_err("Data was incorrectly written to buffers\n");
3639 static void TestGetName() {
3640 static const char *const names
[] = {
3641 "Unicode", "UTF-16",
3642 "UnicodeBigUnmarked", "UTF-16BE",
3643 "UnicodeBig", "UTF-16BE,version=1",
3644 "UnicodeLittleUnmarked", "UTF-16LE",
3645 "UnicodeLittle", "UTF-16LE,version=1",
3646 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
3649 for(i
= 0; i
< LENGTHOF(names
); i
+= 2) {
3650 UErrorCode errorCode
= U_ZERO_ERROR
;
3651 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3652 if(U_SUCCESS(errorCode
)) {
3653 const char *name
= ucnv_getName(cnv
, &errorCode
);
3654 if(U_FAILURE(errorCode
) || 0 != strcmp(name
, names
[i
+1])) {
3655 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3656 names
[i
], name
, names
[i
+1], u_errorName(errorCode
));
/*
 * TestUTFBOM: for every converter name in names, converts the single UChar
 * 0x61 with ucnv_fromUChars() and compares the produced bytes with the row of
 * expected that has the same index.
 * In each expected row, element 0 is the required byte count and the
 * following elements are the required bytes (see the comparison against
 * exp[0] and exp+1 below).
 * NOTE(review): the contents of names, some rows of expected and the
 * declarations of i, bytes and length are not visible in this listing.
 */
3663 static void TestUTFBOM() {
3664 static const UChar a16
[] = { 0x61 };
3665 static const char *const names
[] = {
/* Row layout: count, then up to four output bytes. */
3673 static const uint8_t expected
[][5] = {
3675 { 4, 0xfe, 0xff, 0, 0x61 },
3676 { 4, 0xfe, 0xff, 0, 0x61 },
3678 { 4, 0xff, 0xfe, 0x61, 0 },
3679 { 4, 0xff, 0xfe, 0x61, 0 },
3683 { 4, 0xfe, 0xff, 0, 0x61 },
3686 { 4, 0xff, 0xfe, 0x61, 0 }
3692 for(i
= 0; i
< LENGTHOF(names
); ++i
) {
3693 UErrorCode errorCode
= U_ZERO_ERROR
;
3694 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3696 const uint8_t *exp
= expected
[i
];
3697 if (U_FAILURE(errorCode
)) {
3698 log_err_status(errorCode
, "Unable to open converter: %s got error code: %s\n", names
[i
], u_errorName(errorCode
));
3701 length
= ucnv_fromUChars(cnv
, bytes
, (int32_t)sizeof(bytes
), a16
, 1, &errorCode
);
/* The length test comes first, so memcmp only runs when length equals exp[0]. */
3703 if(U_FAILURE(errorCode
) || length
!= exp
[0] || 0 != memcmp(bytes
, exp
+1, length
)) {
3704 log_err("unexpected %s BOM writing behavior -- %s\n",
3705 names
[i
], u_errorName(errorCode
));