1 /********************************************************************
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
/* Element count of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so it behaves as one operand. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Number of code pages the TestConvert tables and loop cover. */
#define NUM_CODEPAGE 1
/* Element capacity of the conversion scratch buffers allocated in TestConvert.
 * Parenthesized so that expressions such as x / MAX_FILE_LEN associate correctly. */
#define MAX_FILE_LEN (1024*20)
/* Size in chars of the buffer holding the Unicode test file path. */
#define UCS_FILE_NAME_SIZE 512
37 /*returns an action other than the one provided*/
38 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
);
39 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
);
42 cnv_open(const char *name
, UErrorCode
*pErrorCode
) {
43 if(name
!=NULL
&& name
[0]=='*') {
44 return ucnv_openPackage(loadTestData(pErrorCode
), name
+1, pErrorCode
);
46 return ucnv_open(name
, pErrorCode
);
51 static void ListNames(void);
52 static void TestFlushCache(void);
53 static void TestDuplicateAlias(void);
54 static void TestCCSID(void);
55 static void TestJ932(void);
56 static void TestJ1968(void);
57 static void TestLMBCSMaxChar(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestConvertSafeCloneCallback(void);
63 static void TestEBCDICSwapLFNL(void);
64 static void TestConvertEx(void);
65 static void TestConvertExFromUTF8(void);
66 static void TestConvertExFromUTF8_C5F0(void);
67 static void TestConvertAlgorithmic(void);
68 void TestDefaultConverterError(void); /* defined in cctest.c */
69 void TestDefaultConverterSet(void); /* defined in cctest.c */
70 static void TestToUCountPending(void);
71 static void TestFromUCountPending(void);
72 static void TestDefaultName(void);
73 static void TestCompareNames(void);
74 static void TestSubstString(void);
75 static void InvalidArguments(void);
76 static void TestGetName(void);
77 static void TestUTFBOM(void);
79 void addTestConvert(TestNode
** root
);
81 void addTestConvert(TestNode
** root
)
83 addTest(root
, &ListNames
, "tsconv/ccapitst/ListNames");
84 addTest(root
, &TestConvert
, "tsconv/ccapitst/TestConvert");
85 addTest(root
, &TestFlushCache
, "tsconv/ccapitst/TestFlushCache");
86 addTest(root
, &TestAlias
, "tsconv/ccapitst/TestAlias");
87 addTest(root
, &TestDuplicateAlias
, "tsconv/ccapitst/TestDuplicateAlias");
88 addTest(root
, &TestConvertSafeClone
, "tsconv/ccapitst/TestConvertSafeClone");
89 #if !UCONFIG_NO_LEGACY_CONVERSION
90 addTest(root
, &TestConvertSafeCloneCallback
,"tsconv/ccapitst/TestConvertSafeCloneCallback");
92 addTest(root
, &TestCCSID
, "tsconv/ccapitst/TestCCSID");
93 addTest(root
, &TestJ932
, "tsconv/ccapitst/TestJ932");
94 addTest(root
, &TestJ1968
, "tsconv/ccapitst/TestJ1968");
95 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
96 addTest(root
, &TestLMBCSMaxChar
, "tsconv/ccapitst/TestLMBCSMaxChar");
98 addTest(root
, &TestEBCDICSwapLFNL
, "tsconv/ccapitst/TestEBCDICSwapLFNL");
99 addTest(root
, &TestConvertEx
, "tsconv/ccapitst/TestConvertEx");
100 addTest(root
, &TestConvertExFromUTF8
, "tsconv/ccapitst/TestConvertExFromUTF8");
101 addTest(root
, &TestConvertExFromUTF8_C5F0
, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
102 addTest(root
, &TestConvertAlgorithmic
, "tsconv/ccapitst/TestConvertAlgorithmic");
103 addTest(root
, &TestDefaultConverterError
, "tsconv/ccapitst/TestDefaultConverterError");
104 addTest(root
, &TestDefaultConverterSet
, "tsconv/ccapitst/TestDefaultConverterSet");
105 #if !UCONFIG_NO_FILE_IO
106 addTest(root
, &TestToUCountPending
, "tsconv/ccapitst/TestToUCountPending");
107 addTest(root
, &TestFromUCountPending
, "tsconv/ccapitst/TestFromUCountPending");
109 addTest(root
, &TestDefaultName
, "tsconv/ccapitst/TestDefaultName");
110 addTest(root
, &TestCompareNames
, "tsconv/ccapitst/TestCompareNames");
111 addTest(root
, &TestSubstString
, "tsconv/ccapitst/TestSubstString");
112 addTest(root
, &InvalidArguments
, "tsconv/ccapitst/InvalidArguments");
113 addTest(root
, &TestGetName
, "tsconv/ccapitst/TestGetName");
114 addTest(root
, &TestUTFBOM
, "tsconv/ccapitst/TestUTFBOM");
117 static void ListNames(void) {
118 UErrorCode err
= U_ZERO_ERROR
;
119 int32_t testLong1
= 0;
120 const char* available_conv
;
121 UEnumeration
*allNamesEnum
= NULL
;
122 int32_t allNamesCount
= 0;
125 log_verbose("Testing ucnv_openAllNames()...");
126 allNamesEnum
= ucnv_openAllNames(&err
);
128 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err
));
131 const char *string
= NULL
;
135 allNamesCount
= uenum_count(allNamesEnum
, &err
);
136 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
138 log_verbose("read \"%s\", length %i\n", string
, len
);
140 if (U_FAILURE(err
)) {
141 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err
));
144 uenum_reset(allNamesEnum
, &err
);
145 while ((string
= uenum_next(allNamesEnum
, &len
, &err
))) {
147 ucnv_close(ucnv_open(string
, &err
));
148 log_verbose("read \"%s\", length %i (%s)\n", string
, len
, U_SUCCESS(err
) ? "available" : "unavailable");
151 if (count1
!= count2
) {
152 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
155 uenum_close(allNamesEnum
);
158 /*Tests ucnv_getAvailableName(), ucnv_countAvailable()*/
160 log_verbose("Testing ucnv_countAvailable()...");
162 testLong1
=ucnv_countAvailable();
163 log_info("Number of available codepages: %d/%d\n", testLong1
, allNamesCount
);
165 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
167 available_conv
= ucnv_getAvailableName(testLong1
);
168 /*test ucnv_getAvailableName with err condition*/
169 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
170 available_conv
= ucnv_getAvailableName(-1);
171 if(available_conv
!= NULL
){
172 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
175 /* Test ucnv_countAliases() etc. */
176 count
= ucnv_countAliases("utf-8", &err
);
178 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err
));
179 } else if(count
<= 0) {
180 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count
);
182 /* try to get the aliases individually */
184 alias
= ucnv_getAlias("utf-8", 0, &err
);
186 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err
));
187 } else if(strcmp("UTF-8", alias
) != 0) {
188 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias
);
191 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
192 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
194 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
195 } else if(strlen(alias
) > 20) {
197 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum
, alias
);
199 log_verbose("alias %d for utf-8: %s\n", aliasNum
, alias
);
203 /* try to fill an array with all aliases */
204 const char **aliases
;
205 aliases
=(const char **)malloc(count
* sizeof(const char *));
207 ucnv_getAliases("utf-8", aliases
, &err
);
209 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err
));
211 for(aliasNum
= 0; aliasNum
< count
; ++aliasNum
) {
212 /* compare the pointers with the ones returned individually */
213 alias
= ucnv_getAlias("utf-8", aliasNum
, &err
);
215 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum
, myErrorName(err
));
216 } else if(aliases
[aliasNum
] != alias
) {
217 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum
, aliasNum
);
221 free((char **)aliases
);
229 static void TestConvert()
231 #if !UCONFIG_NO_LEGACY_CONVERSION
234 int32_t testLong1
= 0;
238 FILE* ucs_file_in
= NULL
;
240 UChar myUChar
= 0x0000;
241 char* mytarget
; /* [MAX_FILE_LEN] */
244 UChar
* consumedUni
= NULL
;
245 char* consumed
= NULL
;
246 char* output_cp_buffer
; /* [MAX_FILE_LEN] */
247 UChar
* ucs_file_buffer
; /* [MAX_FILE_LEN] */
248 UChar
* ucs_file_buffer_use
;
249 UChar
* my_ucs_file_buffer
; /* [MAX_FILE_LEN] */
250 UChar
* my_ucs_file_buffer_1
;
253 uint16_t codepage_index
= 0;
255 UErrorCode err
= U_ZERO_ERROR
;
256 char ucs_file_name
[UCS_FILE_NAME_SIZE
];
257 UConverterFromUCallback MIA1
, MIA1_2
;
258 UConverterToUCallback MIA2
, MIA2_2
;
259 const void *MIA1Context
, *MIA1Context2
, *MIA2Context
, *MIA2Context2
;
260 UConverter
* someConverters
[5];
261 UConverter
* myConverter
= 0;
262 UChar
* displayname
= 0;
269 int32_t targetcapacity2
;
270 int32_t targetcapacity
;
274 const UChar
* tmp_ucs_buf
;
275 const UChar
* tmp_consumedUni
=NULL
;
276 const char* tmp_mytarget_use
;
277 const char* tmp_consumed
;
279 /******************************************************************
280 Checking Unicode -> ksc
281 ******************************************************************/
283 const char* CodePagesToTest
[NUM_CODEPAGE
] =
289 const uint16_t CodePageNumberToTest
[NUM_CODEPAGE
] =
295 const int8_t CodePagesMinChars
[NUM_CODEPAGE
] =
301 const int8_t CodePagesMaxChars
[NUM_CODEPAGE
] =
307 const uint16_t CodePagesSubstitutionChars
[NUM_CODEPAGE
] =
312 const char* CodePagesTestFiles
[NUM_CODEPAGE
] =
318 const UConverterPlatform CodePagesPlatform
[NUM_CODEPAGE
] =
324 const char* CodePagesLocale
[NUM_CODEPAGE
] =
329 UConverterFromUCallback oldFromUAction
= NULL
;
330 UConverterToUCallback oldToUAction
= NULL
;
331 const void* oldFromUContext
= NULL
;
332 const void* oldToUContext
= NULL
;
334 /* Allocate memory */
335 mytarget
= (char*) malloc(MAX_FILE_LEN
* sizeof(mytarget
[0]));
336 output_cp_buffer
= (char*) malloc(MAX_FILE_LEN
* sizeof(output_cp_buffer
[0]));
337 ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(ucs_file_buffer
[0]));
338 my_ucs_file_buffer
= (UChar
*) malloc(MAX_FILE_LEN
* sizeof(my_ucs_file_buffer
[0]));
340 ucs_file_buffer_use
= ucs_file_buffer
;
342 mytarget_use
= mytarget
;
343 my_ucs_file_buffer_1
=my_ucs_file_buffer
;
345 /* flush the converter cache to get a consistent state before the flushing is tested */
348 /*Testing ucnv_openU()*/
350 UChar converterName
[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
351 UChar firstSortedName
[]={ 0x0021, 0x0000}; /* ! */
352 UChar lastSortedName
[]={ 0x007E, 0x0000}; /* ~ */
353 const char *illegalNameChars
={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
354 UChar illegalName
[100];
355 UConverter
*converter
=NULL
;
357 converter
=ucnv_openU(converterName
, &err
);
359 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err
));
361 ucnv_close(converter
);
363 converter
=ucnv_openU(NULL
, &err
);
365 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err
));
367 ucnv_close(converter
);
368 /*testing with error value*/
369 err
=U_ILLEGAL_ARGUMENT_ERROR
;
370 converter
=ucnv_openU(converterName
, &err
);
371 if(!(converter
== NULL
)){
372 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
374 ucnv_close(converter
);
376 u_uastrcpy(illegalName
, "");
377 u_uastrcpy(illegalName
, illegalNameChars
);
378 ucnv_openU(illegalName
, &err
);
379 if(!(err
==U_ILLEGAL_ARGUMENT_ERROR
)){
380 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
384 ucnv_openU(firstSortedName
, &err
);
385 if(err
!=U_FILE_ACCESS_ERROR
){
386 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
390 ucnv_openU(lastSortedName
, &err
);
391 if(err
!=U_FILE_ACCESS_ERROR
){
392 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
397 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
399 UConverter
*cnv
=NULL
;
401 cnv
=ucnv_open("ibm-949,Madhu", &err
);
403 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err
));
408 /*Testing ucnv_convert()*/
410 int32_t targetLimit
=0, sourceLimit
=0, i
=0, targetCapacity
=0;
411 const uint8_t source
[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
412 const uint8_t expectedTarget
[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
414 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
418 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", NULL
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
419 if(err
== U_BUFFER_OVERFLOW_ERROR
){
421 targetLimit
=targetCapacity
+1;
422 target
=(char*)malloc(sizeof(char) * targetLimit
);
423 targetCapacity
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
426 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err
));
429 for(i
=0; i
<targetCapacity
; i
++){
430 if(target
[i
] != expectedTarget
[i
]){
431 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i
, (UChar
)expectedTarget
[i
], (uint8_t)target
[i
]);
435 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
+1, -1, &err
);
436 if(U_FAILURE(err
) || i
!=7){
437 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
438 u_errorName(err
), i
);
441 /*Test error conditions*/
443 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, 0, &err
);
445 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
448 err
=U_ILLEGAL_ARGUMENT_ERROR
;
449 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
450 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
452 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
456 sourceLimit
=sizeof(source
)/sizeof(source
[0]);
458 i
=ucnv_convert("ibm-1364", "ibm-1363", target
, targetLimit
, (const char*)source
, sourceLimit
, &err
);
459 if(!(U_FAILURE(err
) && err
==U_BUFFER_OVERFLOW_ERROR
)){
460 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
467 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
468 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
469 err
=U_ILLEGAL_ARGUMENT_ERROR
;
470 if(ucnv_open(NULL
, &err
) != NULL
){
471 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
473 if(ucnv_openCCSID(1051, UCNV_IBM
, &err
) != NULL
){
474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
478 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
479 log_verbose("\n---Testing ucnv_open default...\n");
480 someConverters
[0] = ucnv_open(NULL
,&err
);
481 someConverters
[1] = ucnv_open(NULL
,&err
);
482 someConverters
[2] = ucnv_open("utf8", &err
);
483 someConverters
[3] = ucnv_openCCSID(949,UCNV_IBM
,&err
);
484 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM
, &err
)); /* test for j350; ucnv_close(NULL) is safe */
485 if (U_FAILURE(err
)){ log_data_err("FAILURE! %s\n", myErrorName(err
));}
487 /* Testing ucnv_getName()*/
488 /*default code page */
489 ucnv_getName(someConverters
[0], &err
);
491 log_data_err("getName[0] failed\n");
493 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters
[0], &err
));
495 ucnv_getName(someConverters
[1], &err
);
497 log_data_err("getName[1] failed\n");
499 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters
[1], &err
));
502 ucnv_close(someConverters
[0]);
503 ucnv_close(someConverters
[1]);
504 ucnv_close(someConverters
[2]);
505 ucnv_close(someConverters
[3]);
508 for (codepage_index
=0; codepage_index
< NUM_CODEPAGE
; ++codepage_index
)
514 strcpy(ucs_file_name
, U_TOPSRCDIR U_FILE_SEP_STRING
"test"U_FILE_SEP_STRING
"testdata"U_FILE_SEP_STRING
);
516 strcpy(ucs_file_name
, loadTestData(&err
));
519 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err
));
524 char* index
= strrchr(ucs_file_name
,(char)U_FILE_SEP_CHAR
);
526 if((unsigned int)(index
-ucs_file_name
) != (strlen(ucs_file_name
)-1)){
531 strcat(ucs_file_name
,".."U_FILE_SEP_STRING
);
533 strcat(ucs_file_name
, CodePagesTestFiles
[codepage_index
]);
535 ucs_file_in
= fopen(ucs_file_name
,"rb");
538 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name
);
542 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
544 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
545 /* ucnv_flushCache(); */
546 myConverter
=ucnv_open( "ibm-949", &err
);
547 if (!myConverter
|| U_FAILURE(err
))
549 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err
));
554 /*testing for ucnv_getName() */
555 log_verbose("Testing ucnv_getName()...\n");
556 ucnv_getName(myConverter
, &err
);
558 log_err("Error in getName\n");
561 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter
, &err
));
563 if (uprv_stricmp(ucnv_getName(myConverter
, &err
), CodePagesToTest
[codepage_index
]))
564 log_err("getName failed\n");
566 log_verbose("getName ok\n");
567 /*Test getName with error condition*/
570 err
=U_ILLEGAL_ARGUMENT_ERROR
;
571 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
572 name
=ucnv_getName(myConverter
, &err
);
574 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
580 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
582 log_verbose("Testing ucnv_getMaxCharSize()...\n");
583 if (ucnv_getMaxCharSize(myConverter
)==CodePagesMaxChars
[codepage_index
])
584 log_verbose("Max byte per character OK\n");
586 log_err("Max byte per character failed\n");
588 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
589 if (ucnv_getMinCharSize(myConverter
)==CodePagesMinChars
[codepage_index
])
590 log_verbose("Min byte per character OK\n");
592 log_err("Min byte per character failed\n");
595 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
596 log_verbose("\n---Testing ucnv_getSubstChars...\n");
598 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
600 log_err("ucnv_getSubstChars returned a negative number %d\n", ii
);
604 rest
= (uint16_t)(((unsigned char)rest
<< 8) + (unsigned char)myptr
[x
]);
605 if (rest
==CodePagesSubstitutionChars
[codepage_index
])
606 log_verbose("Substitution character ok\n");
608 log_err("Substitution character failed.\n");
610 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
611 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
614 log_err("FAILURE! %s\n", myErrorName(err
));
616 ucnv_getSubstChars(myConverter
,save
, &ii
, &err
);
619 log_err("FAILURE! %s\n", myErrorName(err
));
622 if (strncmp(save
, myptr
, ii
))
623 log_err("Saved substitution character failed\n");
625 log_verbose("Saved substitution character ok\n");
627 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
628 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
630 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
631 if(err
!= U_INDEX_OUTOFBOUNDS_ERROR
){
632 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err
));
636 ucnv_getSubstChars(myConverter
, myptr
, &ii
, &err
);
637 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
638 ucnv_setSubstChars(myConverter
, myptr
, 0, &err
);
639 if(err
!= U_ILLEGAL_ARGUMENT_ERROR
){
640 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err
));
642 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
643 strcpy(myptr
, "abc");
644 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
646 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
647 if(strncmp(save
, myptr
, ii
) == 0){
648 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
650 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
652 strcpy(myptr
, "abc");
653 ucnv_setSubstChars(myConverter
, myptr
, ii
, &err
);
654 err
=U_ILLEGAL_ARGUMENT_ERROR
;
655 ucnv_getSubstChars(myConverter
, save
, &ii
, &err
);
656 if(strncmp(save
, myptr
, ii
) == 0){
657 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
662 #ifdef U_ENABLE_GENERIC_ISO_2022
663 /*resetState ucnv_reset()*/
664 log_verbose("\n---Testing ucnv_reset()..\n");
665 ucnv_reset(myConverter
);
668 const uint8_t in
[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
669 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
670 UConverter
*cnv
=ucnv_open("ISO_2022", &err
);
672 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
674 c
=ucnv_getNextUChar(cnv
, &source
, limit
, &err
);
675 if((U_FAILURE(err
) || c
!= (UChar32
)0x0031)) {
676 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err
));
685 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
686 locale
=CodePagesLocale
[codepage_index
];
689 disnamelen
= ucnv_getDisplayName(myConverter
, locale
, displayname
, len
, &err
);
690 if(err
==U_BUFFER_OVERFLOW_ERROR
) {
692 displayname
=(UChar
*)malloc((disnamelen
+1) * sizeof(UChar
));
693 ucnv_getDisplayName(myConverter
,locale
,displayname
,disnamelen
+1, &err
);
695 log_err("getDisplayName failed. The error is %s\n", myErrorName(err
));
698 log_verbose(" getDisplayName o.k.\n");
704 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err
));
706 /*test ucnv_getDisplayName with error condition*/
707 err
= U_ILLEGAL_ARGUMENT_ERROR
;
708 len
=ucnv_getDisplayName(myConverter
,locale
,NULL
,0, &err
);
710 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
712 /*test ucnv_getDisplayName with error condition*/
714 len
=ucnv_getDisplayName(NULL
,locale
,NULL
,0, &err
);
715 if( len
!=0 || U_SUCCESS(err
)){
716 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
720 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
721 ucnv_getFromUCallBack(myConverter
, &MIA1
, &MIA1Context
);
723 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
724 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
725 if (U_FAILURE(err
) || oldFromUAction
!= MIA1
|| oldFromUContext
!= MIA1Context
)
727 log_err("FAILURE! %s\n", myErrorName(err
));
730 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
731 if (MIA1_2
!= otherUnicodeAction(MIA1
) || MIA1Context2
!= &BOM
)
732 log_err("get From UCallBack failed\n");
734 log_verbose("get From UCallBack ok\n");
736 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
737 ucnv_setFromUCallBack(myConverter
,MIA1
, MIA1Context
, &oldFromUAction
, &oldFromUContext
, &err
);
738 if (U_FAILURE(err
) || oldFromUAction
!= otherUnicodeAction(MIA1
) || oldFromUContext
!= &BOM
)
740 log_err("FAILURE! %s\n", myErrorName(err
));
743 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
744 if (MIA1_2
!= MIA1
|| MIA1Context2
!= MIA1Context
)
745 log_err("get From UCallBack action failed\n");
747 log_verbose("get From UCallBack action ok\n");
749 /*testing ucnv_setFromUCallBack with error conditions*/
750 err
=U_ILLEGAL_ARGUMENT_ERROR
;
751 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
752 ucnv_setFromUCallBack(myConverter
, otherUnicodeAction(MIA1
), &BOM
, &oldFromUAction
, &oldFromUContext
, &err
);
753 ucnv_getFromUCallBack(myConverter
, &MIA1_2
, &MIA1Context2
);
754 if(MIA1_2
== otherUnicodeAction(MIA1
) || MIA1Context2
== &BOM
){
755 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
760 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
761 ucnv_getToUCallBack(myConverter
, &MIA2
, &MIA2Context
);
763 log_verbose("\n---Testing setTo UCallBack...\n");
764 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), &BOM
, &oldToUAction
, &oldToUContext
, &err
);
765 if (U_FAILURE(err
) || oldToUAction
!= MIA2
|| oldToUContext
!= MIA2Context
)
767 log_err("FAILURE! %s\n", myErrorName(err
));
770 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
771 if (MIA2_2
!= otherCharAction(MIA2
) || MIA2Context2
!= &BOM
)
772 log_err("To UCallBack failed\n");
774 log_verbose("To UCallBack ok\n");
776 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
777 ucnv_setToUCallBack(myConverter
,MIA2
, MIA2Context
, &oldToUAction
, &oldToUContext
, &err
);
778 if (U_FAILURE(err
) || oldToUAction
!= otherCharAction(MIA2
) || oldToUContext
!= &BOM
)
779 { log_err("FAILURE! %s\n", myErrorName(err
)); }
781 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
782 if (MIA2_2
!= MIA2
|| MIA2Context2
!= MIA2Context
)
783 log_err("To UCallBack failed\n");
785 log_verbose("To UCallBack ok\n");
787 /*testing ucnv_setToUCallBack with error conditions*/
788 err
=U_ILLEGAL_ARGUMENT_ERROR
;
789 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
790 ucnv_setToUCallBack(myConverter
,otherCharAction(MIA2
), NULL
, &oldToUAction
, &oldToUContext
, &err
);
791 ucnv_getToUCallBack(myConverter
, &MIA2_2
, &MIA2Context2
);
792 if (MIA2_2
== otherCharAction(MIA2
) || MIA2Context2
== &BOM
){
793 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
798 /*getcodepageid testing ucnv_getCCSID() */
799 log_verbose("\n----Testing getCCSID....\n");
800 cp
= ucnv_getCCSID(myConverter
,&err
);
803 log_err("FAILURE!..... %s\n", myErrorName(err
));
805 if (cp
!= CodePageNumberToTest
[codepage_index
])
806 log_err("Codepage number test failed\n");
808 log_verbose("Codepage number test OK\n");
810 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
811 err
=U_ILLEGAL_ARGUMENT_ERROR
;
812 if( ucnv_getCCSID(myConverter
,&err
) != -1){
813 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
817 /*getCodepagePlatform testing ucnv_getPlatform()*/
818 log_verbose("\n---Testing getCodepagePlatform ..\n");
819 if (CodePagesPlatform
[codepage_index
]!=ucnv_getPlatform(myConverter
, &err
))
820 log_err("Platform codepage test failed\n");
822 log_verbose("Platform codepage test ok\n");
826 log_err("FAILURE! %s\n", myErrorName(err
));
828 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
829 err
= U_ILLEGAL_ARGUMENT_ERROR
;
830 if(ucnv_getPlatform(myConverter
, &err
) != UCNV_UNKNOWN
){
831 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
837 fread(&BOM
, sizeof(UChar
), 1, ucs_file_in
);
838 if (BOM
!=0xFEFF && BOM
!=0xFFFE)
840 log_err("File Missing BOM...Bailing!\n");
846 /*Reads in the file*/
847 while(!feof(ucs_file_in
)&&(i
+=fread(ucs_file_buffer
+i
, sizeof(UChar
), 1, ucs_file_in
)))
849 myUChar
= ucs_file_buffer
[i
-1];
851 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN*/
854 myUChar
= ucs_file_buffer
[i
-1];
855 ucs_file_buffer
[i
-1] = (UChar
)((BOM
==0xFEFF)?myUChar
:((myUChar
>> 8) | (myUChar
<< 8))); /*adjust if BIG_ENDIAN Corner Case*/
858 /*testing ucnv_fromUChars() and ucnv_toUChars() */
859 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
861 uchar1
=(UChar
*)malloc(sizeof(UChar
) * (i
+1));
862 u_uastrcpy(uchar1
,"");
863 u_strncpy(uchar1
,ucs_file_buffer
,i
);
866 uchar3
=(UChar
*)malloc(sizeof(UChar
)*(i
+1));
867 u_uastrcpy(uchar3
,"");
868 u_strncpy(uchar3
,ucs_file_buffer
,i
);
871 /*Calls the Conversion Routine */
872 testLong1
= MAX_FILE_LEN
;
873 log_verbose("\n---Testing ucnv_fromUChars()\n");
874 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
877 log_err("\nFAILURE...%s\n", myErrorName(err
));
880 log_verbose(" ucnv_fromUChars() o.k.\n");
882 /*test the conversion routine */
883 log_verbose("\n---Testing ucnv_toUChars()\n");
884 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
886 targetsize
= ucnv_toUChars(myConverter
,
890 strlen(output_cp_buffer
),
892 /*if there is a buffer overflow then trap the values and pass them and make the actual call*/
894 if(err
==U_BUFFER_OVERFLOW_ERROR
)
897 uchar2
=(UChar
*)malloc((targetsize
+1) * sizeof(UChar
));
898 targetsize
= ucnv_toUChars(myConverter
,
902 strlen(output_cp_buffer
),
906 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err
));
908 log_verbose(" ucnv_toUChars() o.k.\n");
910 if(u_strcmp(uchar1
,uchar2
)!=0)
911 log_err("equality test failed with conversion routine\n");
915 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
917 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
918 err
=U_ILLEGAL_ARGUMENT_ERROR
;
919 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
920 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
921 if (targetcapacity
!=0) {
922 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
925 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
926 targetcapacity
= ucnv_fromUChars(NULL
, output_cp_buffer
, testLong1
, uchar1
, -1, &err
);
927 if (targetcapacity
!=0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
928 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
931 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
932 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, testLong1
, uchar1
, 0, &err
);
933 if (targetcapacity
!=0) {
934 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
936 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
937 targetcapacity
= ucnv_fromUChars(myConverter
, output_cp_buffer
, 0, uchar1
, -1, &err
);
938 if (err
!= U_BUFFER_OVERFLOW_ERROR
) {
939 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
941 /*toUChars with error conditions*/
942 targetsize
= ucnv_toUChars(myConverter
, uchar2
, targetsize
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
944 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
947 targetsize
= ucnv_toUChars(myConverter
, uchar2
, -1, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
948 if(targetsize
!= 0 || err
!= U_ILLEGAL_ARGUMENT_ERROR
){
949 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
952 targetsize
= ucnv_toUChars(myConverter
, uchar2
, 0, output_cp_buffer
, 0, &err
);
953 if (targetsize
!=0) {
954 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
957 targetsize
= ucnv_toUChars(myConverter
, NULL
, targetcapacity2
, output_cp_buffer
, strlen(output_cp_buffer
), &err
);
958 if (err
!= U_STRING_NOT_TERMINATED_WARNING
) {
959 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
966 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
967 /*Clean up re-usable vars*/
969 log_verbose("Testing ucnv_fromUnicode().....\n");
970 tmp_ucs_buf
=ucs_file_buffer_use
;
971 ucnv_fromUnicode(myConverter
, &mytarget_1
,
972 mytarget
+ MAX_FILE_LEN
,
974 ucs_file_buffer_use
+i
,
978 consumedUni
= (UChar
*)tmp_consumedUni
;
982 log_err("FAILURE! %s\n", myErrorName(err
));
985 log_verbose("ucnv_fromUnicode() o.k.\n");
987 /*Uni1 ----FromUnicode----> Cp2 ----ToUnicode---->Uni3 */
988 log_verbose("Testing ucnv_toUnicode().....\n");
989 tmp_mytarget_use
=mytarget_use
;
990 tmp_consumed
= consumed
;
991 ucnv_toUnicode(myConverter
, &my_ucs_file_buffer_1
,
992 my_ucs_file_buffer
+ MAX_FILE_LEN
,
994 mytarget_use
+ (mytarget_1
- mytarget
),
998 consumed
= (char*)tmp_consumed
;
1001 log_err("FAILURE! %s\n", myErrorName(err
));
1004 log_verbose("ucnv_toUnicode() o.k.\n");
1007 log_verbose("\n---Testing RoundTrip ...\n");
1010 u_strncpy(uchar3
, my_ucs_file_buffer
,i
);
1013 if(u_strcmp(uchar1
,uchar3
)==0)
1014 log_verbose("Equality test o.k.\n");
1016 log_err("Equality test failed\n");
1021 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__
);
1025 if(u_strcmp(uchar2
, uchar3
)==0)
1026 log_verbose("Equality test o.k.\n");
1028 log_err("Equality test failed\n");
1031 fclose(ucs_file_in
);
1032 ucnv_close(myConverter
);
1033 if (uchar1
!= 0) free(uchar1
);
1034 if (uchar2
!= 0) free(uchar2
);
1035 if (uchar3
!= 0) free(uchar3
);
1038 free((void*)mytarget
);
1039 free((void*)output_cp_buffer
);
1040 free((void*)ucs_file_buffer
);
1041 free((void*)my_ucs_file_buffer
);
1045 static UConverterFromUCallback
otherUnicodeAction(UConverterFromUCallback MIA
)
1047 return (MIA
==(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
)?(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_SUBSTITUTE
:(UConverterFromUCallback
)UCNV_FROM_U_CALLBACK_STOP
;
1051 static UConverterToUCallback
otherCharAction(UConverterToUCallback MIA
)
1053 return (MIA
==(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
)?(UConverterToUCallback
)UCNV_TO_U_CALLBACK_SUBSTITUTE
:(UConverterToUCallback
)UCNV_TO_U_CALLBACK_STOP
;
1056 static void TestFlushCache(void) {
1057 #if !UCONFIG_NO_LEGACY_CONVERSION
1058 UErrorCode err
= U_ZERO_ERROR
;
1059 UConverter
* someConverters
[5];
1062 /* flush the converter cache to get a consistent state before the flushing is tested */
1065 /*Testing ucnv_open()*/
1066 /* Note: These converters have been chosen because they do NOT
1067 encode the Latin characters (U+0041, ...), and therefore are
1068 highly unlikely to be chosen as system default codepages */
1070 someConverters
[0] = ucnv_open("ibm-1047", &err
);
1071 if (U_FAILURE(err
)) {
1072 log_data_err("FAILURE! %s\n", myErrorName(err
));
1075 someConverters
[1] = ucnv_open("ibm-1047", &err
);
1076 if (U_FAILURE(err
)) {
1077 log_data_err("FAILURE! %s\n", myErrorName(err
));
1080 someConverters
[2] = ucnv_open("ibm-1047", &err
);
1081 if (U_FAILURE(err
)) {
1082 log_data_err("FAILURE! %s\n", myErrorName(err
));
1085 someConverters
[3] = ucnv_open("gb18030", &err
);
1086 if (U_FAILURE(err
)) {
1087 log_data_err("FAILURE! %s\n", myErrorName(err
));
1090 someConverters
[4] = ucnv_open("ibm-954", &err
);
1091 if (U_FAILURE(err
)) {
1092 log_data_err("FAILURE! %s\n", myErrorName(err
));
1096 /* Testing ucnv_flushCache() */
1097 log_verbose("\n---Testing ucnv_flushCache...\n");
1098 if ((flushCount
=ucnv_flushCache())==0)
1099 log_verbose("Flush cache ok\n");
1101 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1103 /*testing ucnv_close() and ucnv_flushCache() */
1104 ucnv_close(someConverters
[0]);
1105 ucnv_close(someConverters
[1]);
1107 if ((flushCount
=ucnv_flushCache())==0)
1108 log_verbose("Flush cache ok\n");
1110 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__
, flushCount
);
1112 ucnv_close(someConverters
[2]);
1113 ucnv_close(someConverters
[3]);
1115 if ((flushCount
=ucnv_flushCache())==2)
1116 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1118 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1122 ucnv_close(someConverters
[4]);
1123 if ( (flushCount
=ucnv_flushCache())==1)
1124 log_verbose("Flush cache ok\n");
1126 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__
, flushCount
);
1131 * Test the converter alias API, specifically the fuzzy matching of
1132 * alias names and the alias table integrity. Make sure each
1133 * converter has at least one alias (itself), and that its listed
1134 * aliases map back to itself. Check some hard-coded UTF-8 and
1135 * ISO_2022 aliases to make sure they work.
1137 static void TestAlias() {
1139 UErrorCode status
= U_ZERO_ERROR
;
1141 /* Predetermined aliases that we expect to map back to ISO_2022
1142 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1143 const char* ISO_2022_NAMES
[] =
1144 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1145 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1146 int32_t ISO_2022_NAMES_LENGTH
=
1147 sizeof(ISO_2022_NAMES
) / sizeof(ISO_2022_NAMES
[0]);
1148 const char *UTF8_NAMES
[] =
1149 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1150 "utf_8", "ibm1208", "cp1208" };
1151 int32_t UTF8_NAMES_LENGTH
=
1152 sizeof(UTF8_NAMES
) / sizeof(UTF8_NAMES
[0]);
1157 } CONVERTERS_NAMES
[] = {
1158 { "UTF-32BE", "UTF32_BigEndian" },
1159 { "UTF-32LE", "UTF32_LittleEndian" },
1160 { "UTF-32", "ISO-10646-UCS-4" },
1161 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1162 { "UTF-32", "ucs-4" }
1164 int32_t CONVERTERS_NAMES_LENGTH
= sizeof(CONVERTERS_NAMES
) / sizeof(*CONVERTERS_NAMES
);
1166 /* When there are bugs in gencnval or in ucnv_io, converters can
1167 appear to have no aliases. */
1168 ncnv
= ucnv_countAvailable();
1169 log_verbose("%d converters\n", ncnv
);
1170 for (i
=0; i
<ncnv
; ++i
) {
1171 const char *name
= ucnv_getAvailableName(i
);
1173 uint16_t na
= ucnv_countAliases(name
, &status
);
1178 log_err("FAIL: Converter \"%s\" (i=%d)"
1179 " has no aliases; expect at least one\n",
1183 cnv
= ucnv_open(name
, &status
);
1184 if (U_FAILURE(status
)) {
1185 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1186 " can't be opened.\n",
1190 if (strcmp(ucnv_getName(cnv
, &status
), name
) != 0
1191 && (strstr(name
, "PlatformEndian") == 0 && strstr(name
, "OppositeEndian") == 0)) {
1192 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1193 "The should be the same\n",
1194 name
, ucnv_getName(cnv
, &status
));
1199 status
= U_ZERO_ERROR
;
1200 alias0
= ucnv_getAlias(name
, 0, &status
);
1201 for (j
=1; j
<na
; ++j
) {
1203 /* Make sure each alias maps back to the same list of
1204 aliases. Assume that if alias 0 is the same, the whole
1205 list is the same (this should always be true). */
1206 const char *mapBack
;
1208 status
= U_ZERO_ERROR
;
1209 alias
= ucnv_getAlias(name
, j
, &status
);
1210 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1211 log_err("FAIL: Converter \"%s\"is ambiguous\n", name
);
1214 if (alias
== NULL
) {
1215 log_err("FAIL: Converter \"%s\" -> "
1221 mapBack
= ucnv_getAlias(alias
, 0, &status
);
1223 if (mapBack
== NULL
) {
1224 log_err("FAIL: Converter \"%s\" -> "
1225 "alias[%d]=\"%s\" -> "
1226 "alias[0]=NULL, exp. \"%s\"\n",
1227 name
, j
, alias
, alias0
);
1231 if (0 != strcmp(alias0
, mapBack
)) {
1233 UBool foundAlias
= FALSE
;
1234 if (status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1235 /* Make sure that we only get this mismapping when there is
1236 an ambiguous alias, and the other converter has this alias too. */
1237 for (idx
= 0; idx
< ucnv_countAliases(mapBack
, &status
); idx
++) {
1238 if (strcmp(ucnv_getAlias(mapBack
, (uint16_t)idx
, &status
), alias
) == 0) {
1244 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1247 log_err("FAIL: Converter \"%s\" -> "
1248 "alias[%d]=\"%s\" -> "
1249 "alias[0]=\"%s\", exp. \"%s\"\n",
1250 name
, j
, alias
, mapBack
, alias0
);
1257 /* Check a list of predetermined aliases that we expect to map
1258 * back to ISO_2022 and UTF-8. */
1259 for (i
=1; i
<ISO_2022_NAMES_LENGTH
; ++i
) {
1260 const char* mapBack
= ucnv_getAlias(ISO_2022_NAMES
[i
], 0, &status
);
1262 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES
[i
]);
1265 if (0 != strcmp(mapBack
, ISO_2022_NAMES
[0])) {
1266 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1267 ISO_2022_NAMES
[i
], mapBack
);
1272 for (i
=1; i
<UTF8_NAMES_LENGTH
; ++i
) {
1273 const char* mapBack
= ucnv_getAlias(UTF8_NAMES
[i
], 0, &status
);
1275 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES
[i
]);
1278 if (mapBack
&& 0 != strcmp(mapBack
, UTF8_NAMES
[0])) {
1279 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1280 UTF8_NAMES
[i
], mapBack
);
1285 * Check a list of predetermined aliases that we expect to map
1286 * back to predetermined converter names.
1289 for (i
= 0; i
< CONVERTERS_NAMES_LENGTH
; ++i
) {
1290 const char* mapBack
= ucnv_getAlias(CONVERTERS_NAMES
[i
].alias
, 0, &status
);
1292 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES
[i
].name
);
1295 if (0 != strcmp(mapBack
, CONVERTERS_NAMES
[i
].name
)) {
1296 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1297 CONVERTERS_NAMES
[i
].alias
, mapBack
, CONVERTERS_NAMES
[i
].name
);
1303 static void TestDuplicateAlias(void) {
1305 UErrorCode status
= U_ZERO_ERROR
;
1307 status
= U_ZERO_ERROR
;
1308 alias
= ucnv_getStandardName("Shift_JIS", "IBM", &status
);
1309 if (alias
== NULL
|| strcmp(alias
, "ibm-943") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1310 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias
);
1312 status
= U_ZERO_ERROR
;
1313 alias
= ucnv_getStandardName("ibm-943", "IANA", &status
);
1314 if (alias
== NULL
|| strcmp(alias
, "Shift_JIS") != 0 || status
!= U_AMBIGUOUS_ALIAS_WARNING
) {
1315 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias
);
1317 status
= U_ZERO_ERROR
;
1318 alias
= ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status
);
1319 if (alias
!= NULL
|| status
== U_AMBIGUOUS_ALIAS_WARNING
) {
1320 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias
);
1325 /* Test safe clone callback */
1327 static uint32_t TSCC_nextSerial()
1329 static uint32_t n
= 1;
1336 uint32_t magic
; /* 0xC0FFEE to identify that the object is OK */
1337 uint32_t serial
; /* minted from nextSerial, above */
1338 UBool wasClosed
; /* close happened on the object */
1341 static TSCCContext
*TSCC_clone(TSCCContext
*ctx
)
1343 TSCCContext
*newCtx
= (TSCCContext
*)malloc(sizeof(TSCCContext
));
1345 newCtx
->serial
= TSCC_nextSerial();
1346 newCtx
->wasClosed
= 0;
1347 newCtx
->magic
= 0xC0FFEE;
1349 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx
, ctx
->serial
, newCtx
, newCtx
->serial
);
1354 static void TSCC_fromU(const void *context
,
1355 UConverterFromUnicodeArgs
*fromUArgs
,
1356 const UChar
* codeUnits
,
1359 UConverterCallbackReason reason
,
1362 TSCCContext
*ctx
= (TSCCContext
*)context
;
1363 UConverterFromUCallback junkFrom
;
1365 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, fromUArgs
->converter
);
1367 if(ctx
->magic
!= 0xC0FFEE) {
1368 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1372 if(reason
== UCNV_CLONE
) {
1373 UErrorCode subErr
= U_ZERO_ERROR
;
1374 TSCCContext
*newCtx
;
1375 TSCCContext
*junkCtx
;
1376 TSCCContext
**pjunkCtx
= &junkCtx
;
1379 log_verbose("TSCC_fromU: cloning..\n");
1380 newCtx
= TSCC_clone(ctx
);
1382 if(newCtx
== NULL
) {
1383 log_err("TSCC_fromU: internal clone failed on %p\n", ctx
);
1387 ucnv_getFromUCallBack(fromUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1388 ucnv_setFromUCallBack(fromUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1390 if(U_FAILURE(subErr
)) {
1395 if(reason
== UCNV_CLOSE
) {
1396 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1397 ctx
->wasClosed
= TRUE
;
1402 static void TSCC_toU(const void *context
,
1403 UConverterToUnicodeArgs
*toUArgs
,
1404 const char* codeUnits
,
1406 UConverterCallbackReason reason
,
1409 TSCCContext
*ctx
= (TSCCContext
*)context
;
1410 UConverterToUCallback junkFrom
;
1412 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx
, ctx
->serial
, reason
, toUArgs
->converter
);
1414 if(ctx
->magic
!= 0xC0FFEE) {
1415 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx
,ctx
->serial
, ctx
->magic
);
1419 if(reason
== UCNV_CLONE
) {
1420 UErrorCode subErr
= U_ZERO_ERROR
;
1421 TSCCContext
*newCtx
;
1422 TSCCContext
*junkCtx
;
1423 TSCCContext
**pjunkCtx
= &junkCtx
;
1426 log_verbose("TSCC_toU: cloning..\n");
1427 newCtx
= TSCC_clone(ctx
);
1429 if(newCtx
== NULL
) {
1430 log_err("TSCC_toU: internal clone failed on %p\n", ctx
);
1434 ucnv_getToUCallBack(toUArgs
->converter
, &junkFrom
, (const void**)pjunkCtx
);
1435 ucnv_setToUCallBack(toUArgs
->converter
, junkFrom
, newCtx
, NULL
, NULL
, &subErr
);
1437 if(U_FAILURE(subErr
)) {
1442 if(reason
== UCNV_CLOSE
) {
1443 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx
, ctx
->serial
);
1444 ctx
->wasClosed
= TRUE
;
1448 static void TSCC_init(TSCCContext
*q
)
1450 q
->magic
= 0xC0FFEE;
1451 q
->serial
= TSCC_nextSerial();
1455 static void TSCC_print_log(TSCCContext
*q
, const char *name
)
1458 log_verbose("TSCContext: %s is NULL!!\n", name
);
1460 if(q
->magic
!= 0xC0FFEE) {
1461 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1462 q
,q
->serial
, q
->magic
);
1464 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1465 q
, q
->serial
, name
, q
->magic
, q
->wasClosed
?"CLOSED":"open");
1469 #if !UCONFIG_NO_LEGACY_CONVERSION
1470 static void TestConvertSafeCloneCallback()
1472 UErrorCode err
= U_ZERO_ERROR
;
1473 TSCCContext from1
, to1
;
1474 TSCCContext
*from2
, *from3
, *to2
, *to3
;
1475 TSCCContext
**pfrom2
= &from2
, **pfrom3
= &from3
, **pto2
= &to2
, **pto3
= &to3
;
1477 int32_t hunkSize
= 8192;
1478 UConverterFromUCallback junkFrom
;
1479 UConverterToUCallback junkTo
;
1480 UConverter
*conv1
, *conv2
= NULL
;
1482 conv1
= ucnv_open("iso-8859-3", &err
);
1484 if(U_FAILURE(err
)) {
1485 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err
));
1489 log_verbose("Opened conv1=%p\n", conv1
);
1494 TSCC_print_log(&from1
, "from1");
1495 TSCC_print_log(&to1
, "to1");
1497 ucnv_setFromUCallBack(conv1
, TSCC_fromU
, &from1
, NULL
, NULL
, &err
);
1498 log_verbose("Set from1 on conv1\n");
1499 TSCC_print_log(&from1
, "from1");
1501 ucnv_setToUCallBack(conv1
, TSCC_toU
, &to1
, NULL
, NULL
, &err
);
1502 log_verbose("Set to1 on conv1\n");
1503 TSCC_print_log(&to1
, "to1");
1505 conv2
= ucnv_safeClone(conv1
, hunk
, &hunkSize
, &err
);
1506 if(U_FAILURE(err
)) {
1507 log_err("safeClone failed: %s\n", u_errorName(err
));
1510 log_verbose("Cloned to conv2=%p.\n", conv2
);
1512 /********** from *********************/
1513 ucnv_getFromUCallBack(conv2
, &junkFrom
, (const void**)pfrom2
);
1514 ucnv_getFromUCallBack(conv1
, &junkFrom
, (const void**)pfrom3
);
1516 TSCC_print_log(from2
, "from2");
1517 TSCC_print_log(from3
, "from3(==from1)");
1520 log_err("FAIL! from2 is null \n");
1525 log_err("FAIL! from3 is null \n");
1529 if(from3
!= (&from1
) ) {
1530 log_err("FAIL! conv1's FROM context changed!\n");
1533 if(from2
== (&from1
) ) {
1534 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1537 if(from1
.wasClosed
) {
1538 log_err("FAIL! from1 is closed \n");
1541 if(from2
->wasClosed
) {
1542 log_err("FAIL! from2 was closed\n");
1545 /********** to *********************/
1546 ucnv_getToUCallBack(conv2
, &junkTo
, (const void**)pto2
);
1547 ucnv_getToUCallBack(conv1
, &junkTo
, (const void**)pto3
);
1549 TSCC_print_log(to2
, "to2");
1550 TSCC_print_log(to3
, "to3(==to1)");
1553 log_err("FAIL! to2 is null \n");
1558 log_err("FAIL! to3 is null \n");
1562 if(to3
!= (&to1
) ) {
1563 log_err("FAIL! conv1's TO context changed!\n");
1566 if(to2
== (&to1
) ) {
1567 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1571 log_err("FAIL! to1 is closed \n");
1574 if(to2
->wasClosed
) {
1575 log_err("FAIL! to2 was closed\n");
1578 /*************************************/
1581 log_verbose("ucnv_closed (conv1)\n");
1582 TSCC_print_log(&from1
, "from1");
1583 TSCC_print_log(from2
, "from2");
1584 TSCC_print_log(&to1
, "to1");
1585 TSCC_print_log(to2
, "to2");
1587 if(from1
.wasClosed
== FALSE
) {
1588 log_err("FAIL! from1 is NOT closed \n");
1591 if(from2
->wasClosed
) {
1592 log_err("FAIL! from2 was closed\n");
1595 if(to1
.wasClosed
== FALSE
) {
1596 log_err("FAIL! to1 is NOT closed \n");
1599 if(to2
->wasClosed
) {
1600 log_err("FAIL! to2 was closed\n");
1604 log_verbose("ucnv_closed (conv2)\n");
1606 TSCC_print_log(&from1
, "from1");
1607 TSCC_print_log(from2
, "from2");
1609 if(from1
.wasClosed
== FALSE
) {
1610 log_err("FAIL! from1 is NOT closed \n");
1613 if(from2
->wasClosed
== FALSE
) {
1614 log_err("FAIL! from2 was NOT closed\n");
1617 TSCC_print_log(&to1
, "to1");
1618 TSCC_print_log(to2
, "to2");
1620 if(to1
.wasClosed
== FALSE
) {
1621 log_err("FAIL! to1 is NOT closed \n");
1624 if(to2
->wasClosed
== FALSE
) {
1625 log_err("FAIL! to2 was NOT closed\n");
1629 free(to2
); /* to1 is stack based */
1631 if(from2
!= (&from1
)) {
1632 free(from2
); /* from1 is stack based */
1638 containsAnyOtherByte(uint8_t *p
, int32_t length
, uint8_t b
) {
1649 static void TestConvertSafeClone()
1651 /* one 'regular' & all the 'private stateful' converters */
1652 static const char *const names
[] = {
1653 #if !UCONFIG_NO_LEGACY_CONVERSION
1655 "ISO_2022,locale=zh,version=1",
1658 #if !UCONFIG_NO_LEGACY_CONVERSION
1662 "ISO_2022,locale=kr,version=1",
1663 "ISO_2022,locale=jp,version=2",
1667 #if !UCONFIG_NO_LEGACY_CONVERSION
1668 "IMAP-mailbox-name",
1675 /* store the actual sizes of each converter */
1676 int32_t actualSizes
[LENGTHOF(names
)];
1678 static const int32_t bufferSizes
[] = {
1679 U_CNV_SAFECLONE_BUFFERSIZE
,
1680 (int32_t)(3*sizeof(UConverter
))/2, /* 1.5*sizeof(UConverter) */
1681 (int32_t)sizeof(UConverter
)/2 /* 0.5*sizeof(UConverter) */
1684 char charBuffer
[21]; /* Leave at an odd number for alignment testing */
1685 uint8_t buffer
[3] [U_CNV_SAFECLONE_BUFFERSIZE
];
1686 int32_t bufferSize
, maxBufferSize
;
1687 const char *maxName
;
1688 UConverter
* cnv
, *cnv2
;
1692 const char *pConstCharBuffer
;
1693 const char *charBufferLimit
= charBuffer
+ sizeof(charBuffer
)/sizeof(*charBuffer
);
1694 UChar uniBuffer
[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1695 UChar uniCharBuffer
[20];
1696 char charSourceBuffer
[] = { 0x1b, 0x24, 0x42 };
1697 const char *pCharSource
= charSourceBuffer
;
1698 const char *pCharSourceLimit
= charSourceBuffer
+ sizeof(charSourceBuffer
);
1699 UChar
*pUCharTarget
= uniCharBuffer
;
1700 UChar
*pUCharTargetLimit
= uniCharBuffer
+ sizeof(uniCharBuffer
)/sizeof(*uniCharBuffer
);
1701 const UChar
* pUniBuffer
;
1702 const UChar
*uniBufferLimit
= uniBuffer
+ sizeof(uniBuffer
)/sizeof(*uniBuffer
);
1706 cnv
= ucnv_open(names
[0], &err
);
1707 if(U_SUCCESS(err
)) {
1708 /* Check the various error & informational states: */
1710 /* Null status - just returns NULL */
1711 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1712 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, 0))
1714 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1716 /* error status - should return 0 & keep error the same */
1717 err
= U_MEMORY_ALLOCATION_ERROR
;
1718 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
1720 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1724 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
1725 if (0 != ucnv_safeClone(cnv
, buffer
[0], 0, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1727 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1731 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1733 if (0 != ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
) || U_FAILURE(err
) || bufferSize
<= 0)
1735 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1737 /* Verify our define is large enough */
1738 if (U_CNV_SAFECLONE_BUFFERSIZE
< bufferSize
)
1740 log_err("FAIL: Pre-calculated buffer size is too small\n");
1742 /* Verify we can use this run-time calculated size */
1743 if (0 == (cnv2
= ucnv_safeClone(cnv
, buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
1745 log_err("FAIL: Converter can't be cloned with run-time size\n");
1751 /* size one byte too small - should allocate & let us know */
1753 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1755 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1762 bufferSize
= U_CNV_SAFECLONE_BUFFERSIZE
;
1764 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
1765 if (0 == (cnv2
= ucnv_safeClone(cnv
, 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
1767 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1775 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1776 if (0 != ucnv_safeClone(0, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
1778 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1787 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1789 for(j
= 0; j
< LENGTHOF(bufferSizes
); ++j
) {
1790 for (index
= 0; index
< LENGTHOF(names
); index
++)
1793 cnv
= ucnv_open(names
[index
], &err
);
1794 if(U_FAILURE(err
)) {
1795 log_data_err("ucnv_open(\"%s\") failed - %s\n", names
[index
], u_errorName(err
));
1800 /* preflight to get maxBufferSize */
1801 actualSizes
[index
] = 0;
1802 ucnv_safeClone(cnv
, NULL
, &actualSizes
[index
], &err
);
1803 if(actualSizes
[index
] > maxBufferSize
) {
1804 maxBufferSize
= actualSizes
[index
];
1805 maxName
= names
[index
];
1809 memset(buffer
, 0xaa, sizeof(buffer
));
1811 bufferSize
= bufferSizes
[j
];
1812 cnv2
= ucnv_safeClone(cnv
, buffer
[1], &bufferSize
, &err
);
1814 /* close the original immediately to make sure that the clone works by itself */
1817 if( actualSizes
[index
] <= (bufferSizes
[j
] - (int32_t)sizeof(UAlignedMemory
)) &&
1818 err
== U_SAFECLONE_ALLOCATED_WARNING
1820 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names
[index
]);
1823 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1824 if(bufferSize
<= bufferSizes
[j
]) {
1825 /* used the stack buffer */
1826 if( containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
[0]), 0xaa) ||
1827 containsAnyOtherByte(buffer
[1]+bufferSize
, (int32_t)(sizeof(buffer
)-(sizeof(buffer
[0])+bufferSize
)), 0xaa)
1829 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1830 names
[index
], bufferSize
, bufferSizes
[j
]);
1833 /* heap-allocated the clone */
1834 if(containsAnyOtherByte(buffer
[0], (int32_t)sizeof(buffer
), 0xaa)) {
1835 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1836 names
[index
], bufferSize
, bufferSizes
[j
]);
1840 pCharBuffer
= charBuffer
;
1841 pUniBuffer
= uniBuffer
;
1843 ucnv_fromUnicode(cnv2
,
1852 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err
));
1854 ucnv_toUnicode(cnv2
,
1865 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err
));
1868 pConstCharBuffer
= charBuffer
;
1869 if (uniBuffer
[0] != ucnv_getNextUChar(cnv2
, &pConstCharBuffer
, pCharBuffer
, &err
))
1871 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err
));
1877 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1878 sizeof(UConverter
), maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1879 if(maxBufferSize
> U_CNV_SAFECLONE_BUFFERSIZE
) {
1880 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1881 maxBufferSize
, maxName
, (int)U_CNV_SAFECLONE_BUFFERSIZE
);
1885 static void TestCCSID() {
1886 #if !UCONFIG_NO_LEGACY_CONVERSION
1888 UErrorCode errorCode
;
1889 int32_t ccsids
[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1892 for(i
=0; i
<(int32_t)(sizeof(ccsids
)/sizeof(int32_t)); ++i
) {
1895 errorCode
=U_ZERO_ERROR
;
1896 cnv
=ucnv_openCCSID(ccsid
, UCNV_IBM
, &errorCode
);
1897 if(U_FAILURE(errorCode
)) {
1898 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid
, u_errorName(errorCode
));
1902 if(ccsid
!=ucnv_getCCSID(cnv
, &errorCode
)) {
1903 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid
, ucnv_getCCSID(cnv
, &errorCode
));
1906 /* skip gb18030(ccsid 1392) */
1907 if(ccsid
!= 1392 && UCNV_IBM
!=ucnv_getPlatform(cnv
, &errorCode
)) {
1908 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid
, ucnv_getPlatform(cnv
, &errorCode
));
1916 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1918 /* CHUNK_SIZE defined in common\ucnv.c: */
1919 #define CHUNK_SIZE 1024
1921 static void bug1(void);
1922 static void bug2(void);
1923 static void bug3(void);
1928 bug1(); /* Unicode intermediate buffer straddle bug */
1929 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1930 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1934 * jitterbug 932: test chunking boundary conditions in
1936 int32_t ucnv_convert(const char *toConverterName,
1937 const char *fromConverterName,
1944 * See discussions on the icu mailing list in
1945 * 2001-April with the subject "converter 'flush' question".
1947 * Bug report and test code provided by Edward J. Batutis.
1951 #if !UCONFIG_NO_LEGACY_CONVERSION
1952 char char_in
[CHUNK_SIZE
+32];
1953 char char_out
[CHUNK_SIZE
*2];
1955 /* GB 18030 equivalent of U+10000 is 90308130 */
1956 static const char test_seq
[]={ (char)0x90u
, 0x30, (char)0x81u
, 0x30 };
1958 UErrorCode err
= U_ZERO_ERROR
;
1959 int32_t i
, test_seq_len
= sizeof(test_seq
);
1962 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1963 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1964 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1965 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1966 * other Unicode sequences cause a bug since combining sequences are not supported by the
1970 for (i
= test_seq_len
; i
>= 0; i
--) {
1971 /* put character sequence into input buffer */
1972 memset(char_in
, 0x61, sizeof(char_in
)); /* GB 18030 'a' */
1973 memcpy(char_in
+ (CHUNK_SIZE
- i
), test_seq
, test_seq_len
);
1975 /* do the conversion */
1976 ucnv_convert("us-ascii", /* out */
1985 if (err
== U_TRUNCATED_CHAR_FOUND
) {
1986 /* this happens when surrogate pair straddles the intermediate buffer in
1987 * T_UConverter_fromCodepageToCodepage */
1988 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
1994 /* bug2: pre-flighting loop bug: simple overflow causes bug */
1997 /* US-ASCII "1234567890" */
1998 static const char source
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
1999 static const char sourceUTF8
[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2000 static const char sourceUTF32
[]={ 0x00, 0x00, 0x00, 0x30,
2001 0x00, 0x00, 0x00, 0x31,
2002 0x00, 0x00, 0x00, 0x32,
2003 0x00, 0x00, 0x00, 0x33,
2004 0x00, 0x00, 0x00, 0x34,
2005 0x00, 0x00, 0x00, 0x35,
2006 0x00, 0x00, 0x00, 0x36,
2007 0x00, 0x00, 0x00, 0x37,
2008 0x00, 0x00, 0x00, 0x38,
2009 0x00, 0x00, (char)0xf0, 0x00};
2010 static char target
[5];
2012 UErrorCode err
= U_ZERO_ERROR
;
2015 /* do the conversion */
2016 size
= ucnv_convert("iso-8859-1", /* out */
2017 "us-ascii", /* in */
2025 /* bug2: size is 5, should be 10 */
2026 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size
);
2030 /* do the conversion */
2031 size
= ucnv_convert("UTF-32BE", /* out */
2040 /* bug2: size is 5, should be 32 */
2041 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size
);
2045 /* do the conversion */
2046 size
= ucnv_convert("UTF-8", /* out */
2047 "UTF-32BE", /* in */
2051 sizeof(sourceUTF32
),
2055 /* bug2: size is 5, should be 12 */
2056 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size
);
2061 * bug3: when the characters expand going from source to target codepage
2062 * you get bug3 in addition to bug2
2066 #if !UCONFIG_NO_LEGACY_CONVERSION
2067 char char_in
[CHUNK_SIZE
*4];
2069 UErrorCode err
= U_ZERO_ERROR
;
2073 * first get the buggy size from bug2 then
2074 * compare it to buggy size with an expansion
2076 memset(char_in
, 0x61, sizeof(char_in
)); /* US-ASCII 'a' */
2078 /* do the conversion */
2079 size
= ucnv_convert("lmbcs", /* out */
2080 "us-ascii", /* in */
2087 if ( size
!= sizeof(char_in
) ) {
2089 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2090 * in the converter?), should be CHUNK_SIZE*4
2092 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2094 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in
), size
);
2098 * now do the conversion with expansion
2099 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2101 memset(char_in
, 8, sizeof(char_in
));
2104 /* do the conversion */
2105 size
= ucnv_convert("lmbcs", /* out */
2106 "us-ascii", /* in */
2113 /* expect 2X expansion */
2114 if ( size
!= sizeof(char_in
) * 2 ) {
2117 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2119 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in
) * 2, size
);
2125 convertExStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2126 const char *src
, int32_t srcLength
,
2127 const char *expectTarget
, int32_t expectTargetLength
,
2129 const char *testName
,
2130 UErrorCode expectCode
) {
2131 UChar pivotBuffer
[CHUNK_SIZE
];
2132 UChar
*pivotSource
, *pivotTarget
;
2133 const UChar
*pivotLimit
;
2135 char targetBuffer
[CHUNK_SIZE
];
2137 const char *srcLimit
, *finalSrcLimit
, *targetLimit
;
2139 int32_t targetLength
;
2143 UErrorCode errorCode
;
2146 if(chunkSize
>CHUNK_SIZE
) {
2147 chunkSize
=CHUNK_SIZE
;
2150 pivotSource
=pivotTarget
=pivotBuffer
;
2151 pivotLimit
=pivotBuffer
+chunkSize
;
2153 finalSrcLimit
=src
+srcLength
;
2154 target
=targetBuffer
;
2155 targetLimit
=targetBuffer
+chunkSize
;
2157 ucnv_resetToUnicode(srcCnv
);
2158 ucnv_resetFromUnicode(targetCnv
);
2160 errorCode
=U_ZERO_ERROR
;
2163 /* convert, streaming-style (both converters and pivot keep state) */
2165 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2166 if(src
+chunkSize
<=finalSrcLimit
) {
2167 srcLimit
=src
+chunkSize
;
2169 srcLimit
=finalSrcLimit
;
2171 ucnv_convertEx(targetCnv
, srcCnv
,
2172 &target
, targetLimit
,
2174 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
2175 FALSE
, flush
, &errorCode
);
2176 targetLength
=(int32_t)(target
-targetBuffer
);
2177 if(target
>targetLimit
) {
2178 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2179 testName
, chunkSize
, target
, targetLimit
);
2180 break; /* TODO: major problem! */
2182 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2183 /* continue converting another chunk */
2184 errorCode
=U_ZERO_ERROR
;
2185 if(targetLength
+chunkSize
<=sizeof(targetBuffer
)) {
2186 targetLimit
=target
+chunkSize
;
2188 targetLimit
=targetBuffer
+sizeof(targetBuffer
);
2190 } else if(U_FAILURE(errorCode
)) {
2196 } else if(src
==finalSrcLimit
&& pivotSource
==pivotTarget
) {
2197 /* all consumed, now flush without input (separate from conversion for testing) */
2202 if(!(errorCode
==expectCode
|| (expectCode
==U_ZERO_ERROR
&& errorCode
==U_STRING_NOT_TERMINATED_WARNING
))) {
2203 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2204 testName
, chunkSize
, u_errorName(errorCode
), u_errorName(expectCode
));
2205 } else if(targetLength
!=expectTargetLength
) {
2206 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2207 testName
, chunkSize
, targetLength
, expectTargetLength
);
2208 } else if(memcmp(targetBuffer
, expectTarget
, targetLength
)!=0) {
2209 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2210 testName
, chunkSize
);
/*
 * Runs convertExStreaming() three times with the same converters, expected
 * output, test name and expected error code. The three calls differ only in
 * the numeric argument passed just before testName (1, 3, 7) -- presumably
 * the chunk size; confirm against the convertExStreaming() signature.
 */
2215 convertExMultiStreaming(UConverter
*srcCnv
, UConverter
*targetCnv
,
2216 const char *src
, int32_t srcLength
,
2217 const char *expectTarget
, int32_t expectTargetLength
,
2218 const char *testName
,
2219 UErrorCode expectCode
) {
2220 convertExStreaming(srcCnv
, targetCnv
,
2222 expectTarget
, expectTargetLength
,
2223 1, testName
, expectCode
);
2224 convertExStreaming(srcCnv
, targetCnv
,
2226 expectTarget
, expectTargetLength
,
2227 3, testName
, expectCode
);
2228 convertExStreaming(srcCnv
, targetCnv
,
2230 expectTarget
, expectTargetLength
,
2231 7, testName
, expectCode
);
/*
 * Exercises ucnv_convertEx() between a UTF-8 and a Shift-JIS converter.
 * First convertExMultiStreaming() is run in both directions and with the
 * Shift-JIS bytes fed in as UTF-8 input; then single calls check
 * NUL-terminated input, U_STRING_NOT_TERMINATED_WARNING, a pre-set error
 * code that must be left untouched, and several pivot/source argument
 * combinations that must produce U_ILLEGAL_ARGUMENT_ERROR.
 * The body is guarded by !UCONFIG_NO_LEGACY_CONVERSION.
 */
2234 static void TestConvertEx() {
2235 #if !UCONFIG_NO_LEGACY_CONVERSION
2236 static const uint8_t
2238 /* 4e00 30a1 ff61 0410 */
2239 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2242 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2246 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2247 * SUB, SUB, 0x40, SUB, SUB, 0x40
2249 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2252 char srcBuffer
[100], targetBuffer
[100];
2257 UChar pivotBuffer
[100];
2258 UChar
*pivotSource
, *pivotTarget
;
2260 UConverter
*cnv1
, *cnv2
;
2261 UErrorCode errorCode
;
2263 errorCode
=U_ZERO_ERROR
;
2264 cnv1
=ucnv_open("UTF-8", &errorCode
);
2265 if(U_FAILURE(errorCode
)) {
2266 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode
));
2270 cnv2
=ucnv_open("Shift-JIS", &errorCode
);
2271 if(U_FAILURE(errorCode
)) {
2272 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
2277 /* test ucnv_convertEx() with streaming conversion style */
2278 convertExMultiStreaming(cnv1
, cnv2
,
2279 (const char *)utf8
, sizeof(utf8
), (const char *)shiftJIS
, sizeof(shiftJIS
),
2280 "UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2282 convertExMultiStreaming(cnv2
, cnv1
,
2283 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
),
2284 "Shift-JIS -> UTF-8", U_ZERO_ERROR
);
2286 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2287 convertExMultiStreaming(cnv1
, cnv2
,
2288 (const char *)shiftJIS
, sizeof(shiftJIS
), (const char *)errorTarget
, sizeof(errorTarget
),
2289 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR
);
2291 /* test some simple conversions */
2293 /* NUL-terminated source and target */
/* errorCode starts as a warning; the check below expects the call to end with U_ZERO_ERROR */
2294 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2295 memcpy(srcBuffer
, utf8
, sizeof(utf8
));
2296 srcBuffer
[sizeof(utf8
)]=0;
2298 target
=targetBuffer
;
2299 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2300 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2301 if( errorCode
!=U_ZERO_ERROR
||
2302 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2304 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
/* NOTE(review): %d is given (target-targetBuffer) and sizeof(...), which are ptrdiff_t/size_t, not int */
2306 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2307 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
2310 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2311 errorCode
=U_AMBIGUOUS_ALIAS_WARNING
;
/* fill with 0xff so the check can see that nothing was written at *target */
2312 memset(targetBuffer
, 0xff, sizeof(targetBuffer
));
2314 target
=targetBuffer
;
/* target limit is exactly sizeof(shiftJIS) bytes away: room for the output but not for a terminator */
2315 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(shiftJIS
), &src
, NULL
,
2316 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2317 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2318 target
-targetBuffer
!=sizeof(shiftJIS
) ||
2319 *target
!=(char)0xff ||
2320 memcmp(targetBuffer
, shiftJIS
, sizeof(shiftJIS
))!=0
2322 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2323 u_errorName(errorCode
), target
-targetBuffer
, sizeof(shiftJIS
));
/* a failure code set before the call must come back unchanged */
2327 errorCode
=U_MESSAGE_PARSE_ERROR
;
2329 target
=targetBuffer
;
2330 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2331 NULL
, NULL
, NULL
, NULL
, TRUE
, TRUE
, &errorCode
);
2332 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2333 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
2336 /* pivotLimit==pivotStart */
2337 errorCode
=U_ZERO_ERROR
;
2338 pivotSource
=pivotTarget
=pivotBuffer
;
2339 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2340 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
, TRUE
, TRUE
, &errorCode
);
2341 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2342 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode
));
2345 /* *pivotSource==NULL */
2346 errorCode
=U_ZERO_ERROR
;
2348 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2349 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2350 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2351 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode
));
2355 errorCode
=U_ZERO_ERROR
;
2357 pivotSource
=pivotBuffer
;
2358 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2359 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, TRUE
, &errorCode
);
2360 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2361 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode
));
2364 /* streaming conversion without a pivot buffer */
2365 errorCode
=U_ZERO_ERROR
;
2367 pivotSource
=pivotBuffer
;
/* pivotStart is NULL and flush is FALSE here; the check below expects U_ILLEGAL_ARGUMENT_ERROR */
2368 ucnv_convertEx(cnv2
, cnv1
, &target
, targetBuffer
+sizeof(targetBuffer
), &src
, NULL
,
2369 NULL
, &pivotSource
, &pivotTarget
, pivotBuffer
+1, TRUE
, FALSE
, &errorCode
);
2370 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2371 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode
));
2379 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * badUTF8[]: table of C strings holding ill-formed UTF-8 byte sequences.
 * The test functions below take each entry's length with strlen() and iterate
 * over the table with LENGTHOF(badUTF8).
 */
2380 static const char *const badUTF8
[]={
2384 /* truncated multi-byte sequences */
2421 "\xfc\x80\x80\x80\x80",
2423 /* complete sequences but non-shortest forms or out of range etc. */
2429 "\xf8\x80\x80\x80\x80",
2430 "\xfc\x80\x80\x80\x80\x80",
/* capacity of the char0[]/char1[] output buffers that getTestChar() fills */
2435 #define ARG_CHAR_ARR_SIZE 8
2437 /* get some character that can be converted and convert it */
/*
 * Takes the code point at index uset_size()/2 of the converter's
 * UCNV_ROUNDTRIP_SET, appends it as UTF-16 to a local buffer and as UTF-8 to
 * charUTF8, and runs ucnv_fromUnicode() twice, writing into char0 and char1.
 * The byte counts are stored through pCharUTF8Length, pChar0Length and
 * pChar1Length. An error is logged with converterName when errorCode
 * indicates failure afterwards.
 * NOTE(review): the return statements are not visible here; the caller
 * treats a false result as "skip this converter".
 */
2438 static UBool
getTestChar(UConverter
*cnv
, const char *converterName
,
2439 char charUTF8
[4], int32_t *pCharUTF8Length
,
2440 char char0
[ARG_CHAR_ARR_SIZE
], int32_t *pChar0Length
,
2441 char char1
[ARG_CHAR_ARR_SIZE
], int32_t *pChar1Length
) {
2442 UChar utf16
[U16_MAX_LENGTH
];
2443 int32_t utf16Length
;
2445 const UChar
*utf16Source
;
2450 UErrorCode errorCode
;
2452 errorCode
=U_ZERO_ERROR
;
2453 set
=uset_open(1, 0);
2454 ucnv_getUnicodeSet(cnv
, set
, UCNV_ROUNDTRIP_SET
, &errorCode
);
/* pick the element in the middle of the roundtrip set */
2455 c
=uset_charAt(set
, uset_size(set
)/2);
2459 U16_APPEND_UNSAFE(utf16
, utf16Length
, c
);
2461 U8_APPEND_UNSAFE(charUTF8
, *pCharUTF8Length
, c
);
/* first conversion: output goes to char0 (no flush) */
2465 ucnv_fromUnicode(cnv
,
2466 &target
, char0
+ARG_CHAR_ARR_SIZE
,
2467 &utf16Source
, utf16
+utf16Length
,
2468 NULL
, FALSE
, &errorCode
);
2469 *pChar0Length
=(int32_t)(target
-char0
);
/* second conversion with the same converter: output goes to char1 */
2473 ucnv_fromUnicode(cnv
,
2474 &target
, char1
+ARG_CHAR_ARR_SIZE
,
2475 &utf16Source
, utf16
+utf16Length
,
2476 NULL
, FALSE
, &errorCode
);
2477 *pChar1Length
=(int32_t)(target
-char1
);
2479 if(U_FAILURE(errorCode
)) {
2480 log_err("unable to get test character for %s - %s\n", converterName
, u_errorName(errorCode
));
/*
 * Sets the UTF-8 converter's toUnicode callback to UCNV_TO_U_CALLBACK_STOP,
 * then for badUTF8[] entries builds "test character + bad sequence" and
 * converts it with ucnv_convertEx() (reset and flush both TRUE). The call is
 * expected to end with U_TRUNCATED_CHAR_FOUND and an untouched pivotSource,
 * and ucnv_getInvalidChars() on the UTF-8 converter must return exactly the
 * bytes of the bad sequence.
 * NOTE(review): the body of the length check against
 * 1+U8_COUNT_TRAIL_BYTES() is not visible; presumably complete sequences are
 * skipped so that only truncated ones are tested -- confirm.
 */
2486 static void testFromTruncatedUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2487 char charUTF8
[4], int32_t charUTF8Length
,
2488 char char0
[8], int32_t char0Length
,
2489 char char1
[8], int32_t char1Length
) {
2494 int32_t outputLength
;
2496 char invalidChars
[8];
2497 int8_t invalidLength
;
2502 UChar pivotBuffer
[8];
2503 UChar
*pivotSource
, *pivotTarget
;
2505 UErrorCode errorCode
;
2508 /* test truncated sequences */
2509 errorCode
=U_ZERO_ERROR
;
2510 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
/* the test character is copied once; each bad sequence is written right after it */
2512 memcpy(utf8
, charUTF8
, charUTF8Length
);
2514 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2515 /* truncated sequence? */
2516 int32_t length
=strlen(badUTF8
[i
]);
2517 if(length
>=(1+U8_COUNT_TRAIL_BYTES(badUTF8
[i
][0]))) {
2521 /* assemble a string with the test character and the truncated sequence */
2522 memcpy(utf8
+charUTF8Length
, badUTF8
[i
], length
);
2523 utf8Length
=charUTF8Length
+length
;
2525 /* convert and check the invalidChars */
2528 pivotSource
=pivotTarget
=pivotBuffer
;
2529 errorCode
=U_ZERO_ERROR
;
2530 ucnv_convertEx(cnv
, utf8Cnv
,
2531 &target
, output
+sizeof(output
),
2532 &source
, utf8
+utf8Length
,
2533 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2534 TRUE
, TRUE
, /* reset & flush */
2536 outputLength
=(int32_t)(target
-output
);
2537 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
|| pivotSource
!=pivotBuffer
) {
2538 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode
), converterName
, (long)i
);
2542 errorCode
=U_ZERO_ERROR
;
/* invalidLength is in/out: capacity going in, number of invalid bytes coming out */
2543 invalidLength
=(int8_t)sizeof(invalidChars
);
2544 ucnv_getInvalidChars(utf8Cnv
, invalidChars
, &invalidLength
, &errorCode
);
2545 if(invalidLength
!=length
|| 0!=memcmp(invalidChars
, badUTF8
[i
], length
)) {
2546 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName
, (long)i
);
/*
 * Sets the UTF-8 converter's toUnicode callback to UCNV_TO_U_CALLBACK_SKIP,
 * builds one input string that starts with the test character and then
 * alternates each badUTF8[] entry with the test character again, and builds
 * the expected output as char0 followed by one char1 per entry. The pair is
 * then passed to convertExMultiStreaming() under the test name
 * "from bad UTF-8 to <converterName>".
 */
2551 static void testFromBadUTF8(UConverter
*utf8Cnv
, UConverter
*cnv
, const char *converterName
,
2552 char charUTF8
[4], int32_t charUTF8Length
,
2553 char char0
[8], int32_t char0Length
,
2554 char char1
[8], int32_t char1Length
) {
2555 char utf8
[600], expect
[600];
2556 int32_t utf8Length
, expectLength
;
2560 UErrorCode errorCode
;
2563 errorCode
=U_ZERO_ERROR
;
2564 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, NULL
, &errorCode
);
2567 * assemble an input string with the test character between each
2569 * and an expected string with repeated test character output
2571 memcpy(utf8
, charUTF8
, charUTF8Length
);
2572 utf8Length
=charUTF8Length
;
/* the first output character uses char0; later ones (inside the loop) use char1 */
2574 memcpy(expect
, char0
, char0Length
);
2575 expectLength
=char0Length
;
2577 for(i
=0; i
<LENGTHOF(badUTF8
); ++i
) {
2578 int32_t length
=strlen(badUTF8
[i
]);
2579 memcpy(utf8
+utf8Length
, badUTF8
[i
], length
);
2582 memcpy(utf8
+utf8Length
, charUTF8
, charUTF8Length
);
2583 utf8Length
+=charUTF8Length
;
2585 memcpy(expect
+expectLength
, char1
, char1Length
);
2586 expectLength
+=char1Length
;
2589 /* expect that each bad UTF-8 sequence is detected and skipped */
2590 strcpy(testName
, "from bad UTF-8 to ");
2591 strcat(testName
, converterName
);
2593 convertExMultiStreaming(utf8Cnv
, cnv
,
2595 expect
, expectLength
,
2600 /* Test illegal UTF-8 input. */
/*
 * Opens a UTF-8 converter, then for each name in converterNames[] opens that
 * converter, obtains a test character and its converted forms through
 * getTestChar(), and runs testFromTruncatedUTF8() and testFromBadUTF8() with
 * them. Open failures are reported with log_data_err(). The UTF-8 converter
 * is closed at the end.
 */
2601 static void TestConvertExFromUTF8() {
2602 static const char *const converterNames
[]={
2603 #if !UCONFIG_NO_LEGACY_CONVERSION
2612 UConverter
*utf8Cnv
, *cnv
;
2613 UErrorCode errorCode
;
2616 /* fromUnicode versions of some character, from initial state and later */
2617 char charUTF8
[4], char0
[8], char1
[8];
2618 int32_t charUTF8Length
, char0Length
, char1Length
;
2620 errorCode
=U_ZERO_ERROR
;
2621 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2622 if(U_FAILURE(errorCode
)) {
2623 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2627 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2628 errorCode
=U_ZERO_ERROR
;
2629 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2630 if(U_FAILURE(errorCode
)) {
2631 log_data_err("unable to open %s converter - %s\n", converterNames
[i
], u_errorName(errorCode
));
2634 if(!getTestChar(cnv
, converterNames
[i
], charUTF8
, &charUTF8Length
, char0
, &char0Length
, char1
, &char1Length
)) {
2637 testFromTruncatedUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2638 testFromBadUTF8(utf8Cnv
, cnv
, converterNames
[i
], charUTF8
, charUTF8Length
, char0
, char0Length
, char1
, char1Length
);
2641 ucnv_close(utf8Cnv
);
/*
 * Converts the two-byte ill-formed UTF-8 input C5 F0 with ucnv_convertEx()
 * into every converter named in converterNames[], after setting that
 * converter's fromUnicode callback to UCNV_FROM_U_CALLBACK_ESCAPE with
 * UCNV_ESCAPE_XML_DEC. The last table entry is compared against two UTF-8
 * encoded U+FFFD, the others against two decimal NCRs. The target limit is
 * dest+expectedLength, so the call must end with
 * U_STRING_NOT_TERMINATED_WARNING, consume both source bytes, and leave the
 * fill byte 9 directly after the output.
 */
2644 static void TestConvertExFromUTF8_C5F0() {
2645 static const char *const converterNames
[]={
2646 #if !UCONFIG_NO_LEGACY_CONVERSION
2655 UConverter
*utf8Cnv
, *cnv
;
2656 UErrorCode errorCode
;
2659 static const char bad_utf8
[2]={ (char)0xC5, (char)0xF0 };
2660 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2661 static const char twoNCRs
[16]={
2662 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2663 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2665 static const char twoFFFD
[6]={
2666 (char)0xef, (char)0xbf, (char)0xbd,
2667 (char)0xef, (char)0xbf, (char)0xbd
2669 const char *expected
;
2670 int32_t expectedLength
;
2671 char dest
[20]; /* longer than longest expectedLength */
2676 UChar pivotBuffer
[128];
2677 UChar
*pivotSource
, *pivotTarget
;
2679 errorCode
=U_ZERO_ERROR
;
2680 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
2681 if(U_FAILURE(errorCode
)) {
2682 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode
));
2686 for(i
=0; i
<LENGTHOF(converterNames
); ++i
) {
2687 errorCode
=U_ZERO_ERROR
;
2688 cnv
=ucnv_open(converterNames
[i
], &errorCode
);
2689 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_ESCAPE
, UCNV_ESCAPE_XML_DEC
,
2690 NULL
, NULL
, &errorCode
);
/* one check covers both the open and the callback setup above */
2691 if(U_FAILURE(errorCode
)) {
2692 log_data_err("unable to open %s converter - %s\n",
2693 converterNames
[i
], u_errorName(errorCode
));
/* fill dest with 9 so the check below can detect a write past expectedLength */
2698 uprv_memset(dest
, 9, sizeof(dest
));
2699 if(i
==LENGTHOF(converterNames
)-1) {
2700 /* conversion to UTF-8 yields two U+FFFD directly */
2704 /* conversion to a non-Unicode charset yields two NCRs */
2711 pivotSource
=pivotTarget
=pivotBuffer
;
2714 &target
, dest
+expectedLength
,
2715 &src
, bad_utf8
+sizeof(bad_utf8
),
2716 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotBuffer
+LENGTHOF(pivotBuffer
),
2717 TRUE
, TRUE
, &errorCode
);
2718 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| src
!=bad_utf8
+2 ||
2719 target
!=dest
+expectedLength
|| 0!=uprv_memcmp(dest
, expected
, expectedLength
) ||
2720 dest
[expectedLength
]!=9
2722 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames
[i
]);
2726 ucnv_close(utf8Cnv
);
/*
 * Tests ucnv_fromAlgorithmic() and ucnv_toAlgorithmic() with a Shift-JIS
 * converter: explicit-length input into an exactly-sized target,
 * NUL-terminated input (length -1), an empty source, BOM-only UTF-16 and
 * UTF-32 sources with a zero-capacity target, a pre-set failure code that
 * must be left unchanged, a NULL source, and an invalid algorithmic type.
 * The body is guarded by !UCONFIG_NO_LEGACY_CONVERSION.
 */
2730 TestConvertAlgorithmic() {
2731 #if !UCONFIG_NO_LEGACY_CONVERSION
2732 static const uint8_t
2734 /* 4e00 30a1 ff61 0410 */
2735 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2738 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2742 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2743 * SUB, SUB, 0x40, SUB, SUB, 0x40
2745 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2748 0xfe, 0xff /* BOM only, no text */
2751 0xff, 0xfe, 0, 0 /* BOM only, no text */
2754 char target
[100], utf8NUL
[100], shiftJISNUL
[100];
2757 UErrorCode errorCode
;
2761 errorCode
=U_ZERO_ERROR
;
2762 cnv
=ucnv_open("Shift-JIS", &errorCode
);
2763 if(U_FAILURE(errorCode
)) {
2764 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode
));
/* build NUL-terminated copies of both byte strings for the length -1 cases */
2769 memcpy(utf8NUL
, utf8
, sizeof(utf8
));
2770 utf8NUL
[sizeof(utf8
)]=0;
2771 memcpy(shiftJISNUL
, shiftJIS
, sizeof(shiftJIS
));
2772 shiftJISNUL
[sizeof(shiftJIS
)]=0;
2775 * The to/from algorithmic convenience functions share a common implementation,
2776 * so we need not test all permutations of them.
2779 /* length in, not terminated out */
2780 errorCode
=U_ZERO_ERROR
;
2781 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF8
, target
, sizeof(shiftJIS
), (const char *)utf8
, sizeof(utf8
), &errorCode
);
2782 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2783 length
!=sizeof(shiftJIS
) ||
2784 memcmp(target
, shiftJIS
, length
)!=0
2786 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2787 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2790 /* terminated in and out */
2791 memset(target
, 0x55, sizeof(target
));
2792 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2793 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, -1, &errorCode
);
2794 if( errorCode
!=U_ZERO_ERROR
||
2795 length
!=sizeof(utf8
) ||
2796 memcmp(target
, utf8
, length
)!=0
/* NOTE(review): the check above expects sizeof(utf8), but the message below prints sizeof(shiftJIS) as the expected length */
2798 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2799 u_errorName(errorCode
), length
, sizeof(shiftJIS
));
2802 /* empty string, some target buffer */
2803 errorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2804 length
=ucnv_toAlgorithmic(UCNV_UTF8
, cnv
, target
, sizeof(target
), shiftJISNUL
, 0, &errorCode
);
2805 if( errorCode
!=U_ZERO_ERROR
||
2808 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2809 u_errorName(errorCode
), length
);
2812 /* pseudo-empty string, no target buffer */
2813 errorCode
=U_ZERO_ERROR
;
2814 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2815 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2818 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2819 u_errorName(errorCode
), length
);
2822 errorCode
=U_ZERO_ERROR
;
2823 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF32
, target
, 0, (const char *)utf32
, 4, &errorCode
);
2824 if( errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
2827 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2828 u_errorName(errorCode
), length
);
/* a failure code set before the call must come back unchanged */
2832 errorCode
=U_MESSAGE_PARSE_ERROR
;
2833 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, (const char *)utf16
, 2, &errorCode
);
2834 if(errorCode
!=U_MESSAGE_PARSE_ERROR
) {
2835 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode
));
/* NULL source with a non-zero length must be rejected */
2839 errorCode
=U_ZERO_ERROR
;
2840 length
=ucnv_fromAlgorithmic(cnv
, UCNV_UTF16
, target
, 0, NULL
, 2, &errorCode
);
2841 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2842 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode
));
2845 /* illegal alg. type */
2846 errorCode
=U_ZERO_ERROR
;
2847 length
=ucnv_fromAlgorithmic(cnv
, (UConverterType
)99, target
, 0, (const char *)utf16
, 2, &errorCode
);
2848 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
2849 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode
));
/*
 * Walks the local converter[] table of {maxSize, name} pairs, opens each
 * converter through cnv_open(), and logs an error when
 * ucnv_getMaxCharSize() differs from the table's maxSize. Afterwards checks
 * that UCNV_GET_MAX_BYTES_FOR_STRING(1, 2) is not below 10.
 * NOTE(review): the body of the U_FAILURE(status) branch is not visible
 * here; presumably unopenable converters are skipped.
 */
2855 static void TestLMBCSMaxChar(void) {
2856 static const struct {
2860 /* some non-LMBCS converters - perfect test setup here */
2871 { 4, "IMAP-mailbox-name"},
2874 { 1, "windows-1256"},
2886 { 3, "ISO-2022-KR"},
2887 { 6, "ISO-2022-JP"},
2888 { 8, "ISO-2022-CN"},
2906 for (idx
= 0; idx
< LENGTHOF(converter
); idx
++) {
2907 UErrorCode status
= U_ZERO_ERROR
;
2908 UConverter
*cnv
= cnv_open(converter
[idx
].name
, &status
);
2909 if (U_FAILURE(status
)) {
2912 if (converter
[idx
].maxSize
!= ucnv_getMaxCharSize(cnv
)) {
2913 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2914 converter
[idx
].name
, converter
[idx
].maxSize
, ucnv_getMaxCharSize(cnv
));
2919 /* mostly test that the macro compiles */
2920 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2921 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
/*
 * Tests how ucnv_openU() and ucnv_open() react to converter names around the
 * UCNV_MAX_CONVERTER_NAME_LENGTH limit. A very long name is cut with NUL,
 * ',' or ' ' at offsets MAX+1, MAX and MAX-1. No call may return a
 * converter. Cases 1, 2, 4 and 6 (and 1U, 2U) expect
 * U_ILLEGAL_ARGUMENT_ERROR; cases 3, 5 and 7 (and 3U) expect
 * U_FILE_ACCESS_ERROR.
 * NOTE(review): err is presumably reset between cases; those lines are not
 * visible here.
 */
2926 static void TestJ1968(void) {
2927 UErrorCode err
= U_ZERO_ERROR
;
2929 char myConvName
[] = "My really really really really really really really really really really really"
2930 " really really really really really really really really really really really"
2931 " really really really really really really really really long converter name";
2932 UChar myConvNameU
[sizeof(myConvName
)];
2934 u_charsToUChars(myConvName
, myConvNameU
, sizeof(myConvName
));
/* UChar name terminated one past the limit */
2937 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
+1] = 0;
2938 cnv
= ucnv_openU(myConvNameU
, &err
);
2939 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2940 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* UChar name terminated exactly at the limit */
2944 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2945 cnv
= ucnv_openU(myConvNameU
, &err
);
2946 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2947 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
/* UChar name one shorter than the limit: accepted as a name, but no such converter exists */
2951 myConvNameU
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
2952 cnv
= ucnv_openU(myConvNameU
, &err
);
2953 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2954 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
/* same checks with the char* API; here the full, uncut name is passed first */
2961 cnv
= ucnv_open(myConvName
, &err
);
2962 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2963 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2967 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = ',';
2968 cnv
= ucnv_open(myConvName
, &err
);
2969 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2970 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2974 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
2975 cnv
= ucnv_open(myConvName
, &err
);
2976 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2977 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
2981 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ',';
/* strncpy with count 7 copies "locale=" without a terminator, so the rest of the long name stays in place */
2982 strncpy(myConvName
+ UCNV_MAX_CONVERTER_NAME_LENGTH
, "locale=", 7);
2983 cnv
= ucnv_open(myConvName
, &err
);
2984 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2985 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
2988 /* The comma isn't really a part of the converter name. */
2990 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
] = 0;
2991 cnv
= ucnv_open(myConvName
, &err
);
2992 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
2993 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
/* unlike the comma in case 5, a space at MAX-1 is expected to give U_ILLEGAL_ARGUMENT_ERROR */
2997 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = ' ';
2998 cnv
= ucnv_open(myConvName
, &err
);
2999 if (cnv
|| err
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3000 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err
));
3004 myConvName
[UCNV_MAX_CONVERTER_NAME_LENGTH
-1] = 0;
3005 cnv
= ucnv_open(myConvName
, &err
);
3006 if (cnv
|| err
!= U_FILE_ACCESS_ERROR
) {
3007 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err
));
3012 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Opens converter <name> twice: plainly, and with
 * UCNV_SWAP_LFNL_OPTION_STRING appended to the name. Checks that
 * ucnv_getName() of the second converter contains the option string exactly
 * when <swap> is true. Then converts the fixed text[] with both converters,
 * compares the byte outputs, converts back with both converters and compares
 * the UChar outputs; in each comparison the value from the normal converter
 * is adjusted for the LF/NL swap before it is compared.
 * NOTE(review): the swapped[] buffer (32 bytes) first holds the option-laden
 * converter name and is later reused as conversion output.
 */
3014 testSwap(const char *name
, UBool swap
) {
3016 * Test Unicode text.
3017 * Contains characters that are the highest for some of the
3018 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3019 * tables copies the entire tables.
3021 static const UChar text
[]={
3022 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3025 UChar uNormal
[32], uSwapped
[32];
3026 char normal
[32], swapped
[32];
3030 int32_t i
, normalLength
, swappedLength
;
3034 const char *swappedName
;
3035 UConverter
*cnv
, *swapCnv
;
3036 UErrorCode errorCode
;
3038 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3040 /* open both the normal and the LF/NL-swapping converters */
3041 strcpy(swapped
, name
);
3042 strcat(swapped
, UCNV_SWAP_LFNL_OPTION_STRING
);
3044 errorCode
=U_ZERO_ERROR
;
3045 swapCnv
=ucnv_open(swapped
, &errorCode
);
3046 cnv
=ucnv_open(name
, &errorCode
);
3047 if(U_FAILURE(errorCode
)) {
3048 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name
, swapped
, u_errorName(errorCode
));
3052 /* the name must contain the swap option if and only if we expect the converter to swap */
3053 swappedName
=ucnv_getName(swapCnv
, &errorCode
);
3054 if(U_FAILURE(errorCode
)) {
3055 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name
, u_errorName(errorCode
));
3059 pc
=strstr(swappedName
, UCNV_SWAP_LFNL_OPTION_STRING
);
3060 if(swap
!= (pc
!=NULL
)) {
3061 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name
, swappedName
, swap
);
3065 /* convert to EBCDIC */
3068 ucnv_fromUnicode(cnv
, &pc
, normal
+LENGTHOF(normal
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3069 normalLength
=(int32_t)(pc
-normal
);
3073 ucnv_fromUnicode(swapCnv
, &pc
, swapped
+LENGTHOF(swapped
), &pcu
, text
+LENGTHOF(text
), NULL
, TRUE
, &errorCode
);
3074 swappedLength
=(int32_t)(pc
-swapped
);
3076 if(U_FAILURE(errorCode
)) {
3077 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name
, u_errorName(errorCode
));
3081 /* compare EBCDIC output */
3082 if(normalLength
!=swappedLength
) {
3083 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3086 for(i
=0; i
<normalLength
; ++i
) {
3087 /* swap EBCDIC LF/NL for comparison */
3092 } else if(c
==0x25) {
3098 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name
, i
, (uint8_t)swapped
[i
]);
3103 /* convert back to Unicode (may not roundtrip) */
3106 ucnv_toUnicode(cnv
, &pu
, uNormal
+LENGTHOF(uNormal
), (const char **)&pc
, normal
+normalLength
, NULL
, TRUE
, &errorCode
);
3107 normalLength
=(int32_t)(pu
-uNormal
);
/* NOTE(review): the source limit below is normal+swappedLength; the assignment of pc is not visible here -- confirm that the swapping converter is meant to read the normal[] bytes */
3111 ucnv_toUnicode(swapCnv
, &pu
, uSwapped
+LENGTHOF(uSwapped
), (const char **)&pc
, normal
+swappedLength
, NULL
, TRUE
, &errorCode
);
3112 swappedLength
=(int32_t)(pu
-uSwapped
);
3114 if(U_FAILURE(errorCode
)) {
3115 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name
, u_errorName(errorCode
));
3119 /* compare Unicode output */
3120 if(normalLength
!=swappedLength
) {
3121 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name
, normalLength
, swappedLength
);
3124 for(i
=0; i
<normalLength
; ++i
) {
3125 /* swap EBCDIC LF/NL for comparison */
3130 } else if(u
==0x85) {
3135 if(u
!=uSwapped
[i
]) {
3136 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name
, i
, uSwapped
[i
]);
3144 ucnv_close(swapCnv
);
/*
 * Runs testSwap() for every {name, swap} entry of the local tests[] table:
 * three "ibm-" converters with swap TRUE and iso-8859-3 with swap FALSE.
 * A second definition with an empty body follows; presumably it is the
 * fallback for builds with UCONFIG_NO_LEGACY_CONVERSION (the preprocessor
 * lines separating the two are not visible here).
 */
3148 TestEBCDICSwapLFNL() {
3149 static const struct {
3154 { "ibm-1047", TRUE
},
3155 { "ibm-1140", TRUE
},
3156 { "ibm-930", TRUE
},
3157 { "iso-8859-3", FALSE
}
3162 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
3163 testSwap(tests
[i
].name
, tests
[i
].swap
);
3168 TestEBCDICSwapLFNL() {
3169 /* test nothing... */
/* version constant {3,4,0,0}; its use is not visible in this part of the file */
3173 static const UVersionInfo ICU_34
= {3,4,0,0};
/*
 * Tests ucnv_fromUCountPending() with the "test3" converter from the test
 * data package. Part one: for each fromUnicodeTests[] entry, converts the
 * input with ucnv_fromUnicode() (flush FALSE) and compares the pending count
 * with the entry's exp field. Part two: feeds head, middle and tail in three
 * separate calls into a target limited to 2 bytes, checking the pending
 * count after each call; the third call is expected to end with
 * U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): the log messages name ucnv_fromUInputConsumed and
 * ucnv_fromUInputHeld although the function called is
 * ucnv_fromUCountPending, and one message contains the typo "expeced".
 */
3175 static void TestFromUCountPending(){
3176 #if !UCONFIG_NO_LEGACY_CONVERSION
3177 UErrorCode status
= U_ZERO_ERROR
;
3178 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3179 static const struct {
3183 }fromUnicodeTests
[] = {
3186 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3187 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3190 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3191 if(U_FAILURE(status
)){
3192 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3195 for(i
=0; i
<LENGTHOF(fromUnicodeTests
); ++i
) {
3198 char* targetLimit
= target
+ 10;
3199 const UChar
* source
= fromUnicodeTests
[i
].input
;
3200 const UChar
* sourceLimit
= source
+ fromUnicodeTests
[i
].len
;
3203 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3204 len
= ucnv_fromUCountPending(cnv
, &status
);
3205 if(U_FAILURE(status
)){
3206 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3207 status
= U_ZERO_ERROR
;
3210 if(len
!= fromUnicodeTests
[i
].exp
){
3211 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3214 status
= U_ZERO_ERROR
;
3217 * The converter has to read the tail before it knows that
3218 * only head alone matches.
3219 * At the end, the output for head will overflow the target,
3220 * middle will be pending, and tail will not have been consumed.
3223 \U00101234 -> x (<U101234> \x07 |0)
3224 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3225 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3226 \U00060007 -> unassigned
3228 static const UChar head
[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3229 static const UChar middle
[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3230 static const UChar tail
[] = {0xDC07,0x0000};/* second half of \U00060007 */
3233 char* targetLimit
= target
+ 2; /* expect overflow from converting \U00101234\U00050005 */
3234 const UChar
* source
= head
;
3235 const UChar
* sourceLimit
= source
+ u_strlen(head
);
3238 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3239 len
= ucnv_fromUCountPending(cnv
, &status
);
3240 if(U_FAILURE(status
)){
3241 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3242 status
= U_ZERO_ERROR
;
3245 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3248 sourceLimit
= source
+ u_strlen(middle
);
3249 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3250 len
= ucnv_fromUCountPending(cnv
, &status
);
3251 if(U_FAILURE(status
)){
3252 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3253 status
= U_ZERO_ERROR
;
3256 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3259 sourceLimit
= source
+ u_strlen(tail
);
3260 ucnv_fromUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
/* here an overflow is the expected outcome, so anything else is reported */
3261 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3262 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3264 status
= U_ZERO_ERROR
;
3265 len
= ucnv_fromUCountPending(cnv
, &status
);
3266 /* middle[1] is pending, tail has not been consumed */
3267 if(U_FAILURE(status
)){
3268 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status
));
3271 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
/*
 * Tests ucnv_toUCountPending(). Part one uses the "test3" test-data
 * converter with UCNV_TO_U_CALLBACK_STOP: each toUnicodeTests[] input is
 * converted with ucnv_toUnicode() (flush FALSE) and the pending count is
 * compared with the entry's exp field. Part two opens "test4", feeds head,
 * mid and tail in three calls into a target limited to one UChar (zero for
 * the last call), and checks the pending count after each; the last call is
 * expected to end with U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): oldToUAction is a NULL pointer passed as the callback's
 * old-action argument in both ucnv_setToUCallBack() calls.
 */
3279 TestToUCountPending(){
3280 #if !UCONFIG_NO_LEGACY_CONVERSION
3281 UErrorCode status
= U_ZERO_ERROR
;
3282 static const struct {
3286 }toUnicodeTests
[] = {
3288 {{0x05, 0x01, 0x02},3,3},
3290 {{0x07, 0x00, 0x01, 0x02},4,4},
3294 UConverterToUCallback
*oldToUAction
= NULL
;
3295 UConverter
* cnv
= ucnv_openPackage(loadTestData(&status
), "test3", &status
);
3296 if(U_FAILURE(status
)){
3297 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3300 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3301 for(i
=0; i
<LENGTHOF(toUnicodeTests
); ++i
) {
3303 UChar
* target
= tgt
;
3304 UChar
* targetLimit
= target
+ 20;
3305 const char* source
= toUnicodeTests
[i
].input
;
3306 const char* sourceLimit
= source
+ toUnicodeTests
[i
].len
;
3309 ucnv_toUnicode(cnv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3310 len
= ucnv_toUCountPending(cnv
,&status
);
3311 if(U_FAILURE(status
)){
3312 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3313 status
= U_ZERO_ERROR
;
3316 if(len
!= toUnicodeTests
[i
].exp
){
3317 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3320 status
= U_ZERO_ERROR
;
3325 * The converter has to read the tail before it knows that
3326 * only head alone matches.
3327 * At the end, the output for head will overflow the target,
3328 * mid will be pending, and tail will not have been consumed.
3330 char head
[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3331 char mid
[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3332 char tail
[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3334 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3335 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3336 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3337 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3340 UChar
* target
= tgt
;
3341 UChar
* targetLimit
= target
+ 1; /* expect overflow from converting */
3342 const char* source
= head
;
3343 const char* sourceLimit
= source
+ strlen(head
);
3345 cnv
= ucnv_openPackage(loadTestData(&status
), "test4", &status
);
3346 if(U_FAILURE(status
)){
/* NOTE(review): the message below says test3, but the converter opened just above is "test4" */
3347 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status
));
3350 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, oldToUAction
, NULL
, &status
);
3351 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3352 len
= ucnv_toUCountPending(cnv
,&status
);
3353 if(U_FAILURE(status
)){
3354 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3357 log_err("Did not get the expected len for head.\n");
3360 sourceLimit
= source
+strlen(mid
);
3361 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3362 len
= ucnv_toUCountPending(cnv
,&status
);
3363 if(U_FAILURE(status
)){
3364 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3367 log_err("Did not get the expected len for mid.\n");
3371 sourceLimit
= source
+strlen(tail
);
/* no room at all in the target for the last call, so an overflow is the expected outcome */
3372 targetLimit
= target
;
3373 ucnv_toUnicode(cnv
,&target
, targetLimit
, &source
, sourceLimit
, NULL
, FALSE
, &status
);
3374 if(status
!= U_BUFFER_OVERFLOW_ERROR
){
3375 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status
));
3377 status
= U_ZERO_ERROR
;
3378 len
= ucnv_toUCountPending(cnv
,&status
);
3379 /* mid[4] is pending, tail has not been consumed */
3380 if(U_FAILURE(status
)){
3381 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status
));
3384 log_err("Did not get the expected len for tail.\n");
/*
 * Sets the default converter name to `name` and checks that both
 * ucnv_getDefaultName() and the name of a converter opened with a NULL name
 * are equal to `expected`.
 *
 * name      value handed to ucnv_setDefaultName()
 * expected  name that should be reported afterwards
 */
3391 static void TestOneDefaultNameChange(const char *name
, const char *expected
) {
3392 UErrorCode status
= U_ZERO_ERROR
;
3394 ucnv_setDefaultName(name
);
3395 if(strcmp(ucnv_getDefaultName(), expected
)==0)
3396 log_verbose("setDefaultName of %s works.\n", name
);
3398 log_err("setDefaultName of %s failed\n", name
);
/* Opening with a NULL name requests the default converter. */
3399 cnv
=ucnv_open(NULL
, &status
);
3400 if (U_FAILURE(status
) || cnv
== NULL
) {
3401 log_err("opening the default converter of %s failed\n", name
);
/* The opened converter must report the expected name as well. */
3404 if(strcmp(ucnv_getName(cnv
, &status
), expected
)==0)
3405 log_verbose("ucnv_getName of %s works.\n", name
);
3407 log_err("ucnv_getName of %s failed\n", name
);
/*
 * Saves the current default converter name, runs TestOneDefaultNameChange()
 * for several names, and finally restores the saved default.
 */
3411 static void TestDefaultName(void) {
3412 /*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
3413 static char defaultName
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1];
/*
 * NOTE(review): unbounded strcpy; relies on the default name fitting in
 * UCNV_MAX_CONVERTER_NAME_LENGTH + 1 bytes.
 */
3414 strcpy(defaultName
, ucnv_getDefaultName());
3416 log_verbose("getDefaultName returned %s\n", defaultName
);
3418 /*change the default name by setting it */
3419 TestOneDefaultNameChange("UTF-8", "UTF-8");
/*
 * Under U_CHARSET_IS_UTF8 every request below is expected to leave the
 * default at "UTF-8"; the later calls expect the requested name itself.
 */
3420 #if U_CHARSET_IS_UTF8
3421 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3422 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3423 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3425 # if !UCONFIG_NO_LEGACY_CONVERSION
3426 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3427 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3429 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3432 /*set the default name back*/
3433 ucnv_setDefaultName(defaultName
);
3436 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
/*
 * Checks ucnv_compareNames() over a NULL-terminated list of strings.
 * Judging by the tables its caller passes, the first entry is a relation
 * string ("=" or "<") and the rest are converter names; the lines that read
 * them out of `names` are not visible here - TODO confirm.
 *
 * names  NULL-terminated array of C strings
 */
3450 compareNames(const char **names
) {
3451 const char *relation
, *name1
, *name2
;
/* Presumably picks the expected sign (rel) from the relation character. */
3455 if(*relation
=='=') {
3457 } else if(*relation
=='<') {
/* Walk the remaining entries up to the NULL terminator, comparing each with name1. */
3467 while((name2
=*names
++)!=NULL
) {
3468 result
=ucnv_compareNames(name1
, name2
);
/* Only the sign of the comparison result is checked, not its magnitude. */
3469 if(sign(result
)!=rel
) {
3470 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1
, name2
, result
, rel
);
/*
 * Runs compareNames() on four NULL-terminated tables: two tagged "=" (UTF-8
 * and ibm-37 spelling variants) and two tagged "<" (macos and UTF-08x name
 * sequences).
 */
3477 TestCompareNames() {
3478 static const char *equalUTF8
[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL
};
3479 static const char *equalIBM
[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL
};
3480 static const char *lessMac
[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL
};
3481 static const char *lessUTF080
[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL
};
3483 compareNames(equalUTF8
);
3484 compareNames(equalIBM
);
3485 compareNames(lessMac
);
3486 compareNames(lessUTF080
);
/*
 * Substitution-string checks visible in this body:
 *  - UTF-16 and UTF-32: the output of converting the single UChar 0xd900 must
 *    be recognized by ucnv_detectUnicodeSignature(), i.e. a BOM was written.
 *  - ISO-8859-1: ucnv_setSubstString() followed by ucnv_getSubstChars() must
 *    yield the five bytes in subChars.
 *  - HZ (only when legacy conversion is enabled): ucnv_getSubstChars() must
 *    report length 0 after ucnv_setSubstString().
 */
3491 static const UChar surrogate
[1]={ 0xd900 };
/* The same five values as UChars (sub) and as bytes (subChars) for comparison. */
3494 static const UChar sub
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3495 static const char subChars
[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3497 UErrorCode errorCode
;
3501 /* UTF-16/32: test that the BOM is output before the sub character */
3502 errorCode
=U_ZERO_ERROR
;
3503 cnv
=ucnv_open("UTF-16", &errorCode
);
3504 if(U_FAILURE(errorCode
)) {
3505 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode
));
3508 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
/* A NULL result from signature detection means no BOM was found in the output. */
3510 if(U_FAILURE(errorCode
) ||
3512 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3514 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3517 errorCode
=U_ZERO_ERROR
;
3518 cnv
=ucnv_open("UTF-32", &errorCode
);
3519 if(U_FAILURE(errorCode
)) {
3520 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode
));
3523 length
=ucnv_fromUChars(cnv
, buffer
, (int32_t)sizeof(buffer
), surrogate
, 1, &errorCode
);
3525 if(U_FAILURE(errorCode
) ||
3527 NULL
== ucnv_detectUnicodeSignature(buffer
, length
, NULL
, &errorCode
)
3529 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3532 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3533 errorCode
=U_ZERO_ERROR
;
3534 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
3535 if(U_FAILURE(errorCode
)) {
3536 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode
));
3539 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3540 if(U_FAILURE(errorCode
)) {
3541 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode
));
/* len8 starts as the buffer size, is passed by address, and is compared afterwards. */
3543 len8
= sizeof(buffer
);
3544 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3545 /* Stateless converter, we expect the string converted to charset bytes. */
3546 if(U_FAILURE(errorCode
) || len8
!=sizeof(subChars
) || 0!=uprv_memcmp(buffer
, subChars
, len8
)) {
3547 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode
));
3552 #if !UCONFIG_NO_LEGACY_CONVERSION
3553 errorCode
=U_ZERO_ERROR
;
3554 cnv
=ucnv_open("HZ", &errorCode
);
3555 if(U_FAILURE(errorCode
)) {
3556 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode
));
3559 ucnv_setSubstString(cnv
, sub
, LENGTHOF(sub
), &errorCode
);
3560 if(U_FAILURE(errorCode
)) {
3561 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode
));
3563 len8
= sizeof(buffer
);
3564 ucnv_getSubstChars(cnv
, buffer
, &len8
, &errorCode
);
3565 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3566 if(U_FAILURE(errorCode
) || len8
!=0) {
3567 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode
));
3573 * Further testing of ucnv_setSubstString() is done via intltest convert.
3574 * We do not test edge cases of illegal arguments and similar because the
3575 * function implementation uses all of its parameters in calls to other
3576 * functions with UErrorCode parameters.
/*
 * Verifies, with a UTF-8 converter, that ucnv_fromUnicode() and
 * ucnv_toUnicode() report U_ILLEGAL_ARGUMENT_ERROR for malformed UChar
 * buffer arguments, and that the caller's buffers still hold their initial
 * values afterwards.
 */
3581 InvalidArguments() {
3583 UErrorCode errorCode
;
/* Sentinel contents (1,1 and 2,2) are re-checked at the end of the test. */
3584 char charBuffer
[2] = {1, 1};
3585 char ucharAsCharBuffer
[2] = {2, 2};
3586 char *charsPtr
= charBuffer
;
3587 UChar
*ucharsPtr
= (UChar
*)ucharAsCharBuffer
;
/* One byte past ucharsPtr, so the two pointers are not a whole UChar apart. */
3588 UChar
*ucharsBadPtr
= (UChar
*)(ucharAsCharBuffer
+ 1);
3590 errorCode
=U_ZERO_ERROR
;
3591 cnv
=ucnv_open("UTF-8", &errorCode
);
3592 if(U_FAILURE(errorCode
)) {
3593 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode
));
3597 errorCode
=U_ZERO_ERROR
;
3598 /* This one should fail because an incomplete UChar is being passed in */
3599 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsPtr
, ucharsBadPtr
, NULL
, TRUE
, &errorCode
);
3600 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3601 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3604 errorCode
=U_ZERO_ERROR
;
3605 /* This one should fail because ucharsBadPtr is greater than ucharsPtr */
3606 ucnv_fromUnicode(cnv
, &charsPtr
, charsPtr
, (const UChar
**)&ucharsBadPtr
, ucharsPtr
, NULL
, TRUE
, &errorCode
);
3607 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3608 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
3611 errorCode
=U_ZERO_ERROR
;
3612 /* This one should fail because an incomplete UChar is being passed in */
3613 ucnv_toUnicode(cnv
, &ucharsPtr
, ucharsBadPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3614 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3615 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode
));
3618 errorCode
=U_ZERO_ERROR
;
3619 /* This one should fail because ucharsBadPtr is greater than ucharsPtr */
3620 ucnv_toUnicode(cnv
, &ucharsBadPtr
, ucharsPtr
, (const char **)&charsPtr
, charsPtr
, NULL
, TRUE
, &errorCode
);
3621 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
3622 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode
));
/* None of the four calls above may have written into either buffer. */
3625 if (charBuffer
[0] != 1 || charBuffer
[1] != 1
3626 || ucharAsCharBuffer
[0] != 2 || ucharAsCharBuffer
[1] != 2)
3628 log_err("Data was incorrectly written to buffers\n");
/*
 * For each pair in names[], opens a converter by the first string and checks
 * that ucnv_getName() returns the second. The name check runs only when
 * ucnv_open() succeeded.
 */
3634 static void TestGetName() {
/* Pairs: even index = name passed to ucnv_open(), odd index = expected ucnv_getName() result. */
3635 static const char *const names
[] = {
3636 "Unicode", "UTF-16",
3637 "UnicodeBigUnmarked", "UTF-16BE",
3638 "UnicodeBig", "UTF-16BE,version=1",
3639 "UnicodeLittleUnmarked", "UTF-16LE",
3640 "UnicodeLittle", "UTF-16LE,version=1",
3641 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
/* Step by two so that i always lands on the first element of a pair. */
3644 for(i
= 0; i
< LENGTHOF(names
); i
+= 2) {
3645 UErrorCode errorCode
= U_ZERO_ERROR
;
3646 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3647 if(U_SUCCESS(errorCode
)) {
3648 const char *name
= ucnv_getName(cnv
, &errorCode
);
3649 if(U_FAILURE(errorCode
) || 0 != strcmp(name
, names
[i
+1])) {
3650 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3651 names
[i
], name
, names
[i
+1], u_errorName(errorCode
));
3658 static void TestUTFBOM() {
3659 static const UChar a16
[] = { 0x61 };
3660 static const char *const names
[] = {
3668 static const uint8_t expected
[][5] = {
3670 { 4, 0xfe, 0xff, 0, 0x61 },
3671 { 4, 0xfe, 0xff, 0, 0x61 },
3673 { 4, 0xff, 0xfe, 0x61, 0 },
3674 { 4, 0xff, 0xfe, 0x61, 0 },
3678 { 4, 0xfe, 0xff, 0, 0x61 },
3681 { 4, 0xff, 0xfe, 0x61, 0 }
3687 for(i
= 0; i
< LENGTHOF(names
); ++i
) {
3688 UErrorCode errorCode
= U_ZERO_ERROR
;
3689 UConverter
*cnv
= ucnv_open(names
[i
], &errorCode
);
3691 const uint8_t *exp
= expected
[i
];
3692 if (U_FAILURE(errorCode
)) {
3693 log_err_status(errorCode
, "Unable to open converter: %s got error code: %s\n", names
[i
], u_errorName(errorCode
));
3696 length
= ucnv_fromUChars(cnv
, bytes
, (int32_t)sizeof(bytes
), a16
, 1, &errorCode
);
3698 if(U_FAILURE(errorCode
) || length
!= exp
[0] || 0 != memcmp(bytes
, exp
+1, length
)) {
3699 log_err("unexpected %s BOM writing behavior -- %s\n",
3700 names
[i
], u_errorName(errorCode
));