]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/ccapitst.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / ccapitst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*****************************************************************************
7 *
8 * File CU_CAPITST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API
13 ******************************************************************************
14 */
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/putil.h"
23 #include "unicode/uset.h"
24 #include "unicode/ustring.h"
25 #include "ucnv_bld.h" /* for sizeof(UConverter) */
26 #include "cmemory.h" /* for UAlignedMemory */
27 #include "cintltst.h"
28 #include "ccapitst.h"
29 #include "cstring.h"
30
/* Number of elements in a true array (not a pointer); the result is an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Number of entries in the per-code-page tables used by TestConvert(). */
#define NUM_CODEPAGE 1
/*
 * Element count of the conversion buffers allocated in TestConvert().
 * Parenthesized so that the macro always expands as a single operand
 * (e.g. in "x / MAX_FILE_LEN" or "x % MAX_FILE_LEN").
 */
#define MAX_FILE_LEN (1024*20)
/* Size in chars of the buffer that holds the test data file name. */
#define UCS_FILE_NAME_SIZE 512
36
37 /*returns an action other than the one provided*/
38 #if !UCONFIG_NO_LEGACY_CONVERSION
39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
41 #endif
42
43 static UConverter *
44 cnv_open(const char *name, UErrorCode *pErrorCode) {
45 if(name!=NULL && name[0]=='*') {
46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
47 } else {
48 return ucnv_open(name, pErrorCode);
49 }
50 }
51
52
53 static void ListNames(void);
54 static void TestFlushCache(void);
55 static void TestDuplicateAlias(void);
56 static void TestCCSID(void);
57 static void TestJ932(void);
58 static void TestJ1968(void);
59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
60 static void TestLMBCSMaxChar(void);
61 #endif
62
63 #if !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestConvertSafeCloneCallback(void);
65 #endif
66
67 static void TestEBCDICSwapLFNL(void);
68 static void TestConvertEx(void);
69 static void TestConvertExFromUTF8(void);
70 static void TestConvertExFromUTF8_C5F0(void);
71 static void TestConvertAlgorithmic(void);
72 void TestDefaultConverterError(void); /* defined in cctest.c */
73 void TestDefaultConverterSet(void); /* defined in cctest.c */
74 static void TestToUCountPending(void);
75 static void TestFromUCountPending(void);
76 static void TestDefaultName(void);
77 static void TestCompareNames(void);
78 static void TestSubstString(void);
79 static void InvalidArguments(void);
80 static void TestGetName(void);
81 static void TestUTFBOM(void);
82
83 void addTestConvert(TestNode** root);
84
85 void addTestConvert(TestNode** root)
86 {
87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames");
88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert");
89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache");
90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
93 #if !UCONFIG_NO_LEGACY_CONVERSION
94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
95 #endif
96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
101 #endif
102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
112 #endif
113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
119 }
120
121 static void ListNames(void) {
122 UErrorCode err = U_ZERO_ERROR;
123 int32_t testLong1 = 0;
124 const char* available_conv;
125 UEnumeration *allNamesEnum = NULL;
126 int32_t allNamesCount = 0;
127 uint16_t count;
128
129 log_verbose("Testing ucnv_openAllNames()...");
130 allNamesEnum = ucnv_openAllNames(&err);
131 if(U_FAILURE(err)) {
132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
133 }
134 else {
135 const char *string = NULL;
136 int32_t len = 0;
137 int32_t count1 = 0;
138 int32_t count2 = 0;
139 allNamesCount = uenum_count(allNamesEnum, &err);
140 while ((string = uenum_next(allNamesEnum, &len, &err))) {
141 count1++;
142 log_verbose("read \"%s\", length %i\n", string, len);
143 }
144 if (U_FAILURE(err)) {
145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
146 err = U_ZERO_ERROR;
147 }
148 uenum_reset(allNamesEnum, &err);
149 while ((string = uenum_next(allNamesEnum, &len, &err))) {
150 count2++;
151 ucnv_close(ucnv_open(string, &err));
152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
153 err = U_ZERO_ERROR;
154 }
155 if (count1 != count2) {
156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
157 }
158 }
159 uenum_close(allNamesEnum);
160 err = U_ZERO_ERROR;
161
162 /*Tests ucnv_getAvailableName(), getAvialableCount()*/
163
164 log_verbose("Testing ucnv_countAvailable()...");
165
166 testLong1=ucnv_countAvailable();
167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
168
169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
170
171 available_conv = ucnv_getAvailableName(testLong1);
172 /*test ucnv_getAvailableName with err condition*/
173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
174 available_conv = ucnv_getAvailableName(-1);
175 if(available_conv != NULL){
176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
177 }
178
179 /* Test ucnv_countAliases() etc. */
180 count = ucnv_countAliases("utf-8", &err);
181 if(U_FAILURE(err)) {
182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
183 } else if(count <= 0) {
184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
185 } else {
186 /* try to get the aliases individually */
187 const char *alias;
188 alias = ucnv_getAlias("utf-8", 0, &err);
189 if(U_FAILURE(err)) {
190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
191 } else if(strcmp("UTF-8", alias) != 0) {
192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
193 } else {
194 uint16_t aliasNum;
195 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
196 alias = ucnv_getAlias("utf-8", aliasNum, &err);
197 if(U_FAILURE(err)) {
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
199 } else if(strlen(alias) > 20) {
200 /* sanity check */
201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
202 } else {
203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
204 }
205 }
206 if(U_SUCCESS(err)) {
207 /* try to fill an array with all aliases */
208 const char **aliases;
209 aliases=(const char **)malloc(count * sizeof(const char *));
210 if(aliases != 0) {
211 ucnv_getAliases("utf-8", aliases, &err);
212 if(U_FAILURE(err)) {
213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
214 } else {
215 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
216 /* compare the pointers with the ones returned individually */
217 alias = ucnv_getAlias("utf-8", aliasNum, &err);
218 if(U_FAILURE(err)) {
219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
220 } else if(aliases[aliasNum] != alias) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
222 }
223 }
224 }
225 free((char **)aliases);
226 }
227 }
228 }
229 }
230 }
231
232
233 static void TestConvert()
234 {
235 #if !UCONFIG_NO_LEGACY_CONVERSION
236 char myptr[4];
237 char save[4];
238 int32_t testLong1 = 0;
239 uint16_t rest = 0;
240 int32_t len = 0;
241 int32_t x = 0;
242 FILE* ucs_file_in = NULL;
243 UChar BOM = 0x0000;
244 UChar myUChar = 0x0000;
245 char* mytarget; /* [MAX_FILE_LEN] */
246 char* mytarget_1;
247 char* mytarget_use;
248 UChar* consumedUni = NULL;
249 char* consumed = NULL;
250 char* output_cp_buffer; /* [MAX_FILE_LEN] */
251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */
252 UChar* ucs_file_buffer_use;
253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
254 UChar* my_ucs_file_buffer_1;
255 int8_t ii = 0;
256 int32_t j = 0;
257 uint16_t codepage_index = 0;
258 int32_t cp = 0;
259 UErrorCode err = U_ZERO_ERROR;
260 char ucs_file_name[UCS_FILE_NAME_SIZE];
261 UConverterFromUCallback MIA1, MIA1_2;
262 UConverterToUCallback MIA2, MIA2_2;
263 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
264 UConverter* someConverters[5];
265 UConverter* myConverter = 0;
266 UChar* displayname = 0;
267
268 const char* locale;
269
270 UChar* uchar1 = 0;
271 UChar* uchar2 = 0;
272 UChar* uchar3 = 0;
273 int32_t targetcapacity2;
274 int32_t targetcapacity;
275 int32_t targetsize;
276 int32_t disnamelen;
277
278 const UChar* tmp_ucs_buf;
279 const UChar* tmp_consumedUni=NULL;
280 const char* tmp_mytarget_use;
281 const char* tmp_consumed;
282
283 /******************************************************************
284 Checking Unicode -> ksc
285 ******************************************************************/
286
287 const char* CodePagesToTest[NUM_CODEPAGE] =
288 {
289 "ibm-949_P110-1999"
290
291
292 };
293 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
294 {
295 949
296 };
297
298
299 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
300 {
301 1
302
303 };
304
305 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
306 {
307 2
308
309 };
310
311 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
312 {
313 0xAFFE
314 };
315
316 const char* CodePagesTestFiles[NUM_CODEPAGE] =
317 {
318 "uni-text.bin"
319 };
320
321
322 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
323 {
324 UCNV_IBM
325
326 };
327
328 const char* CodePagesLocale[NUM_CODEPAGE] =
329 {
330 "ko_KR"
331 };
332
333 UConverterFromUCallback oldFromUAction = NULL;
334 UConverterToUCallback oldToUAction = NULL;
335 const void* oldFromUContext = NULL;
336 const void* oldToUContext = NULL;
337
338 /* Allocate memory */
339 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
340 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
341 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
342 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
343
344 ucs_file_buffer_use = ucs_file_buffer;
345 mytarget_1=mytarget;
346 mytarget_use = mytarget;
347 my_ucs_file_buffer_1=my_ucs_file_buffer;
348
349 /* flush the converter cache to get a consistent state before the flushing is tested */
350 ucnv_flushCache();
351
352 /*Testing ucnv_openU()*/
353 {
354 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
355 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
356 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
357 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
358 UChar illegalName[100];
359 UConverter *converter=NULL;
360 err=U_ZERO_ERROR;
361 converter=ucnv_openU(converterName, &err);
362 if(U_FAILURE(err)){
363 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
364 }
365 ucnv_close(converter);
366 err=U_ZERO_ERROR;
367 converter=ucnv_openU(NULL, &err);
368 if(U_FAILURE(err)){
369 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
370 }
371 ucnv_close(converter);
372 /*testing with error value*/
373 err=U_ILLEGAL_ARGUMENT_ERROR;
374 converter=ucnv_openU(converterName, &err);
375 if(!(converter == NULL)){
376 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
377 }
378 ucnv_close(converter);
379 err=U_ZERO_ERROR;
380 u_uastrcpy(illegalName, "");
381 u_uastrcpy(illegalName, illegalNameChars);
382 ucnv_openU(illegalName, &err);
383 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
384 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
385 }
386
387 err=U_ZERO_ERROR;
388 ucnv_openU(firstSortedName, &err);
389 if(err!=U_FILE_ACCESS_ERROR){
390 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
391 }
392
393 err=U_ZERO_ERROR;
394 ucnv_openU(lastSortedName, &err);
395 if(err!=U_FILE_ACCESS_ERROR){
396 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
397 }
398
399 err=U_ZERO_ERROR;
400 }
401 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
402 {
403 UConverter *cnv=NULL;
404 err=U_ZERO_ERROR;
405 cnv=ucnv_open("ibm-949,Madhu", &err);
406 if(U_FAILURE(err)){
407 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
408 }
409 ucnv_close(cnv);
410
411 }
412 /*Testing ucnv_convert()*/
413 {
414 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
415 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
416 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
417 char *target=0;
418 sourceLimit=sizeof(source)/sizeof(source[0]);
419 err=U_ZERO_ERROR;
420 targetLimit=0;
421
422 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
423 if(err == U_BUFFER_OVERFLOW_ERROR){
424 err=U_ZERO_ERROR;
425 targetLimit=targetCapacity+1;
426 target=(char*)malloc(sizeof(char) * targetLimit);
427 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
428 }
429 if(U_FAILURE(err)){
430 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
431 }
432 else {
433 for(i=0; i<targetCapacity; i++){
434 if(target[i] != expectedTarget[i]){
435 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
436 }
437 }
438
439 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
440 if(U_FAILURE(err) || i!=7){
441 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
442 u_errorName(err), i);
443 }
444
445 /*Test error conditions*/
446 err=U_ZERO_ERROR;
447 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
448 if(i !=0){
449 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
450 }
451
452 err=U_ILLEGAL_ARGUMENT_ERROR;
453 sourceLimit=sizeof(source)/sizeof(source[0]);
454 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
455 if(i !=0 ){
456 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
457 }
458
459 err=U_ZERO_ERROR;
460 sourceLimit=sizeof(source)/sizeof(source[0]);
461 targetLimit=0;
462 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
463 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
464 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
465 }
466 err=U_ZERO_ERROR;
467 free(target);
468 }
469 }
470
471 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
472 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
473 err=U_ILLEGAL_ARGUMENT_ERROR;
474 if(ucnv_open(NULL, &err) != NULL){
475 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
476 }
477 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
478 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
479 }
480 err=U_ZERO_ERROR;
481
482 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
483 log_verbose("\n---Testing ucnv_open default...\n");
484 someConverters[0] = ucnv_open(NULL,&err);
485 someConverters[1] = ucnv_open(NULL,&err);
486 someConverters[2] = ucnv_open("utf8", &err);
487 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
488 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
489 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
490
491 /* Testing ucnv_getName()*/
492 /*default code page */
493 ucnv_getName(someConverters[0], &err);
494 if(U_FAILURE(err)) {
495 log_data_err("getName[0] failed\n");
496 } else {
497 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
498 }
499 ucnv_getName(someConverters[1], &err);
500 if(U_FAILURE(err)) {
501 log_data_err("getName[1] failed\n");
502 } else {
503 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
504 }
505
506 ucnv_close(someConverters[0]);
507 ucnv_close(someConverters[1]);
508 ucnv_close(someConverters[2]);
509 ucnv_close(someConverters[3]);
510
511
512 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
513 {
514 int32_t i = 0;
515
516 err = U_ZERO_ERROR;
517 #ifdef U_TOPSRCDIR
518 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
519 #else
520 strcpy(ucs_file_name, loadTestData(&err));
521
522 if(U_FAILURE(err)){
523 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
524 return;
525 }
526
527 {
528 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
529
530 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
531 *(index+1)=0;
532 }
533 }
534
535 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
536 #endif
537 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
538
539 ucs_file_in = fopen(ucs_file_name,"rb");
540 if (!ucs_file_in)
541 {
542 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
543 return;
544 }
545
546 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
547
548 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
549 /* ucnv_flushCache(); */
550 myConverter =ucnv_open( "ibm-949", &err);
551 if (!myConverter || U_FAILURE(err))
552 {
553 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
554 fclose(ucs_file_in);
555 break;
556 }
557
558 /*testing for ucnv_getName() */
559 log_verbose("Testing ucnv_getName()...\n");
560 ucnv_getName(myConverter, &err);
561 if(U_FAILURE(err))
562 log_err("Error in getName\n");
563 else
564 {
565 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
566 }
567 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
568 log_err("getName failed\n");
569 else
570 log_verbose("getName ok\n");
571 /*Test getName with error condition*/
572 {
573 const char* name=0;
574 err=U_ILLEGAL_ARGUMENT_ERROR;
575 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
576 name=ucnv_getName(myConverter, &err);
577 if(name != NULL){
578 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
579 }
580 err=U_ZERO_ERROR;
581 }
582
583
584 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
585
586 log_verbose("Testing ucnv_getMaxCharSize()...\n");
587 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
588 log_verbose("Max byte per character OK\n");
589 else
590 log_err("Max byte per character failed\n");
591
592 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
593 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
594 log_verbose("Min byte per character OK\n");
595 else
596 log_err("Min byte per character failed\n");
597
598
599 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
600 log_verbose("\n---Testing ucnv_getSubstChars...\n");
601 ii=4;
602 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
603 if (ii <= 0) {
604 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
605 }
606
607 for(x=0;x<ii;x++)
608 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
609 if (rest==CodePagesSubstitutionChars[codepage_index])
610 log_verbose("Substitution character ok\n");
611 else
612 log_err("Substitution character failed.\n");
613
614 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
615 ucnv_setSubstChars(myConverter, myptr, ii, &err);
616 if (U_FAILURE(err))
617 {
618 log_err("FAILURE! %s\n", myErrorName(err));
619 }
620 ucnv_getSubstChars(myConverter,save, &ii, &err);
621 if (U_FAILURE(err))
622 {
623 log_err("FAILURE! %s\n", myErrorName(err));
624 }
625
626 if (strncmp(save, myptr, ii))
627 log_err("Saved substitution character failed\n");
628 else
629 log_verbose("Saved substitution character ok\n");
630
631 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
632 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
633 ii=1;
634 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
635 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
636 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
637 }
638 err=U_ZERO_ERROR;
639 ii=4;
640 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
641 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
642 ucnv_setSubstChars(myConverter, myptr, 0, &err);
643 if(err != U_ILLEGAL_ARGUMENT_ERROR){
644 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
645 }
646 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
647 strcpy(myptr, "abc");
648 ucnv_setSubstChars(myConverter, myptr, ii, &err);
649 err=U_ZERO_ERROR;
650 ucnv_getSubstChars(myConverter, save, &ii, &err);
651 if(strncmp(save, myptr, ii) == 0){
652 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
653 }
654 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
655 err=U_ZERO_ERROR;
656 strcpy(myptr, "abc");
657 ucnv_setSubstChars(myConverter, myptr, ii, &err);
658 err=U_ILLEGAL_ARGUMENT_ERROR;
659 ucnv_getSubstChars(myConverter, save, &ii, &err);
660 if(strncmp(save, myptr, ii) == 0){
661 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
662 }
663 err=U_ZERO_ERROR;
664 /*------*/
665
666 #ifdef U_ENABLE_GENERIC_ISO_2022
667 /*resetState ucnv_reset()*/
668 log_verbose("\n---Testing ucnv_reset()..\n");
669 ucnv_reset(myConverter);
670 {
671 UChar32 c;
672 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
673 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
674 UConverter *cnv=ucnv_open("ISO_2022", &err);
675 if(U_FAILURE(err)) {
676 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
677 }
678 c=ucnv_getNextUChar(cnv, &source, limit, &err);
679 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
680 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
681 }
682 ucnv_reset(cnv);
683 ucnv_close(cnv);
684
685 }
686 #endif
687
688 /*getDisplayName*/
689 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
690 locale=CodePagesLocale[codepage_index];
691 len=0;
692 displayname=NULL;
693 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
694 if(err==U_BUFFER_OVERFLOW_ERROR) {
695 err=U_ZERO_ERROR;
696 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
697 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
698 if(U_FAILURE(err)) {
699 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
700 }
701 else {
702 log_verbose(" getDisplayName o.k.\n");
703 }
704 free(displayname);
705 displayname=NULL;
706 }
707 else {
708 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
709 }
710 /*test ucnv_getDiaplayName with error condition*/
711 err= U_ILLEGAL_ARGUMENT_ERROR;
712 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
713 if( len !=0 ){
714 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
715 }
716 /*test ucnv_getDiaplayName with error condition*/
717 err=U_ZERO_ERROR;
718 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
719 if( len !=0 || U_SUCCESS(err)){
720 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
721 }
722 err=U_ZERO_ERROR;
723
724 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
725 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
726
727 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
728 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
729 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
730 {
731 log_err("FAILURE! %s\n", myErrorName(err));
732 }
733
734 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
735 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
736 log_err("get From UCallBack failed\n");
737 else
738 log_verbose("get From UCallBack ok\n");
739
740 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
741 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
742 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
743 {
744 log_err("FAILURE! %s\n", myErrorName(err));
745 }
746
747 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
748 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
749 log_err("get From UCallBack action failed\n");
750 else
751 log_verbose("get From UCallBack action ok\n");
752
753 /*testing ucnv_setToUCallBack with error conditions*/
754 err=U_ILLEGAL_ARGUMENT_ERROR;
755 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
756 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
757 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
758 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
759 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
760 }
761 err=U_ZERO_ERROR;
762
763
764 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
765 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
766
767 log_verbose("\n---Testing setTo UCallBack...\n");
768 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
769 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
770 {
771 log_err("FAILURE! %s\n", myErrorName(err));
772 }
773
774 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
775 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
776 log_err("To UCallBack failed\n");
777 else
778 log_verbose("To UCallBack ok\n");
779
780 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
781 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
782 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
783 { log_err("FAILURE! %s\n", myErrorName(err)); }
784
785 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
786 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
787 log_err("To UCallBack failed\n");
788 else
789 log_verbose("To UCallBack ok\n");
790
791 /*testing ucnv_setToUCallBack with error conditions*/
792 err=U_ILLEGAL_ARGUMENT_ERROR;
793 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
794 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
795 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
796 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
797 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
798 }
799 err=U_ZERO_ERROR;
800
801
802 /*getcodepageid testing ucnv_getCCSID() */
803 log_verbose("\n----Testing getCCSID....\n");
804 cp = ucnv_getCCSID(myConverter,&err);
805 if (U_FAILURE(err))
806 {
807 log_err("FAILURE!..... %s\n", myErrorName(err));
808 }
809 if (cp != CodePageNumberToTest[codepage_index])
810 log_err("Codepage number test failed\n");
811 else
812 log_verbose("Codepage number test OK\n");
813
814 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
815 err=U_ILLEGAL_ARGUMENT_ERROR;
816 if( ucnv_getCCSID(myConverter,&err) != -1){
817 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
818 }
819 err=U_ZERO_ERROR;
820
821 /*getCodepagePlatform testing ucnv_getPlatform()*/
822 log_verbose("\n---Testing getCodepagePlatform ..\n");
823 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
824 log_err("Platform codepage test failed\n");
825 else
826 log_verbose("Platform codepage test ok\n");
827
828 if (U_FAILURE(err))
829 {
830 log_err("FAILURE! %s\n", myErrorName(err));
831 }
832 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
833 err= U_ILLEGAL_ARGUMENT_ERROR;
834 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
835 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
836 }
837 err=U_ZERO_ERROR;
838
839
840 /*Reads the BOM*/
841 fread(&BOM, sizeof(UChar), 1, ucs_file_in);
842 if (BOM!=0xFEFF && BOM!=0xFFFE)
843 {
844 log_err("File Missing BOM...Bailing!\n");
845 fclose(ucs_file_in);
846 break;
847 }
848
849
850 /*Reads in the file*/
851 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
852 {
853 myUChar = ucs_file_buffer[i-1];
854
855 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
856 }
857
858 myUChar = ucs_file_buffer[i-1];
859 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
860
861
862 /*testing ucnv_fromUChars() and ucnv_toUChars() */
863 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
864
865 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
866 u_uastrcpy(uchar1,"");
867 u_strncpy(uchar1,ucs_file_buffer,i);
868 uchar1[i] = 0;
869
870 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
871 u_uastrcpy(uchar3,"");
872 u_strncpy(uchar3,ucs_file_buffer,i);
873 uchar3[i] = 0;
874
875 /*Calls the Conversion Routine */
876 testLong1 = MAX_FILE_LEN;
877 log_verbose("\n---Testing ucnv_fromUChars()\n");
878 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
879 if (U_FAILURE(err))
880 {
881 log_err("\nFAILURE...%s\n", myErrorName(err));
882 }
883 else
884 log_verbose(" ucnv_fromUChars() o.k.\n");
885
886 /*test the conversion routine */
887 log_verbose("\n---Testing ucnv_toUChars()\n");
888 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
889 targetcapacity2=0;
890 targetsize = ucnv_toUChars(myConverter,
891 NULL,
892 targetcapacity2,
893 output_cp_buffer,
894 strlen(output_cp_buffer),
895 &err);
896 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
897
898 if(err==U_BUFFER_OVERFLOW_ERROR)
899 {
900 err=U_ZERO_ERROR;
901 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
902 targetsize = ucnv_toUChars(myConverter,
903 uchar2,
904 targetsize+1,
905 output_cp_buffer,
906 strlen(output_cp_buffer),
907 &err);
908
909 if(U_FAILURE(err))
910 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
911 else
912 log_verbose(" ucnv_toUChars() o.k.\n");
913
914 if(u_strcmp(uchar1,uchar2)!=0)
915 log_err("equality test failed with conversion routine\n");
916 }
917 else
918 {
919 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
920 }
921 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
922 err=U_ILLEGAL_ARGUMENT_ERROR;
923 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
924 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
925 if (targetcapacity !=0) {
926 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
927 }
928 err=U_ZERO_ERROR;
929 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
930 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
931 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
932 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
933 }
934 err=U_ZERO_ERROR;
935 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
936 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
937 if (targetcapacity !=0) {
938 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
939 }
940 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
941 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
942 if (err != U_BUFFER_OVERFLOW_ERROR) {
943 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
944 }
945 /*toUChars with error conditions*/
946 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err);
947 if(targetsize != 0){
948 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
949 }
950 err=U_ZERO_ERROR;
951 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err);
952 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
953 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
954 }
955 err=U_ZERO_ERROR;
956 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
957 if (targetsize !=0) {
958 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
959 }
960 targetcapacity2=0;
961 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err);
962 if (err != U_STRING_NOT_TERMINATED_WARNING) {
963 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
964 u_errorName(err));
965 }
966 err=U_ZERO_ERROR;
967 /*-----*/
968
969
970 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
971 /*Clean up re-usable vars*/
972 j=0;
973 log_verbose("Testing ucnv_fromUnicode().....\n");
974 tmp_ucs_buf=ucs_file_buffer_use;
975 ucnv_fromUnicode(myConverter, &mytarget_1,
976 mytarget + MAX_FILE_LEN,
977 &tmp_ucs_buf,
978 ucs_file_buffer_use+i,
979 NULL,
980 TRUE,
981 &err);
982 consumedUni = (UChar*)tmp_consumedUni;
983
984 if (U_FAILURE(err))
985 {
986 log_err("FAILURE! %s\n", myErrorName(err));
987 }
988 else
989 log_verbose("ucnv_fromUnicode() o.k.\n");
990
991 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
992 log_verbose("Testing ucnv_toUnicode().....\n");
993 tmp_mytarget_use=mytarget_use;
994 tmp_consumed = consumed;
995 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
996 my_ucs_file_buffer + MAX_FILE_LEN,
997 &tmp_mytarget_use,
998 mytarget_use + (mytarget_1 - mytarget),
999 NULL,
1000 FALSE,
1001 &err);
1002 consumed = (char*)tmp_consumed;
1003 if (U_FAILURE(err))
1004 {
1005 log_err("FAILURE! %s\n", myErrorName(err));
1006 }
1007 else
1008 log_verbose("ucnv_toUnicode() o.k.\n");
1009
1010
1011 log_verbose("\n---Testing RoundTrip ...\n");
1012
1013
1014 u_strncpy(uchar3, my_ucs_file_buffer,i);
1015 uchar3[i] = 0;
1016
1017 if(u_strcmp(uchar1,uchar3)==0)
1018 log_verbose("Equality test o.k.\n");
1019 else
1020 log_err("Equality test failed\n");
1021
1022 /*sanity compare */
1023 if(uchar2 == NULL)
1024 {
1025 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1026 }
1027 else
1028 {
1029 if(u_strcmp(uchar2, uchar3)==0)
1030 log_verbose("Equality test o.k.\n");
1031 else
1032 log_err("Equality test failed\n");
1033 }
1034
1035 fclose(ucs_file_in);
1036 ucnv_close(myConverter);
1037 if (uchar1 != 0) free(uchar1);
1038 if (uchar2 != 0) free(uchar2);
1039 if (uchar3 != 0) free(uchar3);
1040 }
1041
1042 free((void*)mytarget);
1043 free((void*)output_cp_buffer);
1044 free((void*)ucs_file_buffer);
1045 free((void*)my_ucs_file_buffer);
1046 #endif
1047 }
1048
1049 #if !UCONFIG_NO_LEGACY_CONVERSION
1050 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
1051 {
1052 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
1053 }
1054
1055 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
1056 {
1057 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
1058 }
1059 #endif
1060
1061 static void TestFlushCache(void) {
1062 #if !UCONFIG_NO_LEGACY_CONVERSION
1063 UErrorCode err = U_ZERO_ERROR;
1064 UConverter* someConverters[5];
1065 int flushCount = 0;
1066
1067 /* flush the converter cache to get a consistent state before the flushing is tested */
1068 ucnv_flushCache();
1069
1070 /*Testing ucnv_open()*/
1071 /* Note: These converters have been chosen because they do NOT
1072 encode the Latin characters (U+0041, ...), and therefore are
1073 highly unlikely to be chosen as system default codepages */
1074
1075 someConverters[0] = ucnv_open("ibm-1047", &err);
1076 if (U_FAILURE(err)) {
1077 log_data_err("FAILURE! %s\n", myErrorName(err));
1078 }
1079
1080 someConverters[1] = ucnv_open("ibm-1047", &err);
1081 if (U_FAILURE(err)) {
1082 log_data_err("FAILURE! %s\n", myErrorName(err));
1083 }
1084
1085 someConverters[2] = ucnv_open("ibm-1047", &err);
1086 if (U_FAILURE(err)) {
1087 log_data_err("FAILURE! %s\n", myErrorName(err));
1088 }
1089
1090 someConverters[3] = ucnv_open("gb18030", &err);
1091 if (U_FAILURE(err)) {
1092 log_data_err("FAILURE! %s\n", myErrorName(err));
1093 }
1094
1095 someConverters[4] = ucnv_open("ibm-954", &err);
1096 if (U_FAILURE(err)) {
1097 log_data_err("FAILURE! %s\n", myErrorName(err));
1098 }
1099
1100
1101 /* Testing ucnv_flushCache() */
1102 log_verbose("\n---Testing ucnv_flushCache...\n");
1103 if ((flushCount=ucnv_flushCache())==0)
1104 log_verbose("Flush cache ok\n");
1105 else
1106 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1107
1108 /*testing ucnv_close() and ucnv_flushCache() */
1109 ucnv_close(someConverters[0]);
1110 ucnv_close(someConverters[1]);
1111
1112 if ((flushCount=ucnv_flushCache())==0)
1113 log_verbose("Flush cache ok\n");
1114 else
1115 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1116
1117 ucnv_close(someConverters[2]);
1118 ucnv_close(someConverters[3]);
1119
1120 if ((flushCount=ucnv_flushCache())==2)
1121 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1122 else
1123 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1124 __LINE__,
1125 flushCount);
1126
1127 ucnv_close(someConverters[4]);
1128 if ( (flushCount=ucnv_flushCache())==1)
1129 log_verbose("Flush cache ok\n");
1130 else
1131 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1132 #endif
1133 }
1134
1135 /**
1136 * Test the converter alias API, specifically the fuzzy matching of
1137 * alias names and the alias table integrity. Make sure each
1138 * converter has at least one alias (itself), and that its listed
1139 * aliases map back to itself. Check some hard-coded UTF-8 and
1140 * ISO_2022 aliases to make sure they work.
1141 */
1142 static void TestAlias() {
1143 int32_t i, ncnv;
1144 UErrorCode status = U_ZERO_ERROR;
1145
1146 /* Predetermined aliases that we expect to map back to ISO_2022
1147 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1148 const char* ISO_2022_NAMES[] =
1149 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1150 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1151 int32_t ISO_2022_NAMES_LENGTH =
1152 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]);
1153 const char *UTF8_NAMES[] =
1154 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1155 "utf_8", "ibm1208", "cp1208" };
1156 int32_t UTF8_NAMES_LENGTH =
1157 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]);
1158
1159 struct {
1160 const char *name;
1161 const char *alias;
1162 } CONVERTERS_NAMES[] = {
1163 { "UTF-32BE", "UTF32_BigEndian" },
1164 { "UTF-32LE", "UTF32_LittleEndian" },
1165 { "UTF-32", "ISO-10646-UCS-4" },
1166 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1167 { "UTF-32", "ucs-4" }
1168 };
1169 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES);
1170
1171 /* When there are bugs in gencnval or in ucnv_io, converters can
1172 appear to have no aliases. */
1173 ncnv = ucnv_countAvailable();
1174 log_verbose("%d converters\n", ncnv);
1175 for (i=0; i<ncnv; ++i) {
1176 const char *name = ucnv_getAvailableName(i);
1177 const char *alias0;
1178 uint16_t na = ucnv_countAliases(name, &status);
1179 uint16_t j;
1180 UConverter *cnv;
1181
1182 if (na == 0) {
1183 log_err("FAIL: Converter \"%s\" (i=%d)"
1184 " has no aliases; expect at least one\n",
1185 name, i);
1186 continue;
1187 }
1188 cnv = ucnv_open(name, &status);
1189 if (U_FAILURE(status)) {
1190 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1191 " can't be opened.\n",
1192 name, i);
1193 }
1194 else {
1195 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1196 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1197 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1198 "The should be the same\n",
1199 name, ucnv_getName(cnv, &status));
1200 }
1201 }
1202 ucnv_close(cnv);
1203
1204 status = U_ZERO_ERROR;
1205 alias0 = ucnv_getAlias(name, 0, &status);
1206 for (j=1; j<na; ++j) {
1207 const char *alias;
1208 /* Make sure each alias maps back to the the same list of
1209 aliases. Assume that if alias 0 is the same, the whole
1210 list is the same (this should always be true). */
1211 const char *mapBack;
1212
1213 status = U_ZERO_ERROR;
1214 alias = ucnv_getAlias(name, j, &status);
1215 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1216 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1217 }
1218
1219 if (alias == NULL) {
1220 log_err("FAIL: Converter \"%s\" -> "
1221 "alias[%d]=NULL\n",
1222 name, j);
1223 continue;
1224 }
1225
1226 mapBack = ucnv_getAlias(alias, 0, &status);
1227
1228 if (mapBack == NULL) {
1229 log_err("FAIL: Converter \"%s\" -> "
1230 "alias[%d]=\"%s\" -> "
1231 "alias[0]=NULL, exp. \"%s\"\n",
1232 name, j, alias, alias0);
1233 continue;
1234 }
1235
1236 if (0 != strcmp(alias0, mapBack)) {
1237 int32_t idx;
1238 UBool foundAlias = FALSE;
1239 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1240 /* Make sure that we only get this mismapping when there is
1241 an ambiguous alias, and the other converter has this alias too. */
1242 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1243 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1244 foundAlias = TRUE;
1245 break;
1246 }
1247 }
1248 }
1249 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1250
1251 if (!foundAlias) {
1252 log_err("FAIL: Converter \"%s\" -> "
1253 "alias[%d]=\"%s\" -> "
1254 "alias[0]=\"%s\", exp. \"%s\"\n",
1255 name, j, alias, mapBack, alias0);
1256 }
1257 }
1258 }
1259 }
1260
1261
1262 /* Check a list of predetermined aliases that we expect to map
1263 * back to ISO_2022 and UTF-8. */
1264 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1265 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1266 if(!mapBack) {
1267 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1268 continue;
1269 }
1270 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1271 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1272 ISO_2022_NAMES[i], mapBack);
1273 }
1274 }
1275
1276
1277 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1278 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1279 if(!mapBack) {
1280 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1281 continue;
1282 }
1283 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1284 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1285 UTF8_NAMES[i], mapBack);
1286 }
1287 }
1288
1289 /*
1290 * Check a list of predetermined aliases that we expect to map
1291 * back to predermined converter names.
1292 */
1293
1294 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1295 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
1296 if(!mapBack) {
1297 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1298 continue;
1299 }
1300 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1301 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1302 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1303 }
1304 }
1305
1306 }
1307
1308 static void TestDuplicateAlias(void) {
1309 const char *alias;
1310 UErrorCode status = U_ZERO_ERROR;
1311
1312 status = U_ZERO_ERROR;
1313 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1314 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1315 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1316 }
1317 status = U_ZERO_ERROR;
1318 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1319 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1320 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1321 }
1322 status = U_ZERO_ERROR;
1323 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1324 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1325 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1326 }
1327 }
1328
1329
1330 /* Test safe clone callback */
1331
/*
 * Hands out consecutive serial numbers: 1 on the first call, 2 on the
 * second, and so on.  The counter lives in a function-local static.
 */
static uint32_t TSCC_nextSerial()
{
    static uint32_t upcoming = 1;
    uint32_t issued = upcoming;

    upcoming = issued + 1;
    return issued;
}
1338
1339 typedef struct
1340 {
1341 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */
1342 uint32_t serial; /* minted from nextSerial, above */
1343 UBool wasClosed; /* close happened on the object */
1344 } TSCCContext;
1345
1346 static TSCCContext *TSCC_clone(TSCCContext *ctx)
1347 {
1348 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));
1349
1350 newCtx->serial = TSCC_nextSerial();
1351 newCtx->wasClosed = 0;
1352 newCtx->magic = 0xC0FFEE;
1353
1354 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);
1355
1356 return newCtx;
1357 }
1358
1359 #if !UCONFIG_NO_LEGACY_CONVERSION
1360 static void TSCC_fromU(const void *context,
1361 UConverterFromUnicodeArgs *fromUArgs,
1362 const UChar* codeUnits,
1363 int32_t length,
1364 UChar32 codePoint,
1365 UConverterCallbackReason reason,
1366 UErrorCode * err)
1367 {
1368 TSCCContext *ctx = (TSCCContext*)context;
1369 UConverterFromUCallback junkFrom;
1370
1371 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);
1372
1373 if(ctx->magic != 0xC0FFEE) {
1374 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1375 return;
1376 }
1377
1378 if(reason == UCNV_CLONE) {
1379 UErrorCode subErr = U_ZERO_ERROR;
1380 TSCCContext *newCtx;
1381 TSCCContext *junkCtx;
1382 TSCCContext **pjunkCtx = &junkCtx;
1383
1384 /* "recreate" it */
1385 log_verbose("TSCC_fromU: cloning..\n");
1386 newCtx = TSCC_clone(ctx);
1387
1388 if(newCtx == NULL) {
1389 log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
1390 }
1391
1392 /* now, SET it */
1393 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1394 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1395
1396 if(U_FAILURE(subErr)) {
1397 *err = subErr;
1398 }
1399 }
1400
1401 if(reason == UCNV_CLOSE) {
1402 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
1403 ctx->wasClosed = TRUE;
1404 }
1405 }
1406
1407 static void TSCC_toU(const void *context,
1408 UConverterToUnicodeArgs *toUArgs,
1409 const char* codeUnits,
1410 int32_t length,
1411 UConverterCallbackReason reason,
1412 UErrorCode * err)
1413 {
1414 TSCCContext *ctx = (TSCCContext*)context;
1415 UConverterToUCallback junkFrom;
1416
1417 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);
1418
1419 if(ctx->magic != 0xC0FFEE) {
1420 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1421 return;
1422 }
1423
1424 if(reason == UCNV_CLONE) {
1425 UErrorCode subErr = U_ZERO_ERROR;
1426 TSCCContext *newCtx;
1427 TSCCContext *junkCtx;
1428 TSCCContext **pjunkCtx = &junkCtx;
1429
1430 /* "recreate" it */
1431 log_verbose("TSCC_toU: cloning..\n");
1432 newCtx = TSCC_clone(ctx);
1433
1434 if(newCtx == NULL) {
1435 log_err("TSCC_toU: internal clone failed on %p\n", ctx);
1436 }
1437
1438 /* now, SET it */
1439 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1440 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1441
1442 if(U_FAILURE(subErr)) {
1443 *err = subErr;
1444 }
1445 }
1446
1447 if(reason == UCNV_CLOSE) {
1448 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
1449 ctx->wasClosed = TRUE;
1450 }
1451 }
1452
1453 static void TSCC_init(TSCCContext *q)
1454 {
1455 q->magic = 0xC0FFEE;
1456 q->serial = TSCC_nextSerial();
1457 q->wasClosed = 0;
1458 }
1459
1460 static void TSCC_print_log(TSCCContext *q, const char *name)
1461 {
1462 if(q==NULL) {
1463 log_verbose("TSCContext: %s is NULL!!\n", name);
1464 } else {
1465 if(q->magic != 0xC0FFEE) {
1466 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1467 q,q->serial, q->magic);
1468 }
1469 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1470 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open");
1471 }
1472 }
1473
1474 static void TestConvertSafeCloneCallback()
1475 {
1476 UErrorCode err = U_ZERO_ERROR;
1477 TSCCContext from1, to1;
1478 TSCCContext *from2, *from3, *to2, *to3;
1479 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1480 char hunk[8192];
1481 int32_t hunkSize = 8192;
1482 UConverterFromUCallback junkFrom;
1483 UConverterToUCallback junkTo;
1484 UConverter *conv1, *conv2 = NULL;
1485
1486 conv1 = ucnv_open("iso-8859-3", &err);
1487
1488 if(U_FAILURE(err)) {
1489 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1490 return;
1491 }
1492
1493 log_verbose("Opened conv1=%p\n", conv1);
1494
1495 TSCC_init(&from1);
1496 TSCC_init(&to1);
1497
1498 TSCC_print_log(&from1, "from1");
1499 TSCC_print_log(&to1, "to1");
1500
1501 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1502 log_verbose("Set from1 on conv1\n");
1503 TSCC_print_log(&from1, "from1");
1504
1505 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1506 log_verbose("Set to1 on conv1\n");
1507 TSCC_print_log(&to1, "to1");
1508
1509 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1510 if(U_FAILURE(err)) {
1511 log_err("safeClone failed: %s\n", u_errorName(err));
1512 return;
1513 }
1514 log_verbose("Cloned to conv2=%p.\n", conv2);
1515
1516 /********** from *********************/
1517 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1518 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1519
1520 TSCC_print_log(from2, "from2");
1521 TSCC_print_log(from3, "from3(==from1)");
1522
1523 if(from2 == NULL) {
1524 log_err("FAIL! from2 is null \n");
1525 return;
1526 }
1527
1528 if(from3 == NULL) {
1529 log_err("FAIL! from3 is null \n");
1530 return;
1531 }
1532
1533 if(from3 != (&from1) ) {
1534 log_err("FAIL! conv1's FROM context changed!\n");
1535 }
1536
1537 if(from2 == (&from1) ) {
1538 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1539 }
1540
1541 if(from1.wasClosed) {
1542 log_err("FAIL! from1 is closed \n");
1543 }
1544
1545 if(from2->wasClosed) {
1546 log_err("FAIL! from2 was closed\n");
1547 }
1548
1549 /********** to *********************/
1550 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1551 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1552
1553 TSCC_print_log(to2, "to2");
1554 TSCC_print_log(to3, "to3(==to1)");
1555
1556 if(to2 == NULL) {
1557 log_err("FAIL! to2 is null \n");
1558 return;
1559 }
1560
1561 if(to3 == NULL) {
1562 log_err("FAIL! to3 is null \n");
1563 return;
1564 }
1565
1566 if(to3 != (&to1) ) {
1567 log_err("FAIL! conv1's TO context changed!\n");
1568 }
1569
1570 if(to2 == (&to1) ) {
1571 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1572 }
1573
1574 if(to1.wasClosed) {
1575 log_err("FAIL! to1 is closed \n");
1576 }
1577
1578 if(to2->wasClosed) {
1579 log_err("FAIL! to2 was closed\n");
1580 }
1581
1582 /*************************************/
1583
1584 ucnv_close(conv1);
1585 log_verbose("ucnv_closed (conv1)\n");
1586 TSCC_print_log(&from1, "from1");
1587 TSCC_print_log(from2, "from2");
1588 TSCC_print_log(&to1, "to1");
1589 TSCC_print_log(to2, "to2");
1590
1591 if(from1.wasClosed == FALSE) {
1592 log_err("FAIL! from1 is NOT closed \n");
1593 }
1594
1595 if(from2->wasClosed) {
1596 log_err("FAIL! from2 was closed\n");
1597 }
1598
1599 if(to1.wasClosed == FALSE) {
1600 log_err("FAIL! to1 is NOT closed \n");
1601 }
1602
1603 if(to2->wasClosed) {
1604 log_err("FAIL! to2 was closed\n");
1605 }
1606
1607 ucnv_close(conv2);
1608 log_verbose("ucnv_closed (conv2)\n");
1609
1610 TSCC_print_log(&from1, "from1");
1611 TSCC_print_log(from2, "from2");
1612
1613 if(from1.wasClosed == FALSE) {
1614 log_err("FAIL! from1 is NOT closed \n");
1615 }
1616
1617 if(from2->wasClosed == FALSE) {
1618 log_err("FAIL! from2 was NOT closed\n");
1619 }
1620
1621 TSCC_print_log(&to1, "to1");
1622 TSCC_print_log(to2, "to2");
1623
1624 if(to1.wasClosed == FALSE) {
1625 log_err("FAIL! to1 is NOT closed \n");
1626 }
1627
1628 if(to2->wasClosed == FALSE) {
1629 log_err("FAIL! to2 was NOT closed\n");
1630 }
1631
1632 if(to2 != (&to1)) {
1633 free(to2); /* to1 is stack based */
1634 }
1635 if(from2 != (&from1)) {
1636 free(from2); /* from1 is stack based */
1637 }
1638 }
1639 #endif
1640
1641 static UBool
1642 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
1643 while(length>0) {
1644 if(*p!=b) {
1645 return TRUE;
1646 }
1647 ++p;
1648 --length;
1649 }
1650 return FALSE;
1651 }
1652
1653 static void TestConvertSafeClone()
1654 {
1655 /* one 'regular' & all the 'private stateful' converters */
1656 static const char *const names[] = {
1657 #if !UCONFIG_NO_LEGACY_CONVERSION
1658 "ibm-1047",
1659 "ISO_2022,locale=zh,version=1",
1660 #endif
1661 "SCSU",
1662 #if !UCONFIG_NO_LEGACY_CONVERSION
1663 "HZ",
1664 "lmbcs",
1665 "ISCII,version=0",
1666 "ISO_2022,locale=kr,version=1",
1667 "ISO_2022,locale=jp,version=2",
1668 #endif
1669 "BOCU-1",
1670 "UTF-7",
1671 #if !UCONFIG_NO_LEGACY_CONVERSION
1672 "IMAP-mailbox-name",
1673 "ibm-1047-s390"
1674 #else
1675 "IMAP=mailbox-name"
1676 #endif
1677 };
1678
1679 /* store the actual sizes of each converter */
1680 int32_t actualSizes[LENGTHOF(names)];
1681
1682 static const int32_t bufferSizes[] = {
1683 U_CNV_SAFECLONE_BUFFERSIZE,
1684 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1685 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1686 };
1687
1688 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1689 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1690 int32_t bufferSize, maxBufferSize;
1691 const char *maxName;
1692 UConverter * cnv, *cnv2;
1693 UErrorCode err;
1694
1695 char *pCharBuffer;
1696 const char *pConstCharBuffer;
1697 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer);
1698 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1699 UChar uniCharBuffer[20];
1700 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1701 const char *pCharSource = charSourceBuffer;
1702 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1703 UChar *pUCharTarget = uniCharBuffer;
1704 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
1705 const UChar * pUniBuffer;
1706 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
1707 int32_t idx, j;
1708
1709 err = U_ZERO_ERROR;
1710 cnv = ucnv_open(names[0], &err);
1711 if(U_SUCCESS(err)) {
1712 /* Check the various error & informational states: */
1713
1714 /* Null status - just returns NULL */
1715 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1716 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0))
1717 {
1718 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1719 }
1720 /* error status - should return 0 & keep error the same */
1721 err = U_MEMORY_ALLOCATION_ERROR;
1722 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1723 {
1724 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1725 }
1726 err = U_ZERO_ERROR;
1727
1728 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
1729 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1730 {
1731 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1732 }
1733 err = U_ZERO_ERROR;
1734
1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1736 bufferSize = 0;
1737 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1738 {
1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1740 }
1741 /* Verify our define is large enough */
1742 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1743 {
1744 log_err("FAIL: Pre-calculated buffer size is too small\n");
1745 }
1746 /* Verify we can use this run-time calculated size */
1747 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1748 {
1749 log_err("FAIL: Converter can't be cloned with run-time size\n");
1750 }
1751 if (cnv2) {
1752 ucnv_close(cnv2);
1753 }
1754
1755 /* size one byte too small - should allocate & let us know */
1756 --bufferSize;
1757 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1758 {
1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1760 }
1761 if (cnv2) {
1762 ucnv_close(cnv2);
1763 }
1764
1765 err = U_ZERO_ERROR;
1766 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1767
1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1769 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1770 {
1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1772 }
1773 if (cnv2) {
1774 ucnv_close(cnv2);
1775 }
1776
1777 err = U_ZERO_ERROR;
1778
1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1780 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1781 {
1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1783 }
1784
1785 ucnv_close(cnv);
1786 }
1787
1788 maxBufferSize = 0;
1789 maxName = "";
1790
1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1792
1793 for(j = 0; j < LENGTHOF(bufferSizes); ++j) {
1794 for (idx = 0; idx < LENGTHOF(names); idx++)
1795 {
1796 err = U_ZERO_ERROR;
1797 cnv = ucnv_open(names[idx], &err);
1798 if(U_FAILURE(err)) {
1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1800 continue;
1801 }
1802
1803 if(j == 0) {
1804 /* preflight to get maxBufferSize */
1805 actualSizes[idx] = 0;
1806 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1807 if(actualSizes[idx] > maxBufferSize) {
1808 maxBufferSize = actualSizes[idx];
1809 maxName = names[idx];
1810 }
1811 }
1812
1813 memset(buffer, 0xaa, sizeof(buffer));
1814
1815 bufferSize = bufferSizes[j];
1816 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1817
1818 /* close the original immediately to make sure that the clone works by itself */
1819 ucnv_close(cnv);
1820
1821 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
1822 err == U_SAFECLONE_ALLOCATED_WARNING
1823 ) {
1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1825 }
1826
1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1828 if(bufferSize <= bufferSizes[j]) {
1829 /* used the stack buffer */
1830 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1831 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1832 ) {
1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1834 names[idx], bufferSize, bufferSizes[j]);
1835 }
1836 } else {
1837 /* heap-allocated the clone */
1838 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1840 names[idx], bufferSize, bufferSizes[j]);
1841 }
1842 }
1843
1844 pCharBuffer = charBuffer;
1845 pUniBuffer = uniBuffer;
1846
1847 ucnv_fromUnicode(cnv2,
1848 &pCharBuffer,
1849 charBufferLimit,
1850 &pUniBuffer,
1851 uniBufferLimit,
1852 NULL,
1853 TRUE,
1854 &err);
1855 if(U_FAILURE(err)){
1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1857 }
1858 ucnv_toUnicode(cnv2,
1859 &pUCharTarget,
1860 pUCharTargetLimit,
1861 &pCharSource,
1862 pCharSourceLimit,
1863 NULL,
1864 TRUE,
1865 &err
1866 );
1867
1868 if(U_FAILURE(err)){
1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1870 }
1871
1872 pConstCharBuffer = charBuffer;
1873 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1874 {
1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
1876 }
1877 ucnv_close(cnv2);
1878 }
1879 }
1880
1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1882 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1883 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1885 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1886 }
1887 }
1888
1889 static void TestCCSID() {
1890 #if !UCONFIG_NO_LEGACY_CONVERSION
1891 UConverter *cnv;
1892 UErrorCode errorCode;
1893 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1894 int32_t i, ccsid;
1895
1896 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) {
1897 ccsid=ccsids[i];
1898
1899 errorCode=U_ZERO_ERROR;
1900 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
1901 if(U_FAILURE(errorCode)) {
1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
1903 continue;
1904 }
1905
1906 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
1908 }
1909
1910 /* skip gb18030(ccsid 1392) */
1911 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
1913 }
1914
1915 ucnv_close(cnv);
1916 }
1917 #endif
1918 }
1919
1920 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1921
1922 /* CHUNK_SIZE defined in common\ucnv.c: */
1923 #define CHUNK_SIZE 1024
1924
1925 static void bug1(void);
1926 static void bug2(void);
1927 static void bug3(void);
1928
1929 static void
1930 TestJ932(void)
1931 {
1932 bug1(); /* Unicode intermediate buffer straddle bug */
1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1935 }
1936
1937 /*
1938 * jitterbug 932: test chunking boundary conditions in
1939
1940 int32_t ucnv_convert(const char *toConverterName,
1941 const char *fromConverterName,
1942 char *target,
1943 int32_t targetSize,
1944 const char *source,
1945 int32_t sourceSize,
1946 UErrorCode * err)
1947
1948 * See discussions on the icu mailing list in
1949 * 2001-April with the subject "converter 'flush' question".
1950 *
1951 * Bug report and test code provided by Edward J. Batutis.
1952 */
1953 static void bug1()
1954 {
1955 #if !UCONFIG_NO_LEGACY_CONVERSION
1956 char char_in[CHUNK_SIZE+32];
1957 char char_out[CHUNK_SIZE*2];
1958
1959 /* GB 18030 equivalent of U+10000 is 90308130 */
1960 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
1961
1962 UErrorCode err = U_ZERO_ERROR;
1963 int32_t i, test_seq_len = sizeof(test_seq);
1964
1965 /*
1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1968 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1970 * other Unicode sequences cause a bug since combining sequences are not supported by the
1971 * converters.
1972 */
1973
1974 for (i = test_seq_len; i >= 0; i--) {
1975 /* put character sequence into input buffer */
1976 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
1977 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
1978
1979 /* do the conversion */
1980 ucnv_convert("us-ascii", /* out */
1981 "gb18030", /* in */
1982 char_out,
1983 sizeof(char_out),
1984 char_in,
1985 sizeof(char_in),
1986 &err);
1987
1988 /* bug1: */
1989 if (err == U_TRUNCATED_CHAR_FOUND) {
1990 /* this happens when surrogate pair straddles the intermediate buffer in
1991 * T_UConverter_fromCodepageToCodepage */
1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
1993 }
1994 }
1995 #endif
1996 }
1997
1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */
1999 static void bug2()
2000 {
2001 /* US-ASCII "1234567890" */
2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2003 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2004 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2005 0x00, 0x00, 0x00, 0x31,
2006 0x00, 0x00, 0x00, 0x32,
2007 0x00, 0x00, 0x00, 0x33,
2008 0x00, 0x00, 0x00, 0x34,
2009 0x00, 0x00, 0x00, 0x35,
2010 0x00, 0x00, 0x00, 0x36,
2011 0x00, 0x00, 0x00, 0x37,
2012 0x00, 0x00, 0x00, 0x38,
2013 0x00, 0x00, (char)0xf0, 0x00};
2014 static char target[5];
2015
2016 UErrorCode err = U_ZERO_ERROR;
2017 int32_t size;
2018
2019 /* do the conversion */
2020 size = ucnv_convert("iso-8859-1", /* out */
2021 "us-ascii", /* in */
2022 target,
2023 sizeof(target),
2024 source,
2025 sizeof(source),
2026 &err);
2027
2028 if ( size != 10 ) {
2029 /* bug2: size is 5, should be 10 */
2030 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2031 }
2032
2033 err = U_ZERO_ERROR;
2034 /* do the conversion */
2035 size = ucnv_convert("UTF-32BE", /* out */
2036 "UTF-8", /* in */
2037 target,
2038 sizeof(target),
2039 sourceUTF8,
2040 sizeof(sourceUTF8),
2041 &err);
2042
2043 if ( size != 32 ) {
2044 /* bug2: size is 5, should be 32 */
2045 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2046 }
2047
2048 err = U_ZERO_ERROR;
2049 /* do the conversion */
2050 size = ucnv_convert("UTF-8", /* out */
2051 "UTF-32BE", /* in */
2052 target,
2053 sizeof(target),
2054 sourceUTF32,
2055 sizeof(sourceUTF32),
2056 &err);
2057
2058 if ( size != 12 ) {
2059 /* bug2: size is 5, should be 12 */
2060 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2061 }
2062 }
2063
2064 /*
2065 * bug3: when the characters expand going from source to target codepage
2066 * you get bug3 in addition to bug2
2067 */
2068 static void bug3()
2069 {
2070 #if !UCONFIG_NO_LEGACY_CONVERSION
2071 char char_in[CHUNK_SIZE*4];
2072 char target[5];
2073 UErrorCode err = U_ZERO_ERROR;
2074 int32_t size;
2075
2076 /*
2077 * first get the buggy size from bug2 then
2078 * compare it to buggy size with an expansion
2079 */
2080 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2081
2082 /* do the conversion */
2083 size = ucnv_convert("lmbcs", /* out */
2084 "us-ascii", /* in */
2085 target,
2086 sizeof(target),
2087 char_in,
2088 sizeof(char_in),
2089 &err);
2090
2091 if ( size != sizeof(char_in) ) {
2092 /*
2093 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2094 * in the converter?), should be CHUNK_SIZE*4
2095 *
2096 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2097 */
2098 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2099 }
2100
2101 /*
2102 * now do the conversion with expansion
2103 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2104 */
2105 memset(char_in, 8, sizeof(char_in));
2106 err = U_ZERO_ERROR;
2107
2108 /* do the conversion */
2109 size = ucnv_convert("lmbcs", /* out */
2110 "us-ascii", /* in */
2111 target,
2112 sizeof(target),
2113 char_in,
2114 sizeof(char_in),
2115 &err);
2116
2117 /* expect 2X expansion */
2118 if ( size != sizeof(char_in) * 2 ) {
2119 /*
2120 * bug3:
2121 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2122 */
2123 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
2124 }
2125 #endif
2126 }
2127
2128 static void
2129 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2130 const char *src, int32_t srcLength,
2131 const char *expectTarget, int32_t expectTargetLength,
2132 int32_t chunkSize,
2133 const char *testName,
2134 UErrorCode expectCode) {
2135 UChar pivotBuffer[CHUNK_SIZE];
2136 UChar *pivotSource, *pivotTarget;
2137 const UChar *pivotLimit;
2138
2139 char targetBuffer[CHUNK_SIZE];
2140 char *target;
2141 const char *srcLimit, *finalSrcLimit, *targetLimit;
2142
2143 int32_t targetLength;
2144
2145 UBool flush;
2146
2147 UErrorCode errorCode;
2148
2149 /* setup */
2150 if(chunkSize>CHUNK_SIZE) {
2151 chunkSize=CHUNK_SIZE;
2152 }
2153
2154 pivotSource=pivotTarget=pivotBuffer;
2155 pivotLimit=pivotBuffer+chunkSize;
2156
2157 finalSrcLimit=src+srcLength;
2158 target=targetBuffer;
2159 targetLimit=targetBuffer+chunkSize;
2160
2161 ucnv_resetToUnicode(srcCnv);
2162 ucnv_resetFromUnicode(targetCnv);
2163
2164 errorCode=U_ZERO_ERROR;
2165 flush=FALSE;
2166
2167 /* convert, streaming-style (both converters and pivot keep state) */
2168 for(;;) {
2169 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2170 if(src+chunkSize<=finalSrcLimit) {
2171 srcLimit=src+chunkSize;
2172 } else {
2173 srcLimit=finalSrcLimit;
2174 }
2175 ucnv_convertEx(targetCnv, srcCnv,
2176 &target, targetLimit,
2177 &src, srcLimit,
2178 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2179 FALSE, flush, &errorCode);
2180 targetLength=(int32_t)(target-targetBuffer);
2181 if(target>targetLimit) {
2182 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2183 testName, chunkSize, target, targetLimit);
2184 break; /* TODO: major problem! */
2185 }
2186 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2187 /* continue converting another chunk */
2188 errorCode=U_ZERO_ERROR;
2189 if(targetLength+chunkSize<=sizeof(targetBuffer)) {
2190 targetLimit=target+chunkSize;
2191 } else {
2192 targetLimit=targetBuffer+sizeof(targetBuffer);
2193 }
2194 } else if(U_FAILURE(errorCode)) {
2195 /* failure */
2196 break;
2197 } else if(flush) {
2198 /* all done */
2199 break;
2200 } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2201 /* all consumed, now flush without input (separate from conversion for testing) */
2202 flush=TRUE;
2203 }
2204 }
2205
2206 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2207 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2208 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2209 } else if(targetLength!=expectTargetLength) {
2210 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2211 testName, chunkSize, targetLength, expectTargetLength);
2212 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2213 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2214 testName, chunkSize);
2215 }
2216 }
2217
2218 static void
2219 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2220 const char *src, int32_t srcLength,
2221 const char *expectTarget, int32_t expectTargetLength,
2222 const char *testName,
2223 UErrorCode expectCode) {
2224 convertExStreaming(srcCnv, targetCnv,
2225 src, srcLength,
2226 expectTarget, expectTargetLength,
2227 1, testName, expectCode);
2228 convertExStreaming(srcCnv, targetCnv,
2229 src, srcLength,
2230 expectTarget, expectTargetLength,
2231 3, testName, expectCode);
2232 convertExStreaming(srcCnv, targetCnv,
2233 src, srcLength,
2234 expectTarget, expectTargetLength,
2235 7, testName, expectCode);
2236 }
2237
2238 static void TestConvertEx() {
2239 #if !UCONFIG_NO_LEGACY_CONVERSION
2240 static const uint8_t
2241 utf8[]={
2242 /* 4e00 30a1 ff61 0410 */
2243 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2244 },
2245 shiftJIS[]={
2246 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2247 },
2248 errorTarget[]={
2249 /*
2250 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2251 * SUB, SUB, 0x40, SUB, SUB, 0x40
2252 */
2253 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2254 };
2255
2256 char srcBuffer[100], targetBuffer[100];
2257
2258 const char *src;
2259 char *target;
2260
2261 UChar pivotBuffer[100];
2262 UChar *pivotSource, *pivotTarget;
2263
2264 UConverter *cnv1, *cnv2;
2265 UErrorCode errorCode;
2266
2267 errorCode=U_ZERO_ERROR;
2268 cnv1=ucnv_open("UTF-8", &errorCode);
2269 if(U_FAILURE(errorCode)) {
2270 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2271 return;
2272 }
2273
2274 cnv2=ucnv_open("Shift-JIS", &errorCode);
2275 if(U_FAILURE(errorCode)) {
2276 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2277 ucnv_close(cnv1);
2278 return;
2279 }
2280
2281 /* test ucnv_convertEx() with streaming conversion style */
2282 convertExMultiStreaming(cnv1, cnv2,
2283 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2284 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2285
2286 convertExMultiStreaming(cnv2, cnv1,
2287 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2288 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2289
2290 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2291 convertExMultiStreaming(cnv1, cnv2,
2292 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2293 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2294
2295 /* test some simple conversions */
2296
2297 /* NUL-terminated source and target */
2298 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2299 memcpy(srcBuffer, utf8, sizeof(utf8));
2300 srcBuffer[sizeof(utf8)]=0;
2301 src=srcBuffer;
2302 target=targetBuffer;
2303 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2304 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2305 if( errorCode!=U_ZERO_ERROR ||
2306 target-targetBuffer!=sizeof(shiftJIS) ||
2307 *target!=0 ||
2308 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2309 ) {
2310 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2311 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2312 }
2313
2314 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2315 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2316 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2317 src=srcBuffer;
2318 target=targetBuffer;
2319 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2320 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2321 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2322 target-targetBuffer!=sizeof(shiftJIS) ||
2323 *target!=(char)0xff ||
2324 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2325 ) {
2326 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2327 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2328 }
2329
2330 /* bad arguments */
2331 errorCode=U_MESSAGE_PARSE_ERROR;
2332 src=srcBuffer;
2333 target=targetBuffer;
2334 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2335 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2336 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2337 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2338 }
2339
2340 /* pivotLimit==pivotStart */
2341 errorCode=U_ZERO_ERROR;
2342 pivotSource=pivotTarget=pivotBuffer;
2343 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2344 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode);
2345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2346 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2347 }
2348
2349 /* *pivotSource==NULL */
2350 errorCode=U_ZERO_ERROR;
2351 pivotSource=NULL;
2352 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2353 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2354 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2355 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
2356 }
2357
2358 /* *source==NULL */
2359 errorCode=U_ZERO_ERROR;
2360 src=NULL;
2361 pivotSource=pivotBuffer;
2362 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2363 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2364 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2365 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2366 }
2367
2368 /* streaming conversion without a pivot buffer */
2369 errorCode=U_ZERO_ERROR;
2370 src=srcBuffer;
2371 pivotSource=pivotBuffer;
2372 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2373 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode);
2374 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2375 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2376 }
2377
2378 ucnv_close(cnv1);
2379 ucnv_close(cnv2);
2380 #endif
2381 }
2382
/*
 * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
 *
 * Every entry is a NUL-terminated byte string holding one ill-formed UTF-8
 * sequence. The order of the entries is significant to the tests that
 * report failures by index.
 */
static const char *const badUTF8[]={
    /* trail byte */
    "\x80",

    /* truncated multi-byte sequences: lead byte only */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* truncated multi-byte sequences: two bytes */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0", "\xee\x80",
    "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80", "\xf4\x90",
    "\xf8\x80", "\xfc\x80",

    /* truncated multi-byte sequences: three bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80",
    "\xf4\x80\x80", "\xf4\x90\x80",
    "\xf8\x80\x80", "\xfc\x80\x80",

    /* truncated multi-byte sequences: four bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* truncated multi-byte sequences: five bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2438
2439 #define ARG_CHAR_ARR_SIZE 8
2440
2441 /* get some character that can be converted and convert it */
2442 static UBool getTestChar(UConverter *cnv, const char *converterName,
2443 char charUTF8[4], int32_t *pCharUTF8Length,
2444 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2445 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2446 UChar utf16[U16_MAX_LENGTH];
2447 int32_t utf16Length;
2448
2449 const UChar *utf16Source;
2450 char *target;
2451
2452 USet *set;
2453 UChar32 c;
2454 UErrorCode errorCode;
2455
2456 errorCode=U_ZERO_ERROR;
2457 set=uset_open(1, 0);
2458 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2459 c=uset_charAt(set, uset_size(set)/2);
2460 uset_close(set);
2461
2462 utf16Length=0;
2463 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2464 *pCharUTF8Length=0;
2465 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
2466
2467 utf16Source=utf16;
2468 target=char0;
2469 ucnv_fromUnicode(cnv,
2470 &target, char0+ARG_CHAR_ARR_SIZE,
2471 &utf16Source, utf16+utf16Length,
2472 NULL, FALSE, &errorCode);
2473 *pChar0Length=(int32_t)(target-char0);
2474
2475 utf16Source=utf16;
2476 target=char1;
2477 ucnv_fromUnicode(cnv,
2478 &target, char1+ARG_CHAR_ARR_SIZE,
2479 &utf16Source, utf16+utf16Length,
2480 NULL, FALSE, &errorCode);
2481 *pChar1Length=(int32_t)(target-char1);
2482
2483 if(U_FAILURE(errorCode)) {
2484 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
2485 return FALSE;
2486 }
2487 return TRUE;
2488 }
2489
2490 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2491 char charUTF8[4], int32_t charUTF8Length,
2492 char char0[8], int32_t char0Length,
2493 char char1[8], int32_t char1Length) {
2494 char utf8[16];
2495 int32_t utf8Length;
2496
2497 char output[16];
2498 int32_t outputLength;
2499
2500 char invalidChars[8];
2501 int8_t invalidLength;
2502
2503 const char *source;
2504 char *target;
2505
2506 UChar pivotBuffer[8];
2507 UChar *pivotSource, *pivotTarget;
2508
2509 UErrorCode errorCode;
2510 int32_t i;
2511
2512 /* test truncated sequences */
2513 errorCode=U_ZERO_ERROR;
2514 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2515
2516 memcpy(utf8, charUTF8, charUTF8Length);
2517
2518 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2519 /* truncated sequence? */
2520 int32_t length=strlen(badUTF8[i]);
2521 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) {
2522 continue;
2523 }
2524
2525 /* assemble a string with the test character and the truncated sequence */
2526 memcpy(utf8+charUTF8Length, badUTF8[i], length);
2527 utf8Length=charUTF8Length+length;
2528
2529 /* convert and check the invalidChars */
2530 source=utf8;
2531 target=output;
2532 pivotSource=pivotTarget=pivotBuffer;
2533 errorCode=U_ZERO_ERROR;
2534 ucnv_convertEx(cnv, utf8Cnv,
2535 &target, output+sizeof(output),
2536 &source, utf8+utf8Length,
2537 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2538 TRUE, TRUE, /* reset & flush */
2539 &errorCode);
2540 outputLength=(int32_t)(target-output);
2541 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) {
2542 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
2543 continue;
2544 }
2545
2546 errorCode=U_ZERO_ERROR;
2547 invalidLength=(int8_t)sizeof(invalidChars);
2548 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
2549 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
2550 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
2551 }
2552 }
2553 }
2554
2555 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2556 char charUTF8[4], int32_t charUTF8Length,
2557 char char0[8], int32_t char0Length,
2558 char char1[8], int32_t char1Length) {
2559 char utf8[600], expect[600];
2560 int32_t utf8Length, expectLength;
2561
2562 char testName[32];
2563
2564 UErrorCode errorCode;
2565 int32_t i;
2566
2567 errorCode=U_ZERO_ERROR;
2568 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
2569
2570 /*
2571 * assemble an input string with the test character between each
2572 * bad sequence,
2573 * and an expected string with repeated test character output
2574 */
2575 memcpy(utf8, charUTF8, charUTF8Length);
2576 utf8Length=charUTF8Length;
2577
2578 memcpy(expect, char0, char0Length);
2579 expectLength=char0Length;
2580
2581 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2582 int32_t length=strlen(badUTF8[i]);
2583 memcpy(utf8+utf8Length, badUTF8[i], length);
2584 utf8Length+=length;
2585
2586 memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
2587 utf8Length+=charUTF8Length;
2588
2589 memcpy(expect+expectLength, char1, char1Length);
2590 expectLength+=char1Length;
2591 }
2592
2593 /* expect that each bad UTF-8 sequence is detected and skipped */
2594 strcpy(testName, "from bad UTF-8 to ");
2595 strcat(testName, converterName);
2596
2597 convertExMultiStreaming(utf8Cnv, cnv,
2598 utf8, utf8Length,
2599 expect, expectLength,
2600 testName,
2601 U_ZERO_ERROR);
2602 }
2603
2604 /* Test illegal UTF-8 input. */
2605 static void TestConvertExFromUTF8() {
2606 static const char *const converterNames[]={
2607 #if !UCONFIG_NO_LEGACY_CONVERSION
2608 "windows-1252",
2609 "shift-jis",
2610 #endif
2611 "us-ascii",
2612 "iso-8859-1",
2613 "utf-8"
2614 };
2615
2616 UConverter *utf8Cnv, *cnv;
2617 UErrorCode errorCode;
2618 int32_t i;
2619
2620 /* fromUnicode versions of some character, from initial state and later */
2621 char charUTF8[4], char0[8], char1[8];
2622 int32_t charUTF8Length, char0Length, char1Length;
2623
2624 errorCode=U_ZERO_ERROR;
2625 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2626 if(U_FAILURE(errorCode)) {
2627 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2628 return;
2629 }
2630
2631 for(i=0; i<LENGTHOF(converterNames); ++i) {
2632 errorCode=U_ZERO_ERROR;
2633 cnv=ucnv_open(converterNames[i], &errorCode);
2634 if(U_FAILURE(errorCode)) {
2635 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2636 continue;
2637 }
2638 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2639 continue;
2640 }
2641 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2642 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2643 ucnv_close(cnv);
2644 }
2645 ucnv_close(utf8Cnv);
2646 }
2647
2648 static void TestConvertExFromUTF8_C5F0() {
2649 static const char *const converterNames[]={
2650 #if !UCONFIG_NO_LEGACY_CONVERSION
2651 "windows-1251",
2652 "shift-jis",
2653 #endif
2654 "us-ascii",
2655 "iso-8859-1",
2656 "utf-8"
2657 };
2658
2659 UConverter *utf8Cnv, *cnv;
2660 UErrorCode errorCode;
2661 int32_t i;
2662
2663 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2664 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2665 static const char twoNCRs[16]={
2666 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2667 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2668 };
2669 static const char twoFFFD[6]={
2670 (char)0xef, (char)0xbf, (char)0xbd,
2671 (char)0xef, (char)0xbf, (char)0xbd
2672 };
2673 const char *expected;
2674 int32_t expectedLength;
2675 char dest[20]; /* longer than longest expectedLength */
2676
2677 const char *src;
2678 char *target;
2679
2680 UChar pivotBuffer[128];
2681 UChar *pivotSource, *pivotTarget;
2682
2683 errorCode=U_ZERO_ERROR;
2684 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2685 if(U_FAILURE(errorCode)) {
2686 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2687 return;
2688 }
2689
2690 for(i=0; i<LENGTHOF(converterNames); ++i) {
2691 errorCode=U_ZERO_ERROR;
2692 cnv=ucnv_open(converterNames[i], &errorCode);
2693 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2694 NULL, NULL, &errorCode);
2695 if(U_FAILURE(errorCode)) {
2696 log_data_err("unable to open %s converter - %s\n",
2697 converterNames[i], u_errorName(errorCode));
2698 continue;
2699 }
2700 src=bad_utf8;
2701 target=dest;
2702 uprv_memset(dest, 9, sizeof(dest));
2703 if(i==LENGTHOF(converterNames)-1) {
2704 /* conversion to UTF-8 yields two U+FFFD directly */
2705 expected=twoFFFD;
2706 expectedLength=6;
2707 } else {
2708 /* conversion to a non-Unicode charset yields two NCRs */
2709 expected=twoNCRs;
2710 expectedLength=16;
2711 }
2712 pivotBuffer[0]=0;
2713 pivotBuffer[1]=1;
2714 pivotBuffer[2]=2;
2715 pivotSource=pivotTarget=pivotBuffer;
2716 ucnv_convertEx(
2717 cnv, utf8Cnv,
2718 &target, dest+expectedLength,
2719 &src, bad_utf8+sizeof(bad_utf8),
2720 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2721 TRUE, TRUE, &errorCode);
2722 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2723 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2724 dest[expectedLength]!=9
2725 ) {
2726 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2727 }
2728 ucnv_close(cnv);
2729 }
2730 ucnv_close(utf8Cnv);
2731 }
2732
2733 static void
2734 TestConvertAlgorithmic() {
2735 #if !UCONFIG_NO_LEGACY_CONVERSION
2736 static const uint8_t
2737 utf8[]={
2738 /* 4e00 30a1 ff61 0410 */
2739 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2740 },
2741 shiftJIS[]={
2742 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2743 },
2744 /*errorTarget[]={*/
2745 /*
2746 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2747 * SUB, SUB, 0x40, SUB, SUB, 0x40
2748 */
2749 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2750 /*},*/
2751 utf16[]={
2752 0xfe, 0xff /* BOM only, no text */
2753 },
2754 utf32[]={
2755 0xff, 0xfe, 0, 0 /* BOM only, no text */
2756 };
2757
2758 char target[100], utf8NUL[100], shiftJISNUL[100];
2759
2760 UConverter *cnv;
2761 UErrorCode errorCode;
2762
2763 int32_t length;
2764
2765 errorCode=U_ZERO_ERROR;
2766 cnv=ucnv_open("Shift-JIS", &errorCode);
2767 if(U_FAILURE(errorCode)) {
2768 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2769 ucnv_close(cnv);
2770 return;
2771 }
2772
2773 memcpy(utf8NUL, utf8, sizeof(utf8));
2774 utf8NUL[sizeof(utf8)]=0;
2775 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2776 shiftJISNUL[sizeof(shiftJIS)]=0;
2777
2778 /*
2779 * The to/from algorithmic convenience functions share a common implementation,
2780 * so we need not test all permutations of them.
2781 */
2782
2783 /* length in, not terminated out */
2784 errorCode=U_ZERO_ERROR;
2785 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2786 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2787 length!=sizeof(shiftJIS) ||
2788 memcmp(target, shiftJIS, length)!=0
2789 ) {
2790 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2791 u_errorName(errorCode), length, sizeof(shiftJIS));
2792 }
2793
2794 /* terminated in and out */
2795 memset(target, 0x55, sizeof(target));
2796 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2797 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2798 if( errorCode!=U_ZERO_ERROR ||
2799 length!=sizeof(utf8) ||
2800 memcmp(target, utf8, length)!=0
2801 ) {
2802 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2803 u_errorName(errorCode), length, sizeof(shiftJIS));
2804 }
2805
2806 /* empty string, some target buffer */
2807 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2808 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2809 if( errorCode!=U_ZERO_ERROR ||
2810 length!=0
2811 ) {
2812 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2813 u_errorName(errorCode), length);
2814 }
2815
2816 /* pseudo-empty string, no target buffer */
2817 errorCode=U_ZERO_ERROR;
2818 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2819 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2820 length!=0
2821 ) {
2822 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2823 u_errorName(errorCode), length);
2824 }
2825
2826 errorCode=U_ZERO_ERROR;
2827 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
2828 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2829 length!=0
2830 ) {
2831 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2832 u_errorName(errorCode), length);
2833 }
2834
2835 /* bad arguments */
2836 errorCode=U_MESSAGE_PARSE_ERROR;
2837 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2838 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2839 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2840 }
2841
2842 /* source==NULL */
2843 errorCode=U_ZERO_ERROR;
2844 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
2845 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2846 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
2847 }
2848
2849 /* illegal alg. type */
2850 errorCode=U_ZERO_ERROR;
2851 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
2852 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2853 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
2854 }
2855 ucnv_close(cnv);
2856 #endif
2857 }
2858
2859 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
2860 static void TestLMBCSMaxChar(void) {
2861 static const struct {
2862 int8_t maxSize;
2863 const char *name;
2864 } converter[] = {
2865 /* some non-LMBCS converters - perfect test setup here */
2866 { 1, "US-ASCII"},
2867 { 1, "ISO-8859-1"},
2868
2869 { 2, "UTF-16"},
2870 { 2, "UTF-16BE"},
2871 { 3, "UTF-8"},
2872 { 3, "CESU-8"},
2873 { 3, "SCSU"},
2874 { 4, "UTF-32"},
2875 { 4, "UTF-7"},
2876 { 4, "IMAP-mailbox-name"},
2877 { 4, "BOCU-1"},
2878
2879 { 1, "windows-1256"},
2880 { 2, "Shift-JIS"},
2881 { 2, "ibm-16684"},
2882 { 3, "ibm-930"},
2883 { 3, "ibm-1390"},
2884 { 4, "*test3"},
2885 { 16,"*test4"},
2886
2887 { 4, "ISCII"},
2888 { 4, "HZ"},
2889
2890 { 3, "ISO-2022"},
2891 { 3, "ISO-2022-KR"},
2892 { 6, "ISO-2022-JP"},
2893 { 8, "ISO-2022-CN"},
2894
2895 /* LMBCS */
2896 { 3, "LMBCS-1"},
2897 { 3, "LMBCS-2"},
2898 { 3, "LMBCS-3"},
2899 { 3, "LMBCS-4"},
2900 { 3, "LMBCS-5"},
2901 { 3, "LMBCS-6"},
2902 { 3, "LMBCS-8"},
2903 { 3, "LMBCS-11"},
2904 { 3, "LMBCS-16"},
2905 { 3, "LMBCS-17"},
2906 { 3, "LMBCS-18"},
2907 { 3, "LMBCS-19"}
2908 };
2909 int32_t idx;
2910
2911 for (idx = 0; idx < LENGTHOF(converter); idx++) {
2912 UErrorCode status = U_ZERO_ERROR;
2913 UConverter *cnv = cnv_open(converter[idx].name, &status);
2914 if (U_FAILURE(status)) {
2915 continue;
2916 }
2917 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
2918 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2919 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
2920 }
2921 ucnv_close(cnv);
2922 }
2923
2924 /* mostly test that the macro compiles */
2925 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2926 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
2927 }
2928 }
2929 #endif
2930
2931 static void TestJ1968(void) {
2932 UErrorCode err = U_ZERO_ERROR;
2933 UConverter *cnv;
2934 char myConvName[] = "My really really really really really really really really really really really"
2935 " really really really really really really really really really really really"
2936 " really really really really really really really really long converter name";
2937 UChar myConvNameU[sizeof(myConvName)];
2938
2939 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
2940
2941 err = U_ZERO_ERROR;
2942 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
2943 cnv = ucnv_openU(myConvNameU, &err);
2944 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2945 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2946 }
2947
2948 err = U_ZERO_ERROR;
2949 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2950 cnv = ucnv_openU(myConvNameU, &err);
2951 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2952 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2953 }
2954
2955 err = U_ZERO_ERROR;
2956 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
2957 cnv = ucnv_openU(myConvNameU, &err);
2958 if (cnv || err != U_FILE_ACCESS_ERROR) {
2959 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2960 }
2961
2962
2963
2964
2965 err = U_ZERO_ERROR;
2966 cnv = ucnv_open(myConvName, &err);
2967 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2968 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2969 }
2970
2971 err = U_ZERO_ERROR;
2972 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
2973 cnv = ucnv_open(myConvName, &err);
2974 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2975 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2976 }
2977
2978 err = U_ZERO_ERROR;
2979 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2980 cnv = ucnv_open(myConvName, &err);
2981 if (cnv || err != U_FILE_ACCESS_ERROR) {
2982 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2983 }
2984
2985 err = U_ZERO_ERROR;
2986 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2987 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
2988 cnv = ucnv_open(myConvName, &err);
2989 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2990 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2991 }
2992
2993 /* The comma isn't really a part of the converter name. */
2994 err = U_ZERO_ERROR;
2995 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2996 cnv = ucnv_open(myConvName, &err);
2997 if (cnv || err != U_FILE_ACCESS_ERROR) {
2998 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2999 }
3000
3001 err = U_ZERO_ERROR;
3002 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3003 cnv = ucnv_open(myConvName, &err);
3004 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3005 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3006 }
3007
3008 err = U_ZERO_ERROR;
3009 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3010 cnv = ucnv_open(myConvName, &err);
3011 if (cnv || err != U_FILE_ACCESS_ERROR) {
3012 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3013 }
3014
3015 }
3016
3017 #if !UCONFIG_NO_LEGACY_CONVERSION
3018 static void
3019 testSwap(const char *name, UBool swap) {
3020 /*
3021 * Test Unicode text.
3022 * Contains characters that are the highest for some of the
3023 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3024 * tables copies the entire tables.
3025 */
3026 static const UChar text[]={
3027 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3028 };
3029
3030 UChar uNormal[32], uSwapped[32];
3031 char normal[32], swapped[32];
3032 const UChar *pcu;
3033 UChar *pu;
3034 char *pc;
3035 int32_t i, normalLength, swappedLength;
3036 UChar u;
3037 char c;
3038
3039 const char *swappedName;
3040 UConverter *cnv, *swapCnv;
3041 UErrorCode errorCode;
3042
3043 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3044
3045 /* open both the normal and the LF/NL-swapping converters */
3046 strcpy(swapped, name);
3047 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3048
3049 errorCode=U_ZERO_ERROR;
3050 swapCnv=ucnv_open(swapped, &errorCode);
3051 cnv=ucnv_open(name, &errorCode);
3052 if(U_FAILURE(errorCode)) {
3053 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3054 goto cleanup;
3055 }
3056
3057 /* the name must contain the swap option if and only if we expect the converter to swap */
3058 swappedName=ucnv_getName(swapCnv, &errorCode);
3059 if(U_FAILURE(errorCode)) {
3060 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3061 goto cleanup;
3062 }
3063
3064 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3065 if(swap != (pc!=NULL)) {
3066 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3067 goto cleanup;
3068 }
3069
3070 /* convert to EBCDIC */
3071 pcu=text;
3072 pc=normal;
3073 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3074 normalLength=(int32_t)(pc-normal);
3075
3076 pcu=text;
3077 pc=swapped;
3078 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3079 swappedLength=(int32_t)(pc-swapped);
3080
3081 if(U_FAILURE(errorCode)) {
3082 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3083 goto cleanup;
3084 }
3085
3086 /* compare EBCDIC output */
3087 if(normalLength!=swappedLength) {
3088 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3089 goto cleanup;
3090 }
3091 for(i=0; i<normalLength; ++i) {
3092 /* swap EBCDIC LF/NL for comparison */
3093 c=normal[i];
3094 if(swap) {
3095 if(c==0x15) {
3096 c=0x25;
3097 } else if(c==0x25) {
3098 c=0x15;
3099 }
3100 }
3101
3102 if(c!=swapped[i]) {
3103 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3104 goto cleanup;
3105 }
3106 }
3107
3108 /* convert back to Unicode (may not roundtrip) */
3109 pc=normal;
3110 pu=uNormal;
3111 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
3112 normalLength=(int32_t)(pu-uNormal);
3113
3114 pc=normal;
3115 pu=uSwapped;
3116 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
3117 swappedLength=(int32_t)(pu-uSwapped);
3118
3119 if(U_FAILURE(errorCode)) {
3120 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3121 goto cleanup;
3122 }
3123
3124 /* compare EBCDIC output */
3125 if(normalLength!=swappedLength) {
3126 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3127 goto cleanup;
3128 }
3129 for(i=0; i<normalLength; ++i) {
3130 /* swap EBCDIC LF/NL for comparison */
3131 u=uNormal[i];
3132 if(swap) {
3133 if(u==0xa) {
3134 u=0x85;
3135 } else if(u==0x85) {
3136 u=0xa;
3137 }
3138 }
3139
3140 if(u!=uSwapped[i]) {
3141 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3142 goto cleanup;
3143 }
3144 }
3145
3146 /* clean up */
3147 cleanup:
3148 ucnv_close(cnv);
3149 ucnv_close(swapCnv);
3150 }
3151
3152 static void
3153 TestEBCDICSwapLFNL() {
3154 static const struct {
3155 const char *name;
3156 UBool swap;
3157 } tests[]={
3158 { "ibm-37", TRUE },
3159 { "ibm-1047", TRUE },
3160 { "ibm-1140", TRUE },
3161 { "ibm-930", TRUE },
3162 { "iso-8859-3", FALSE }
3163 };
3164
3165 int i;
3166
3167 for(i=0; i<LENGTHOF(tests); ++i) {
3168 testSwap(tests[i].name, tests[i].swap);
3169 }
3170 }
3171 #else
/* Stand-in used when legacy conversion is configured out: nothing to test. */
static void
TestEBCDICSwapLFNL() {
    /* test nothing... */
    return;
}
3176 #endif
3177
/* Version value 3.4.0.0; judging by the name this denotes ICU 3.4 -- TODO confirm where it is used, no reference is visible in the nearby tests. */
3178 static const UVersionInfo ICU_34 = {3,4,0,0};
3179
3180 static void TestFromUCountPending(){
3181 #if !UCONFIG_NO_LEGACY_CONVERSION
3182 UErrorCode status = U_ZERO_ERROR;
3183 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3184 static const struct {
3185 UChar input[6];
3186 int32_t len;
3187 int32_t exp;
3188 }fromUnicodeTests[] = {
3189 /*m:n conversion*/
3190 {{0xdbc4},1,1},
3191 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3192 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3193 };
3194 int i;
3195 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3196 if(U_FAILURE(status)){
3197 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3198 return;
3199 }
3200 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) {
3201 char tgt[10];
3202 char* target = tgt;
3203 char* targetLimit = target + 10;
3204 const UChar* source = fromUnicodeTests[i].input;
3205 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3206 int32_t len = 0;
3207 ucnv_reset(cnv);
3208 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3209 len = ucnv_fromUCountPending(cnv, &status);
3210 if(U_FAILURE(status)){
3211 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3212 status = U_ZERO_ERROR;
3213 continue;
3214 }
3215 if(len != fromUnicodeTests[i].exp){
3216 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3217 }
3218 }
3219 status = U_ZERO_ERROR;
3220 {
3221 /*
3222 * The converter has to read the tail before it knows that
3223 * only head alone matches.
3224 * At the end, the output for head will overflow the target,
3225 * middle will be pending, and tail will not have been consumed.
3226 */
3227 /*
3228 \U00101234 -> x (<U101234> \x07 |0)
3229 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3230 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3231 \U00060007 -> unassigned
3232 */
3233 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3234 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3235 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3236 char tgt[10];
3237 char* target = tgt;
3238 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3239 const UChar* source = head;
3240 const UChar* sourceLimit = source + u_strlen(head);
3241 int32_t len = 0;
3242 ucnv_reset(cnv);
3243 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3244 len = ucnv_fromUCountPending(cnv, &status);
3245 if(U_FAILURE(status)){
3246 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3247 status = U_ZERO_ERROR;
3248 }
3249 if(len!=4){
3250 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3251 }
3252 source = middle;
3253 sourceLimit = source + u_strlen(middle);
3254 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3255 len = ucnv_fromUCountPending(cnv, &status);
3256 if(U_FAILURE(status)){
3257 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3258 status = U_ZERO_ERROR;
3259 }
3260 if(len!=5){
3261 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3262 }
3263 source = tail;
3264 sourceLimit = source + u_strlen(tail);
3265 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3266 if(status != U_BUFFER_OVERFLOW_ERROR){
3267 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3268 }
3269 status = U_ZERO_ERROR;
3270 len = ucnv_fromUCountPending(cnv, &status);
3271 /* middle[1] is pending, tail has not been consumed */
3272 if(U_FAILURE(status)){
3273 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3274 }
3275 if(len!=1){
3276 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
3277 }
3278 }
3279 ucnv_close(cnv);
3280 #endif
3281 }
3282
3283 static void
3284 TestToUCountPending(){
3285 #if !UCONFIG_NO_LEGACY_CONVERSION
3286 UErrorCode status = U_ZERO_ERROR;
3287 static const struct {
3288 char input[6];
3289 int32_t len;
3290 int32_t exp;
3291 }toUnicodeTests[] = {
3292 /*m:n conversion*/
3293 {{0x05, 0x01, 0x02},3,3},
3294 {{0x01, 0x02},2,2},
3295 {{0x07, 0x00, 0x01, 0x02},4,4},
3296 };
3297
3298 int i;
3299 UConverterToUCallback *oldToUAction= NULL;
3300 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3301 if(U_FAILURE(status)){
3302 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3303 return;
3304 }
3305 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3306 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) {
3307 UChar tgt[20];
3308 UChar* target = tgt;
3309 UChar* targetLimit = target + 20;
3310 const char* source = toUnicodeTests[i].input;
3311 const char* sourceLimit = source + toUnicodeTests[i].len;
3312 int32_t len = 0;
3313 ucnv_reset(cnv);
3314 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3315 len = ucnv_toUCountPending(cnv,&status);
3316 if(U_FAILURE(status)){
3317 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3318 status = U_ZERO_ERROR;
3319 continue;
3320 }
3321 if(len != toUnicodeTests[i].exp){
3322 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3323 }
3324 }
3325 status = U_ZERO_ERROR;
3326 ucnv_close(cnv);
3327
3328 {
3329 /*
3330 * The converter has to read the tail before it knows that
3331 * only head alone matches.
3332 * At the end, the output for head will overflow the target,
3333 * mid will be pending, and tail will not have been consumed.
3334 */
3335 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3336 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3337 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3338 /*
3339 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3340 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3341 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3342 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3343 */
3344 UChar tgt[10];
3345 UChar* target = tgt;
3346 UChar* targetLimit = target + 1; /* expect overflow from converting */
3347 const char* source = head;
3348 const char* sourceLimit = source + strlen(head);
3349 int32_t len = 0;
3350 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3351 if(U_FAILURE(status)){
3352 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3353 return;
3354 }
3355 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3356 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3357 len = ucnv_toUCountPending(cnv,&status);
3358 if(U_FAILURE(status)){
3359 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3360 }
3361 if(len != 4){
3362 log_err("Did not get the expected len for head.\n");
3363 }
3364 source=mid;
3365 sourceLimit = source+strlen(mid);
3366 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3367 len = ucnv_toUCountPending(cnv,&status);
3368 if(U_FAILURE(status)){
3369 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3370 }
3371 if(len != 8){
3372 log_err("Did not get the expected len for mid.\n");
3373 }
3374
3375 source=tail;
3376 sourceLimit = source+strlen(tail);
3377 targetLimit = target;
3378 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3379 if(status != U_BUFFER_OVERFLOW_ERROR){
3380 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3381 }
3382 status = U_ZERO_ERROR;
3383 len = ucnv_toUCountPending(cnv,&status);
3384 /* mid[4] is pending, tail has not been consumed */
3385 if(U_FAILURE(status)){
3386 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3387 }
3388 if(len != 4){
3389 log_err("Did not get the expected len for tail.\n");
3390 }
3391 ucnv_close(cnv);
3392 }
3393 #endif
3394 }
3395
3396 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3397 UErrorCode status = U_ZERO_ERROR;
3398 UConverter *cnv;
3399 ucnv_setDefaultName(name);
3400 if(strcmp(ucnv_getDefaultName(), expected)==0)
3401 log_verbose("setDefaultName of %s works.\n", name);
3402 else
3403 log_err("setDefaultName of %s failed\n", name);
3404 cnv=ucnv_open(NULL, &status);
3405 if (U_FAILURE(status) || cnv == NULL) {
3406 log_err("opening the default converter of %s failed\n", name);
3407 return;
3408 }
3409 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3410 log_verbose("ucnv_getName of %s works.\n", name);
3411 else
3412 log_err("ucnv_getName of %s failed\n", name);
3413 ucnv_close(cnv);
3414 }
3415
3416 static void TestDefaultName(void) {
3417 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3418 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3419 strcpy(defaultName, ucnv_getDefaultName());
3420
3421 log_verbose("getDefaultName returned %s\n", defaultName);
3422
3423 /*change the default name by setting it */
3424 TestOneDefaultNameChange("UTF-8", "UTF-8");
3425 #if U_CHARSET_IS_UTF8
3426 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3427 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3428 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3429 #else
3430 # if !UCONFIG_NO_LEGACY_CONVERSION
3431 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3432 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3433 # endif
3434 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3435 #endif
3436
3437 /*set the default name back*/
3438 ucnv_setDefaultName(defaultName);
3439 }
3440
3441 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
3442
/* Returns -1, 0 or 1 according to whether n is negative, zero or positive. */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (n>0)-(n<0);
}
3453
/*
 * Checks ucnv_compareNames() on each pair of neighbouring names in a list.
 *
 * names: NULL-terminated array. names[0] is the relation that must hold
 *        between every name and its successor: "=" (equal), "<" (less), or
 *        anything else (greater). The names to compare follow.
 * A log_err() is issued for every pair whose comparison has the wrong sign.
 */
static void
compareNames(const char **names) {
    const char *relation=names[0];
    const char *previous, *current;
    int expectedSign, cmp;
    int32_t idx;

    switch(*relation) {
    case '=':
        expectedSign=0;
        break;
    case '<':
        expectedSign=-1;
        break;
    default:
        expectedSign=1;
        break;
    }

    previous=names[1];
    if(previous==NULL) {
        /* no names at all */
        return;
    }
    for(idx=2; (current=names[idx])!=NULL; ++idx) {
        cmp=ucnv_compareNames(previous, current);
        if(sign(cmp)!=expectedSign) {
            log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", previous, current, cmp, expectedSign);
        }
        previous=current;
    }
}
3480
/*
 * Runs compareNames() on several lists of converter names; the first entry
 * of each list states the relation expected between neighbouring names.
 */
static void
TestCompareNames() {
    static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
    static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
    static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
    static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };

    /* processed in this order */
    static const char **const lists[]={ equalUTF8, equalIBM, lessMac, lessUTF080 };
    int32_t idx;

    for(idx=0; idx<LENGTHOF(lists); ++idx) {
        compareNames(lists[idx]);
    }
}
3493
3494 static void
3495 TestSubstString() {
3496 static const UChar surrogate[1]={ 0xd900 };
3497 char buffer[16];
3498
3499 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3500 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3501 UConverter *cnv;
3502 UErrorCode errorCode;
3503 int32_t length;
3504 int8_t len8;
3505
3506 /* UTF-16/32: test that the BOM is output before the sub character */
3507 errorCode=U_ZERO_ERROR;
3508 cnv=ucnv_open("UTF-16", &errorCode);
3509 if(U_FAILURE(errorCode)) {
3510 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3511 return;
3512 }
3513 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3514 ucnv_close(cnv);
3515 if(U_FAILURE(errorCode) ||
3516 length!=4 ||
3517 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3518 ) {
3519 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3520 }
3521
3522 errorCode=U_ZERO_ERROR;
3523 cnv=ucnv_open("UTF-32", &errorCode);
3524 if(U_FAILURE(errorCode)) {
3525 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3526 return;
3527 }
3528 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3529 ucnv_close(cnv);
3530 if(U_FAILURE(errorCode) ||
3531 length!=8 ||
3532 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3533 ) {
3534 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3535 }
3536
3537 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3538 errorCode=U_ZERO_ERROR;
3539 cnv=ucnv_open("ISO-8859-1", &errorCode);
3540 if(U_FAILURE(errorCode)) {
3541 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3542 return;
3543 }
3544 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3545 if(U_FAILURE(errorCode)) {
3546 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
3547 } else {
3548 len8 = sizeof(buffer);
3549 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3550 /* Stateless converter, we expect the string converted to charset bytes. */
3551 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3552 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
3553 }
3554 }
3555 ucnv_close(cnv);
3556
3557 #if !UCONFIG_NO_LEGACY_CONVERSION
3558 errorCode=U_ZERO_ERROR;
3559 cnv=ucnv_open("HZ", &errorCode);
3560 if(U_FAILURE(errorCode)) {
3561 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3562 return;
3563 }
3564 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3565 if(U_FAILURE(errorCode)) {
3566 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3567 } else {
3568 len8 = sizeof(buffer);
3569 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3570 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3571 if(U_FAILURE(errorCode) || len8!=0) {
3572 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3573 }
3574 }
3575 ucnv_close(cnv);
3576 #endif
3577 /*
3578 * Further testing of ucnv_setSubstString() is done via intltest convert.
3579 * We do not test edge cases of illegal arguments and similar because the
3580 * function implementation uses all of its parameters in calls to other
3581 * functions with UErrorCode parameters.
3582 */
3583 }
3584
3585 static void
3586 InvalidArguments() {
3587 UConverter *cnv;
3588 UErrorCode errorCode;
3589 char charBuffer[2] = {1, 1};
3590 char ucharAsCharBuffer[2] = {2, 2};
3591 char *charsPtr = charBuffer;
3592 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
3593 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3594
3595 errorCode=U_ZERO_ERROR;
3596 cnv=ucnv_open("UTF-8", &errorCode);
3597 if(U_FAILURE(errorCode)) {
3598 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3599 return;
3600 }
3601
3602 errorCode=U_ZERO_ERROR;
3603 /* This one should fail because an incomplete UChar is being passed in */
3604 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode);
3605 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3606 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3607 }
3608
3609 errorCode=U_ZERO_ERROR;
3610 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3611 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode);
3612 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3613 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3614 }
3615
3616 errorCode=U_ZERO_ERROR;
3617 /* This one should fail because an incomplete UChar is being passed in */
3618 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3619 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3620 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3621 }
3622
3623 errorCode=U_ZERO_ERROR;
3624 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3625 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3626 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3627 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3628 }
3629
3630 if (charBuffer[0] != 1 || charBuffer[1] != 1
3631 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3632 {
3633 log_err("Data was incorrectly written to buffers\n");
3634 }
3635
3636 ucnv_close(cnv);
3637 }
3638
3639 static void TestGetName() {
3640 static const char *const names[] = {
3641 "Unicode", "UTF-16",
3642 "UnicodeBigUnmarked", "UTF-16BE",
3643 "UnicodeBig", "UTF-16BE,version=1",
3644 "UnicodeLittleUnmarked", "UTF-16LE",
3645 "UnicodeLittle", "UTF-16LE,version=1",
3646 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
3647 };
3648 int32_t i;
3649 for(i = 0; i < LENGTHOF(names); i += 2) {
3650 UErrorCode errorCode = U_ZERO_ERROR;
3651 UConverter *cnv = ucnv_open(names[i], &errorCode);
3652 if(U_SUCCESS(errorCode)) {
3653 const char *name = ucnv_getName(cnv, &errorCode);
3654 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3655 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3656 names[i], name, names[i+1], u_errorName(errorCode));
3657 }
3658 ucnv_close(cnv);
3659 }
3660 }
3661 }
3662
3663 static void TestUTFBOM() {
3664 static const UChar a16[] = { 0x61 };
3665 static const char *const names[] = {
3666 "UTF-16",
3667 "UTF-16,version=1",
3668 "UTF-16BE",
3669 "UnicodeBig",
3670 "UTF-16LE",
3671 "UnicodeLittle"
3672 };
3673 static const uint8_t expected[][5] = {
3674 #if U_IS_BIG_ENDIAN
3675 { 4, 0xfe, 0xff, 0, 0x61 },
3676 { 4, 0xfe, 0xff, 0, 0x61 },
3677 #else
3678 { 4, 0xff, 0xfe, 0x61, 0 },
3679 { 4, 0xff, 0xfe, 0x61, 0 },
3680 #endif
3681
3682 { 2, 0, 0x61 },
3683 { 4, 0xfe, 0xff, 0, 0x61 },
3684
3685 { 2, 0x61, 0 },
3686 { 4, 0xff, 0xfe, 0x61, 0 }
3687 };
3688
3689 char bytes[10];
3690 int32_t i;
3691
3692 for(i = 0; i < LENGTHOF(names); ++i) {
3693 UErrorCode errorCode = U_ZERO_ERROR;
3694 UConverter *cnv = ucnv_open(names[i], &errorCode);
3695 int32_t length = 0;
3696 const uint8_t *exp = expected[i];
3697 if (U_FAILURE(errorCode)) {
3698 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3699 continue;
3700 }
3701 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3702
3703 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3704 log_err("unexpected %s BOM writing behavior -- %s\n",
3705 names[i], u_errorName(errorCode));
3706 }
3707 ucnv_close(cnv);
3708 }
3709 }