]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/cintltst/ccapitst.c
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / ccapitst.c
... / ...
CommitLineData
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*****************************************************************************
7*
8* File CU_CAPITST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API
13******************************************************************************
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "unicode/uloc.h"
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
22#include "unicode/putil.h"
23#include "unicode/uset.h"
24#include "unicode/ustring.h"
25#include "ucnv_bld.h" /* for sizeof(UConverter) */
26#include "cmemory.h" /* for UAlignedMemory */
27#include "cintltst.h"
28#include "ccapitst.h"
29#include "cstring.h"
30
/*
 * File-local helper macros.
 *
 * Every replacement list is fully parenthesized so that a macro expands to a
 * single operand no matter which operators surround it at the point of use
 * (e.g. "x / MAX_FILE_LEN" or "MAX_FILE_LEN % y").
 */

/* Number of elements in a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Number of code pages exercised by TestConvert(). */
#define NUM_CODEPAGE 1
/* Capacity, in elements, of the conversion buffers allocated by TestConvert(). */
#define MAX_FILE_LEN (1024*20)
/* Size of the buffer that receives the path of the Unicode test file. */
#define UCS_FILE_NAME_SIZE 512
36
37/*returns an action other than the one provided*/
38static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
39static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
40
41static UConverter *
42cnv_open(const char *name, UErrorCode *pErrorCode) {
43 if(name!=NULL && name[0]=='*') {
44 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
45 } else {
46 return ucnv_open(name, pErrorCode);
47 }
48}
49
50
51static void ListNames(void);
52static void TestFlushCache(void);
53static void TestDuplicateAlias(void);
54static void TestCCSID(void);
55static void TestJ932(void);
56static void TestJ1968(void);
57static void TestLMBCSMaxChar(void);
58
59#if !UCONFIG_NO_LEGACY_CONVERSION
60static void TestConvertSafeCloneCallback(void);
61#endif
62
63static void TestEBCDICSwapLFNL(void);
64static void TestConvertEx(void);
65static void TestConvertExFromUTF8(void);
66static void TestConvertExFromUTF8_C5F0(void);
67static void TestConvertAlgorithmic(void);
68 void TestDefaultConverterError(void); /* defined in cctest.c */
69 void TestDefaultConverterSet(void); /* defined in cctest.c */
70static void TestToUCountPending(void);
71static void TestFromUCountPending(void);
72static void TestDefaultName(void);
73static void TestCompareNames(void);
74static void TestSubstString(void);
75static void InvalidArguments(void);
76static void TestGetName(void);
77static void TestUTFBOM(void);
78
79void addTestConvert(TestNode** root);
80
81void addTestConvert(TestNode** root)
82{
83 addTest(root, &ListNames, "tsconv/ccapitst/ListNames");
84 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert");
85 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache");
86 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias");
87 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");
88 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
89#if !UCONFIG_NO_LEGACY_CONVERSION
90 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback");
91#endif
92 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID");
93 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932");
94 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968");
95#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
96 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
97#endif
98 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");
99 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx");
100 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
101 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
102 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");
103 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
104 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
105#if !UCONFIG_NO_FILE_IO
106 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
107 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
108#endif
109 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName");
110 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames");
111 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString");
112 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments");
113 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName");
114 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
115}
116
117static void ListNames(void) {
118 UErrorCode err = U_ZERO_ERROR;
119 int32_t testLong1 = 0;
120 const char* available_conv;
121 UEnumeration *allNamesEnum = NULL;
122 int32_t allNamesCount = 0;
123 uint16_t count;
124
125 log_verbose("Testing ucnv_openAllNames()...");
126 allNamesEnum = ucnv_openAllNames(&err);
127 if(U_FAILURE(err)) {
128 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
129 }
130 else {
131 const char *string = NULL;
132 int32_t len = 0;
133 int32_t count1 = 0;
134 int32_t count2 = 0;
135 allNamesCount = uenum_count(allNamesEnum, &err);
136 while ((string = uenum_next(allNamesEnum, &len, &err))) {
137 count1++;
138 log_verbose("read \"%s\", length %i\n", string, len);
139 }
140 if (U_FAILURE(err)) {
141 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
142 err = U_ZERO_ERROR;
143 }
144 uenum_reset(allNamesEnum, &err);
145 while ((string = uenum_next(allNamesEnum, &len, &err))) {
146 count2++;
147 ucnv_close(ucnv_open(string, &err));
148 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
149 err = U_ZERO_ERROR;
150 }
151 if (count1 != count2) {
152 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
153 }
154 }
155 uenum_close(allNamesEnum);
156 err = U_ZERO_ERROR;
157
158 /*Tests ucnv_getAvailableName(), getAvialableCount()*/
159
160 log_verbose("Testing ucnv_countAvailable()...");
161
162 testLong1=ucnv_countAvailable();
163 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
164
165 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
166
167 available_conv = ucnv_getAvailableName(testLong1);
168 /*test ucnv_getAvailableName with err condition*/
169 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
170 available_conv = ucnv_getAvailableName(-1);
171 if(available_conv != NULL){
172 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
173 }
174
175 /* Test ucnv_countAliases() etc. */
176 count = ucnv_countAliases("utf-8", &err);
177 if(U_FAILURE(err)) {
178 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
179 } else if(count <= 0) {
180 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
181 } else {
182 /* try to get the aliases individually */
183 const char *alias;
184 alias = ucnv_getAlias("utf-8", 0, &err);
185 if(U_FAILURE(err)) {
186 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
187 } else if(strcmp("UTF-8", alias) != 0) {
188 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
189 } else {
190 uint16_t aliasNum;
191 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
192 alias = ucnv_getAlias("utf-8", aliasNum, &err);
193 if(U_FAILURE(err)) {
194 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
195 } else if(strlen(alias) > 20) {
196 /* sanity check */
197 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
198 } else {
199 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
200 }
201 }
202 if(U_SUCCESS(err)) {
203 /* try to fill an array with all aliases */
204 const char **aliases;
205 aliases=(const char **)malloc(count * sizeof(const char *));
206 if(aliases != 0) {
207 ucnv_getAliases("utf-8", aliases, &err);
208 if(U_FAILURE(err)) {
209 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
210 } else {
211 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
212 /* compare the pointers with the ones returned individually */
213 alias = ucnv_getAlias("utf-8", aliasNum, &err);
214 if(U_FAILURE(err)) {
215 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
216 } else if(aliases[aliasNum] != alias) {
217 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
218 }
219 }
220 }
221 free((char **)aliases);
222 }
223 }
224 }
225 }
226}
227
228
229static void TestConvert()
230{
231#if !UCONFIG_NO_LEGACY_CONVERSION
232 char myptr[4];
233 char save[4];
234 int32_t testLong1 = 0;
235 uint16_t rest = 0;
236 int32_t len = 0;
237 int32_t x = 0;
238 FILE* ucs_file_in = NULL;
239 UChar BOM = 0x0000;
240 UChar myUChar = 0x0000;
241 char* mytarget; /* [MAX_FILE_LEN] */
242 char* mytarget_1;
243 char* mytarget_use;
244 UChar* consumedUni = NULL;
245 char* consumed = NULL;
246 char* output_cp_buffer; /* [MAX_FILE_LEN] */
247 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */
248 UChar* ucs_file_buffer_use;
249 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
250 UChar* my_ucs_file_buffer_1;
251 int8_t ii = 0;
252 int32_t j = 0;
253 uint16_t codepage_index = 0;
254 int32_t cp = 0;
255 UErrorCode err = U_ZERO_ERROR;
256 char ucs_file_name[UCS_FILE_NAME_SIZE];
257 UConverterFromUCallback MIA1, MIA1_2;
258 UConverterToUCallback MIA2, MIA2_2;
259 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
260 UConverter* someConverters[5];
261 UConverter* myConverter = 0;
262 UChar* displayname = 0;
263
264 const char* locale;
265
266 UChar* uchar1 = 0;
267 UChar* uchar2 = 0;
268 UChar* uchar3 = 0;
269 int32_t targetcapacity2;
270 int32_t targetcapacity;
271 int32_t targetsize;
272 int32_t disnamelen;
273
274 const UChar* tmp_ucs_buf;
275 const UChar* tmp_consumedUni=NULL;
276 const char* tmp_mytarget_use;
277 const char* tmp_consumed;
278
279 /******************************************************************
280 Checking Unicode -> ksc
281 ******************************************************************/
282
283 const char* CodePagesToTest[NUM_CODEPAGE] =
284 {
285 "ibm-949_P110-1999"
286
287
288 };
289 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
290 {
291 949
292 };
293
294
295 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
296 {
297 1
298
299 };
300
301 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
302 {
303 2
304
305 };
306
307 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
308 {
309 0xAFFE
310 };
311
312 const char* CodePagesTestFiles[NUM_CODEPAGE] =
313 {
314 "uni-text.bin"
315 };
316
317
318 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
319 {
320 UCNV_IBM
321
322 };
323
324 const char* CodePagesLocale[NUM_CODEPAGE] =
325 {
326 "ko_KR"
327 };
328
329 UConverterFromUCallback oldFromUAction = NULL;
330 UConverterToUCallback oldToUAction = NULL;
331 const void* oldFromUContext = NULL;
332 const void* oldToUContext = NULL;
333
334 /* Allocate memory */
335 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
336 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
337 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
338 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
339
340 ucs_file_buffer_use = ucs_file_buffer;
341 mytarget_1=mytarget;
342 mytarget_use = mytarget;
343 my_ucs_file_buffer_1=my_ucs_file_buffer;
344
345 /* flush the converter cache to get a consistent state before the flushing is tested */
346 ucnv_flushCache();
347
348 /*Testing ucnv_openU()*/
349 {
350 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
351 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
352 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
353 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
354 UChar illegalName[100];
355 UConverter *converter=NULL;
356 err=U_ZERO_ERROR;
357 converter=ucnv_openU(converterName, &err);
358 if(U_FAILURE(err)){
359 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
360 }
361 ucnv_close(converter);
362 err=U_ZERO_ERROR;
363 converter=ucnv_openU(NULL, &err);
364 if(U_FAILURE(err)){
365 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
366 }
367 ucnv_close(converter);
368 /*testing with error value*/
369 err=U_ILLEGAL_ARGUMENT_ERROR;
370 converter=ucnv_openU(converterName, &err);
371 if(!(converter == NULL)){
372 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
373 }
374 ucnv_close(converter);
375 err=U_ZERO_ERROR;
376 u_uastrcpy(illegalName, "");
377 u_uastrcpy(illegalName, illegalNameChars);
378 ucnv_openU(illegalName, &err);
379 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
380 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
381 }
382
383 err=U_ZERO_ERROR;
384 ucnv_openU(firstSortedName, &err);
385 if(err!=U_FILE_ACCESS_ERROR){
386 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
387 }
388
389 err=U_ZERO_ERROR;
390 ucnv_openU(lastSortedName, &err);
391 if(err!=U_FILE_ACCESS_ERROR){
392 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
393 }
394
395 err=U_ZERO_ERROR;
396 }
397 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
398 {
399 UConverter *cnv=NULL;
400 err=U_ZERO_ERROR;
401 cnv=ucnv_open("ibm-949,Madhu", &err);
402 if(U_FAILURE(err)){
403 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
404 }
405 ucnv_close(cnv);
406
407 }
408 /*Testing ucnv_convert()*/
409 {
410 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
411 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
412 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
413 char *target=0;
414 sourceLimit=sizeof(source)/sizeof(source[0]);
415 err=U_ZERO_ERROR;
416 targetLimit=0;
417
418 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
419 if(err == U_BUFFER_OVERFLOW_ERROR){
420 err=U_ZERO_ERROR;
421 targetLimit=targetCapacity+1;
422 target=(char*)malloc(sizeof(char) * targetLimit);
423 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
424 }
425 if(U_FAILURE(err)){
426 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
427 }
428 else {
429 for(i=0; i<targetCapacity; i++){
430 if(target[i] != expectedTarget[i]){
431 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
432 }
433 }
434
435 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
436 if(U_FAILURE(err) || i!=7){
437 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
438 u_errorName(err), i);
439 }
440
441 /*Test error conditions*/
442 err=U_ZERO_ERROR;
443 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
444 if(i !=0){
445 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
446 }
447
448 err=U_ILLEGAL_ARGUMENT_ERROR;
449 sourceLimit=sizeof(source)/sizeof(source[0]);
450 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
451 if(i !=0 ){
452 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
453 }
454
455 err=U_ZERO_ERROR;
456 sourceLimit=sizeof(source)/sizeof(source[0]);
457 targetLimit=0;
458 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
459 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
460 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
461 }
462 err=U_ZERO_ERROR;
463 free(target);
464 }
465 }
466
467 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
468 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
469 err=U_ILLEGAL_ARGUMENT_ERROR;
470 if(ucnv_open(NULL, &err) != NULL){
471 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
472 }
473 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
475 }
476 err=U_ZERO_ERROR;
477
478 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
479 log_verbose("\n---Testing ucnv_open default...\n");
480 someConverters[0] = ucnv_open(NULL,&err);
481 someConverters[1] = ucnv_open(NULL,&err);
482 someConverters[2] = ucnv_open("utf8", &err);
483 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
484 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
485 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
486
487 /* Testing ucnv_getName()*/
488 /*default code page */
489 ucnv_getName(someConverters[0], &err);
490 if(U_FAILURE(err)) {
491 log_data_err("getName[0] failed\n");
492 } else {
493 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
494 }
495 ucnv_getName(someConverters[1], &err);
496 if(U_FAILURE(err)) {
497 log_data_err("getName[1] failed\n");
498 } else {
499 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
500 }
501
502 ucnv_close(someConverters[0]);
503 ucnv_close(someConverters[1]);
504 ucnv_close(someConverters[2]);
505 ucnv_close(someConverters[3]);
506
507
508 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
509 {
510 int32_t i = 0;
511
512 err = U_ZERO_ERROR;
513#ifdef U_TOPSRCDIR
514 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
515#else
516 strcpy(ucs_file_name, loadTestData(&err));
517
518 if(U_FAILURE(err)){
519 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
520 return;
521 }
522
523 {
524 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
525
526 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
527 *(index+1)=0;
528 }
529 }
530
531 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
532#endif
533 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
534
535 ucs_file_in = fopen(ucs_file_name,"rb");
536 if (!ucs_file_in)
537 {
538 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
539 return;
540 }
541
542 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
543
544 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
545 /* ucnv_flushCache(); */
546 myConverter =ucnv_open( "ibm-949", &err);
547 if (!myConverter || U_FAILURE(err))
548 {
549 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
550 fclose(ucs_file_in);
551 break;
552 }
553
554 /*testing for ucnv_getName() */
555 log_verbose("Testing ucnv_getName()...\n");
556 ucnv_getName(myConverter, &err);
557 if(U_FAILURE(err))
558 log_err("Error in getName\n");
559 else
560 {
561 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
562 }
563 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
564 log_err("getName failed\n");
565 else
566 log_verbose("getName ok\n");
567 /*Test getName with error condition*/
568 {
569 const char* name=0;
570 err=U_ILLEGAL_ARGUMENT_ERROR;
571 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
572 name=ucnv_getName(myConverter, &err);
573 if(name != NULL){
574 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
575 }
576 err=U_ZERO_ERROR;
577 }
578
579
580 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
581
582 log_verbose("Testing ucnv_getMaxCharSize()...\n");
583 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
584 log_verbose("Max byte per character OK\n");
585 else
586 log_err("Max byte per character failed\n");
587
588 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
589 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
590 log_verbose("Min byte per character OK\n");
591 else
592 log_err("Min byte per character failed\n");
593
594
595 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
596 log_verbose("\n---Testing ucnv_getSubstChars...\n");
597 ii=4;
598 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
599 if (ii <= 0) {
600 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
601 }
602
603 for(x=0;x<ii;x++)
604 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
605 if (rest==CodePagesSubstitutionChars[codepage_index])
606 log_verbose("Substitution character ok\n");
607 else
608 log_err("Substitution character failed.\n");
609
610 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
611 ucnv_setSubstChars(myConverter, myptr, ii, &err);
612 if (U_FAILURE(err))
613 {
614 log_err("FAILURE! %s\n", myErrorName(err));
615 }
616 ucnv_getSubstChars(myConverter,save, &ii, &err);
617 if (U_FAILURE(err))
618 {
619 log_err("FAILURE! %s\n", myErrorName(err));
620 }
621
622 if (strncmp(save, myptr, ii))
623 log_err("Saved substitution character failed\n");
624 else
625 log_verbose("Saved substitution character ok\n");
626
627 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
628 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
629 ii=1;
630 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
631 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
632 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
633 }
634 err=U_ZERO_ERROR;
635 ii=4;
636 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
637 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
638 ucnv_setSubstChars(myConverter, myptr, 0, &err);
639 if(err != U_ILLEGAL_ARGUMENT_ERROR){
640 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
641 }
642 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
643 strcpy(myptr, "abc");
644 ucnv_setSubstChars(myConverter, myptr, ii, &err);
645 err=U_ZERO_ERROR;
646 ucnv_getSubstChars(myConverter, save, &ii, &err);
647 if(strncmp(save, myptr, ii) == 0){
648 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
649 }
650 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
651 err=U_ZERO_ERROR;
652 strcpy(myptr, "abc");
653 ucnv_setSubstChars(myConverter, myptr, ii, &err);
654 err=U_ILLEGAL_ARGUMENT_ERROR;
655 ucnv_getSubstChars(myConverter, save, &ii, &err);
656 if(strncmp(save, myptr, ii) == 0){
657 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
658 }
659 err=U_ZERO_ERROR;
660 /*------*/
661
662#ifdef U_ENABLE_GENERIC_ISO_2022
663 /*resetState ucnv_reset()*/
664 log_verbose("\n---Testing ucnv_reset()..\n");
665 ucnv_reset(myConverter);
666 {
667 UChar32 c;
668 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
669 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
670 UConverter *cnv=ucnv_open("ISO_2022", &err);
671 if(U_FAILURE(err)) {
672 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
673 }
674 c=ucnv_getNextUChar(cnv, &source, limit, &err);
675 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
676 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
677 }
678 ucnv_reset(cnv);
679 ucnv_close(cnv);
680
681 }
682#endif
683
684 /*getDisplayName*/
685 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
686 locale=CodePagesLocale[codepage_index];
687 len=0;
688 displayname=NULL;
689 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
690 if(err==U_BUFFER_OVERFLOW_ERROR) {
691 err=U_ZERO_ERROR;
692 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
693 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
694 if(U_FAILURE(err)) {
695 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
696 }
697 else {
698 log_verbose(" getDisplayName o.k.\n");
699 }
700 free(displayname);
701 displayname=NULL;
702 }
703 else {
704 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
705 }
706 /*test ucnv_getDiaplayName with error condition*/
707 err= U_ILLEGAL_ARGUMENT_ERROR;
708 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
709 if( len !=0 ){
710 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
711 }
712 /*test ucnv_getDiaplayName with error condition*/
713 err=U_ZERO_ERROR;
714 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
715 if( len !=0 || U_SUCCESS(err)){
716 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
717 }
718 err=U_ZERO_ERROR;
719
720 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
721 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
722
723 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
724 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
725 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
726 {
727 log_err("FAILURE! %s\n", myErrorName(err));
728 }
729
730 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
731 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
732 log_err("get From UCallBack failed\n");
733 else
734 log_verbose("get From UCallBack ok\n");
735
736 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
737 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
738 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
739 {
740 log_err("FAILURE! %s\n", myErrorName(err));
741 }
742
743 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
744 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
745 log_err("get From UCallBack action failed\n");
746 else
747 log_verbose("get From UCallBack action ok\n");
748
749 /*testing ucnv_setToUCallBack with error conditions*/
750 err=U_ILLEGAL_ARGUMENT_ERROR;
751 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
752 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
753 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
754 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
755 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
756 }
757 err=U_ZERO_ERROR;
758
759
760 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
761 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
762
763 log_verbose("\n---Testing setTo UCallBack...\n");
764 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
765 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
766 {
767 log_err("FAILURE! %s\n", myErrorName(err));
768 }
769
770 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
771 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
772 log_err("To UCallBack failed\n");
773 else
774 log_verbose("To UCallBack ok\n");
775
776 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
777 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
778 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
779 { log_err("FAILURE! %s\n", myErrorName(err)); }
780
781 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
782 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
783 log_err("To UCallBack failed\n");
784 else
785 log_verbose("To UCallBack ok\n");
786
787 /*testing ucnv_setToUCallBack with error conditions*/
788 err=U_ILLEGAL_ARGUMENT_ERROR;
789 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
790 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
791 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
792 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
793 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
794 }
795 err=U_ZERO_ERROR;
796
797
798 /*getcodepageid testing ucnv_getCCSID() */
799 log_verbose("\n----Testing getCCSID....\n");
800 cp = ucnv_getCCSID(myConverter,&err);
801 if (U_FAILURE(err))
802 {
803 log_err("FAILURE!..... %s\n", myErrorName(err));
804 }
805 if (cp != CodePageNumberToTest[codepage_index])
806 log_err("Codepage number test failed\n");
807 else
808 log_verbose("Codepage number test OK\n");
809
810 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
811 err=U_ILLEGAL_ARGUMENT_ERROR;
812 if( ucnv_getCCSID(myConverter,&err) != -1){
813 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
814 }
815 err=U_ZERO_ERROR;
816
817 /*getCodepagePlatform testing ucnv_getPlatform()*/
818 log_verbose("\n---Testing getCodepagePlatform ..\n");
819 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
820 log_err("Platform codepage test failed\n");
821 else
822 log_verbose("Platform codepage test ok\n");
823
824 if (U_FAILURE(err))
825 {
826 log_err("FAILURE! %s\n", myErrorName(err));
827 }
828 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
829 err= U_ILLEGAL_ARGUMENT_ERROR;
830 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
831 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
832 }
833 err=U_ZERO_ERROR;
834
835
836 /*Reads the BOM*/
837 fread(&BOM, sizeof(UChar), 1, ucs_file_in);
838 if (BOM!=0xFEFF && BOM!=0xFFFE)
839 {
840 log_err("File Missing BOM...Bailing!\n");
841 fclose(ucs_file_in);
842 break;
843 }
844
845
846 /*Reads in the file*/
847 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
848 {
849 myUChar = ucs_file_buffer[i-1];
850
851 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
852 }
853
854 myUChar = ucs_file_buffer[i-1];
855 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
856
857
858 /*testing ucnv_fromUChars() and ucnv_toUChars() */
859 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
860
861 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
862 u_uastrcpy(uchar1,"");
863 u_strncpy(uchar1,ucs_file_buffer,i);
864 uchar1[i] = 0;
865
866 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
867 u_uastrcpy(uchar3,"");
868 u_strncpy(uchar3,ucs_file_buffer,i);
869 uchar3[i] = 0;
870
871 /*Calls the Conversion Routine */
872 testLong1 = MAX_FILE_LEN;
873 log_verbose("\n---Testing ucnv_fromUChars()\n");
874 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
875 if (U_FAILURE(err))
876 {
877 log_err("\nFAILURE...%s\n", myErrorName(err));
878 }
879 else
880 log_verbose(" ucnv_fromUChars() o.k.\n");
881
882 /*test the conversion routine */
883 log_verbose("\n---Testing ucnv_toUChars()\n");
884 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
885 targetcapacity2=0;
886 targetsize = ucnv_toUChars(myConverter,
887 NULL,
888 targetcapacity2,
889 output_cp_buffer,
890 strlen(output_cp_buffer),
891 &err);
892 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
893
894 if(err==U_BUFFER_OVERFLOW_ERROR)
895 {
896 err=U_ZERO_ERROR;
897 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
898 targetsize = ucnv_toUChars(myConverter,
899 uchar2,
900 targetsize+1,
901 output_cp_buffer,
902 strlen(output_cp_buffer),
903 &err);
904
905 if(U_FAILURE(err))
906 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
907 else
908 log_verbose(" ucnv_toUChars() o.k.\n");
909
910 if(u_strcmp(uchar1,uchar2)!=0)
911 log_err("equality test failed with conversion routine\n");
912 }
913 else
914 {
915 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
916 }
917 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
918 err=U_ILLEGAL_ARGUMENT_ERROR;
919 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
920 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
921 if (targetcapacity !=0) {
922 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
923 }
924 err=U_ZERO_ERROR;
925 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
926 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
927 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
928 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
929 }
930 err=U_ZERO_ERROR;
931 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
932 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
933 if (targetcapacity !=0) {
934 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
935 }
936 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
937 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
938 if (err != U_BUFFER_OVERFLOW_ERROR) {
939 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
940 }
941 /*toUChars with error conditions*/
942 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err);
943 if(targetsize != 0){
944 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
945 }
946 err=U_ZERO_ERROR;
947 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err);
948 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
949 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
950 }
951 err=U_ZERO_ERROR;
952 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
953 if (targetsize !=0) {
954 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
955 }
956 targetcapacity2=0;
957 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err);
958 if (err != U_STRING_NOT_TERMINATED_WARNING) {
959 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
960 u_errorName(err));
961 }
962 err=U_ZERO_ERROR;
963 /*-----*/
964
965
966 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
967 /*Clean up re-usable vars*/
968 j=0;
969 log_verbose("Testing ucnv_fromUnicode().....\n");
970 tmp_ucs_buf=ucs_file_buffer_use;
971 ucnv_fromUnicode(myConverter, &mytarget_1,
972 mytarget + MAX_FILE_LEN,
973 &tmp_ucs_buf,
974 ucs_file_buffer_use+i,
975 NULL,
976 TRUE,
977 &err);
978 consumedUni = (UChar*)tmp_consumedUni;
979
980 if (U_FAILURE(err))
981 {
982 log_err("FAILURE! %s\n", myErrorName(err));
983 }
984 else
985 log_verbose("ucnv_fromUnicode() o.k.\n");
986
987 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
988 log_verbose("Testing ucnv_toUnicode().....\n");
989 tmp_mytarget_use=mytarget_use;
990 tmp_consumed = consumed;
991 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
992 my_ucs_file_buffer + MAX_FILE_LEN,
993 &tmp_mytarget_use,
994 mytarget_use + (mytarget_1 - mytarget),
995 NULL,
996 FALSE,
997 &err);
998 consumed = (char*)tmp_consumed;
999 if (U_FAILURE(err))
1000 {
1001 log_err("FAILURE! %s\n", myErrorName(err));
1002 }
1003 else
1004 log_verbose("ucnv_toUnicode() o.k.\n");
1005
1006
1007 log_verbose("\n---Testing RoundTrip ...\n");
1008
1009
1010 u_strncpy(uchar3, my_ucs_file_buffer,i);
1011 uchar3[i] = 0;
1012
1013 if(u_strcmp(uchar1,uchar3)==0)
1014 log_verbose("Equality test o.k.\n");
1015 else
1016 log_err("Equality test failed\n");
1017
1018 /*sanity compare */
1019 if(uchar2 == NULL)
1020 {
1021 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1022 }
1023 else
1024 {
1025 if(u_strcmp(uchar2, uchar3)==0)
1026 log_verbose("Equality test o.k.\n");
1027 else
1028 log_err("Equality test failed\n");
1029 }
1030
1031 fclose(ucs_file_in);
1032 ucnv_close(myConverter);
1033 if (uchar1 != 0) free(uchar1);
1034 if (uchar2 != 0) free(uchar2);
1035 if (uchar3 != 0) free(uchar3);
1036 }
1037
1038 free((void*)mytarget);
1039 free((void*)output_cp_buffer);
1040 free((void*)ucs_file_buffer);
1041 free((void*)my_ucs_file_buffer);
1042#endif
1043}
1044
1045static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
1046{
1047 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;
1048}
1049
1050
1051static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
1052{
1053 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;
1054}
1055
1056static void TestFlushCache(void) {
1057#if !UCONFIG_NO_LEGACY_CONVERSION
1058 UErrorCode err = U_ZERO_ERROR;
1059 UConverter* someConverters[5];
1060 int flushCount = 0;
1061
1062 /* flush the converter cache to get a consistent state before the flushing is tested */
1063 ucnv_flushCache();
1064
1065 /*Testing ucnv_open()*/
1066 /* Note: These converters have been chosen because they do NOT
1067 encode the Latin characters (U+0041, ...), and therefore are
1068 highly unlikely to be chosen as system default codepages */
1069
1070 someConverters[0] = ucnv_open("ibm-1047", &err);
1071 if (U_FAILURE(err)) {
1072 log_data_err("FAILURE! %s\n", myErrorName(err));
1073 }
1074
1075 someConverters[1] = ucnv_open("ibm-1047", &err);
1076 if (U_FAILURE(err)) {
1077 log_data_err("FAILURE! %s\n", myErrorName(err));
1078 }
1079
1080 someConverters[2] = ucnv_open("ibm-1047", &err);
1081 if (U_FAILURE(err)) {
1082 log_data_err("FAILURE! %s\n", myErrorName(err));
1083 }
1084
1085 someConverters[3] = ucnv_open("gb18030", &err);
1086 if (U_FAILURE(err)) {
1087 log_data_err("FAILURE! %s\n", myErrorName(err));
1088 }
1089
1090 someConverters[4] = ucnv_open("ibm-954", &err);
1091 if (U_FAILURE(err)) {
1092 log_data_err("FAILURE! %s\n", myErrorName(err));
1093 }
1094
1095
1096 /* Testing ucnv_flushCache() */
1097 log_verbose("\n---Testing ucnv_flushCache...\n");
1098 if ((flushCount=ucnv_flushCache())==0)
1099 log_verbose("Flush cache ok\n");
1100 else
1101 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1102
1103 /*testing ucnv_close() and ucnv_flushCache() */
1104 ucnv_close(someConverters[0]);
1105 ucnv_close(someConverters[1]);
1106
1107 if ((flushCount=ucnv_flushCache())==0)
1108 log_verbose("Flush cache ok\n");
1109 else
1110 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1111
1112 ucnv_close(someConverters[2]);
1113 ucnv_close(someConverters[3]);
1114
1115 if ((flushCount=ucnv_flushCache())==2)
1116 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1117 else
1118 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1119 __LINE__,
1120 flushCount);
1121
1122 ucnv_close(someConverters[4]);
1123 if ( (flushCount=ucnv_flushCache())==1)
1124 log_verbose("Flush cache ok\n");
1125 else
1126 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1127#endif
1128}
1129
1130/**
1131 * Test the converter alias API, specifically the fuzzy matching of
1132 * alias names and the alias table integrity. Make sure each
1133 * converter has at least one alias (itself), and that its listed
1134 * aliases map back to itself. Check some hard-coded UTF-8 and
1135 * ISO_2022 aliases to make sure they work.
1136 */
1137static void TestAlias() {
1138 int32_t i, ncnv;
1139 UErrorCode status = U_ZERO_ERROR;
1140
1141 /* Predetermined aliases that we expect to map back to ISO_2022
1142 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1143 const char* ISO_2022_NAMES[] =
1144 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1145 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1146 int32_t ISO_2022_NAMES_LENGTH =
1147 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]);
1148 const char *UTF8_NAMES[] =
1149 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1150 "utf_8", "ibm1208", "cp1208" };
1151 int32_t UTF8_NAMES_LENGTH =
1152 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]);
1153
1154 struct {
1155 const char *name;
1156 const char *alias;
1157 } CONVERTERS_NAMES[] = {
1158 { "UTF-32BE", "UTF32_BigEndian" },
1159 { "UTF-32LE", "UTF32_LittleEndian" },
1160 { "UTF-32", "ISO-10646-UCS-4" },
1161 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1162 { "UTF-32", "ucs-4" }
1163 };
1164 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES);
1165
1166 /* When there are bugs in gencnval or in ucnv_io, converters can
1167 appear to have no aliases. */
1168 ncnv = ucnv_countAvailable();
1169 log_verbose("%d converters\n", ncnv);
1170 for (i=0; i<ncnv; ++i) {
1171 const char *name = ucnv_getAvailableName(i);
1172 const char *alias0;
1173 uint16_t na = ucnv_countAliases(name, &status);
1174 uint16_t j;
1175 UConverter *cnv;
1176
1177 if (na == 0) {
1178 log_err("FAIL: Converter \"%s\" (i=%d)"
1179 " has no aliases; expect at least one\n",
1180 name, i);
1181 continue;
1182 }
1183 cnv = ucnv_open(name, &status);
1184 if (U_FAILURE(status)) {
1185 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1186 " can't be opened.\n",
1187 name, i);
1188 }
1189 else {
1190 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1191 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1192 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1193 "The should be the same\n",
1194 name, ucnv_getName(cnv, &status));
1195 }
1196 }
1197 ucnv_close(cnv);
1198
1199 status = U_ZERO_ERROR;
1200 alias0 = ucnv_getAlias(name, 0, &status);
1201 for (j=1; j<na; ++j) {
1202 const char *alias;
1203 /* Make sure each alias maps back to the the same list of
1204 aliases. Assume that if alias 0 is the same, the whole
1205 list is the same (this should always be true). */
1206 const char *mapBack;
1207
1208 status = U_ZERO_ERROR;
1209 alias = ucnv_getAlias(name, j, &status);
1210 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1211 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1212 }
1213
1214 if (alias == NULL) {
1215 log_err("FAIL: Converter \"%s\" -> "
1216 "alias[%d]=NULL\n",
1217 name, j);
1218 continue;
1219 }
1220
1221 mapBack = ucnv_getAlias(alias, 0, &status);
1222
1223 if (mapBack == NULL) {
1224 log_err("FAIL: Converter \"%s\" -> "
1225 "alias[%d]=\"%s\" -> "
1226 "alias[0]=NULL, exp. \"%s\"\n",
1227 name, j, alias, alias0);
1228 continue;
1229 }
1230
1231 if (0 != strcmp(alias0, mapBack)) {
1232 int32_t idx;
1233 UBool foundAlias = FALSE;
1234 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1235 /* Make sure that we only get this mismapping when there is
1236 an ambiguous alias, and the other converter has this alias too. */
1237 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1238 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1239 foundAlias = TRUE;
1240 break;
1241 }
1242 }
1243 }
1244 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */
1245
1246 if (!foundAlias) {
1247 log_err("FAIL: Converter \"%s\" -> "
1248 "alias[%d]=\"%s\" -> "
1249 "alias[0]=\"%s\", exp. \"%s\"\n",
1250 name, j, alias, mapBack, alias0);
1251 }
1252 }
1253 }
1254 }
1255
1256
1257 /* Check a list of predetermined aliases that we expect to map
1258 * back to ISO_2022 and UTF-8. */
1259 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1260 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1261 if(!mapBack) {
1262 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1263 continue;
1264 }
1265 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1266 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1267 ISO_2022_NAMES[i], mapBack);
1268 }
1269 }
1270
1271
1272 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1273 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1274 if(!mapBack) {
1275 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1276 continue;
1277 }
1278 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1279 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1280 UTF8_NAMES[i], mapBack);
1281 }
1282 }
1283
1284 /*
1285 * Check a list of predetermined aliases that we expect to map
1286 * back to predermined converter names.
1287 */
1288
1289 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1290 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
1291 if(!mapBack) {
1292 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1293 continue;
1294 }
1295 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1296 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1297 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1298 }
1299 }
1300
1301}
1302
1303static void TestDuplicateAlias(void) {
1304 const char *alias;
1305 UErrorCode status = U_ZERO_ERROR;
1306
1307 status = U_ZERO_ERROR;
1308 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1309 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1310 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1311 }
1312 status = U_ZERO_ERROR;
1313 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1314 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1315 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1316 }
1317 status = U_ZERO_ERROR;
1318 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1319 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1320 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1321 }
1322}
1323
1324
1325/* Test safe clone callback */
1326
/*
 * Hands out serial numbers for TSCC contexts: 1 on the first call and
 * one more on each call after that.  The counter is function-local
 * static state.
 */
static uint32_t TSCC_nextSerial()
{
    static uint32_t upcoming = 1;
    uint32_t issued = upcoming;

    upcoming += 1;
    return issued;
}
1333
1334typedef struct
1335{
1336 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */
1337 uint32_t serial; /* minted from nextSerial, above */
1338 UBool wasClosed; /* close happened on the object */
1339} TSCCContext;
1340
1341static TSCCContext *TSCC_clone(TSCCContext *ctx)
1342{
1343 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));
1344
1345 newCtx->serial = TSCC_nextSerial();
1346 newCtx->wasClosed = 0;
1347 newCtx->magic = 0xC0FFEE;
1348
1349 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);
1350
1351 return newCtx;
1352}
1353
1354static void TSCC_fromU(const void *context,
1355 UConverterFromUnicodeArgs *fromUArgs,
1356 const UChar* codeUnits,
1357 int32_t length,
1358 UChar32 codePoint,
1359 UConverterCallbackReason reason,
1360 UErrorCode * err)
1361{
1362 TSCCContext *ctx = (TSCCContext*)context;
1363 UConverterFromUCallback junkFrom;
1364
1365 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);
1366
1367 if(ctx->magic != 0xC0FFEE) {
1368 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1369 return;
1370 }
1371
1372 if(reason == UCNV_CLONE) {
1373 UErrorCode subErr = U_ZERO_ERROR;
1374 TSCCContext *newCtx;
1375 TSCCContext *junkCtx;
1376 TSCCContext **pjunkCtx = &junkCtx;
1377
1378 /* "recreate" it */
1379 log_verbose("TSCC_fromU: cloning..\n");
1380 newCtx = TSCC_clone(ctx);
1381
1382 if(newCtx == NULL) {
1383 log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
1384 }
1385
1386 /* now, SET it */
1387 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1388 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1389
1390 if(U_FAILURE(subErr)) {
1391 *err = subErr;
1392 }
1393 }
1394
1395 if(reason == UCNV_CLOSE) {
1396 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
1397 ctx->wasClosed = TRUE;
1398 }
1399}
1400
1401
1402static void TSCC_toU(const void *context,
1403 UConverterToUnicodeArgs *toUArgs,
1404 const char* codeUnits,
1405 int32_t length,
1406 UConverterCallbackReason reason,
1407 UErrorCode * err)
1408{
1409 TSCCContext *ctx = (TSCCContext*)context;
1410 UConverterToUCallback junkFrom;
1411
1412 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);
1413
1414 if(ctx->magic != 0xC0FFEE) {
1415 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
1416 return;
1417 }
1418
1419 if(reason == UCNV_CLONE) {
1420 UErrorCode subErr = U_ZERO_ERROR;
1421 TSCCContext *newCtx;
1422 TSCCContext *junkCtx;
1423 TSCCContext **pjunkCtx = &junkCtx;
1424
1425 /* "recreate" it */
1426 log_verbose("TSCC_toU: cloning..\n");
1427 newCtx = TSCC_clone(ctx);
1428
1429 if(newCtx == NULL) {
1430 log_err("TSCC_toU: internal clone failed on %p\n", ctx);
1431 }
1432
1433 /* now, SET it */
1434 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx);
1435 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);
1436
1437 if(U_FAILURE(subErr)) {
1438 *err = subErr;
1439 }
1440 }
1441
1442 if(reason == UCNV_CLOSE) {
1443 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
1444 ctx->wasClosed = TRUE;
1445 }
1446}
1447
1448static void TSCC_init(TSCCContext *q)
1449{
1450 q->magic = 0xC0FFEE;
1451 q->serial = TSCC_nextSerial();
1452 q->wasClosed = 0;
1453}
1454
1455static void TSCC_print_log(TSCCContext *q, const char *name)
1456{
1457 if(q==NULL) {
1458 log_verbose("TSCContext: %s is NULL!!\n", name);
1459 } else {
1460 if(q->magic != 0xC0FFEE) {
1461 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
1462 q,q->serial, q->magic);
1463 }
1464 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
1465 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open");
1466 }
1467}
1468
1469#if !UCONFIG_NO_LEGACY_CONVERSION
1470static void TestConvertSafeCloneCallback()
1471{
1472 UErrorCode err = U_ZERO_ERROR;
1473 TSCCContext from1, to1;
1474 TSCCContext *from2, *from3, *to2, *to3;
1475 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1476 char hunk[8192];
1477 int32_t hunkSize = 8192;
1478 UConverterFromUCallback junkFrom;
1479 UConverterToUCallback junkTo;
1480 UConverter *conv1, *conv2 = NULL;
1481
1482 conv1 = ucnv_open("iso-8859-3", &err);
1483
1484 if(U_FAILURE(err)) {
1485 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1486 return;
1487 }
1488
1489 log_verbose("Opened conv1=%p\n", conv1);
1490
1491 TSCC_init(&from1);
1492 TSCC_init(&to1);
1493
1494 TSCC_print_log(&from1, "from1");
1495 TSCC_print_log(&to1, "to1");
1496
1497 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1498 log_verbose("Set from1 on conv1\n");
1499 TSCC_print_log(&from1, "from1");
1500
1501 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1502 log_verbose("Set to1 on conv1\n");
1503 TSCC_print_log(&to1, "to1");
1504
1505 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1506 if(U_FAILURE(err)) {
1507 log_err("safeClone failed: %s\n", u_errorName(err));
1508 return;
1509 }
1510 log_verbose("Cloned to conv2=%p.\n", conv2);
1511
1512/********** from *********************/
1513 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1514 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1515
1516 TSCC_print_log(from2, "from2");
1517 TSCC_print_log(from3, "from3(==from1)");
1518
1519 if(from2 == NULL) {
1520 log_err("FAIL! from2 is null \n");
1521 return;
1522 }
1523
1524 if(from3 == NULL) {
1525 log_err("FAIL! from3 is null \n");
1526 return;
1527 }
1528
1529 if(from3 != (&from1) ) {
1530 log_err("FAIL! conv1's FROM context changed!\n");
1531 }
1532
1533 if(from2 == (&from1) ) {
1534 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1535 }
1536
1537 if(from1.wasClosed) {
1538 log_err("FAIL! from1 is closed \n");
1539 }
1540
1541 if(from2->wasClosed) {
1542 log_err("FAIL! from2 was closed\n");
1543 }
1544
1545/********** to *********************/
1546 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1547 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1548
1549 TSCC_print_log(to2, "to2");
1550 TSCC_print_log(to3, "to3(==to1)");
1551
1552 if(to2 == NULL) {
1553 log_err("FAIL! to2 is null \n");
1554 return;
1555 }
1556
1557 if(to3 == NULL) {
1558 log_err("FAIL! to3 is null \n");
1559 return;
1560 }
1561
1562 if(to3 != (&to1) ) {
1563 log_err("FAIL! conv1's TO context changed!\n");
1564 }
1565
1566 if(to2 == (&to1) ) {
1567 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1568 }
1569
1570 if(to1.wasClosed) {
1571 log_err("FAIL! to1 is closed \n");
1572 }
1573
1574 if(to2->wasClosed) {
1575 log_err("FAIL! to2 was closed\n");
1576 }
1577
1578/*************************************/
1579
1580 ucnv_close(conv1);
1581 log_verbose("ucnv_closed (conv1)\n");
1582 TSCC_print_log(&from1, "from1");
1583 TSCC_print_log(from2, "from2");
1584 TSCC_print_log(&to1, "to1");
1585 TSCC_print_log(to2, "to2");
1586
1587 if(from1.wasClosed == FALSE) {
1588 log_err("FAIL! from1 is NOT closed \n");
1589 }
1590
1591 if(from2->wasClosed) {
1592 log_err("FAIL! from2 was closed\n");
1593 }
1594
1595 if(to1.wasClosed == FALSE) {
1596 log_err("FAIL! to1 is NOT closed \n");
1597 }
1598
1599 if(to2->wasClosed) {
1600 log_err("FAIL! to2 was closed\n");
1601 }
1602
1603 ucnv_close(conv2);
1604 log_verbose("ucnv_closed (conv2)\n");
1605
1606 TSCC_print_log(&from1, "from1");
1607 TSCC_print_log(from2, "from2");
1608
1609 if(from1.wasClosed == FALSE) {
1610 log_err("FAIL! from1 is NOT closed \n");
1611 }
1612
1613 if(from2->wasClosed == FALSE) {
1614 log_err("FAIL! from2 was NOT closed\n");
1615 }
1616
1617 TSCC_print_log(&to1, "to1");
1618 TSCC_print_log(to2, "to2");
1619
1620 if(to1.wasClosed == FALSE) {
1621 log_err("FAIL! to1 is NOT closed \n");
1622 }
1623
1624 if(to2->wasClosed == FALSE) {
1625 log_err("FAIL! to2 was NOT closed\n");
1626 }
1627
1628 if(to2 != (&to1)) {
1629 free(to2); /* to1 is stack based */
1630 }
1631 if(from2 != (&from1)) {
1632 free(from2); /* from1 is stack based */
1633 }
1634}
1635#endif
1636
1637static UBool
1638containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
1639 while(length>0) {
1640 if(*p!=b) {
1641 return TRUE;
1642 }
1643 ++p;
1644 --length;
1645 }
1646 return FALSE;
1647}
1648
1649static void TestConvertSafeClone()
1650{
1651 /* one 'regular' & all the 'private stateful' converters */
1652 static const char *const names[] = {
1653#if !UCONFIG_NO_LEGACY_CONVERSION
1654 "ibm-1047",
1655 "ISO_2022,locale=zh,version=1",
1656#endif
1657 "SCSU",
1658#if !UCONFIG_NO_LEGACY_CONVERSION
1659 "HZ",
1660 "lmbcs",
1661 "ISCII,version=0",
1662 "ISO_2022,locale=kr,version=1",
1663 "ISO_2022,locale=jp,version=2",
1664#endif
1665 "BOCU-1",
1666 "UTF-7",
1667#if !UCONFIG_NO_LEGACY_CONVERSION
1668 "IMAP-mailbox-name",
1669 "ibm-1047-s390"
1670#else
1671 "IMAP=mailbox-name"
1672#endif
1673 };
1674
1675 /* store the actual sizes of each converter */
1676 int32_t actualSizes[LENGTHOF(names)];
1677
1678 static const int32_t bufferSizes[] = {
1679 U_CNV_SAFECLONE_BUFFERSIZE,
1680 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1681 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1682 };
1683
1684 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1685 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1686 int32_t bufferSize, maxBufferSize;
1687 const char *maxName;
1688 UConverter * cnv, *cnv2;
1689 UErrorCode err;
1690
1691 char *pCharBuffer;
1692 const char *pConstCharBuffer;
1693 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer);
1694 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1695 UChar uniCharBuffer[20];
1696 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1697 const char *pCharSource = charSourceBuffer;
1698 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1699 UChar *pUCharTarget = uniCharBuffer;
1700 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
1701 const UChar * pUniBuffer;
1702 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
1703 int32_t index, j;
1704
1705 err = U_ZERO_ERROR;
1706 cnv = ucnv_open(names[0], &err);
1707 if(U_SUCCESS(err)) {
1708 /* Check the various error & informational states: */
1709
1710 /* Null status - just returns NULL */
1711 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1712 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0))
1713 {
1714 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1715 }
1716 /* error status - should return 0 & keep error the same */
1717 err = U_MEMORY_ALLOCATION_ERROR;
1718 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1719 {
1720 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1721 }
1722 err = U_ZERO_ERROR;
1723
1724 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
1725 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1726 {
1727 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1728 }
1729 err = U_ZERO_ERROR;
1730
1731 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1732 bufferSize = 0;
1733 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1734 {
1735 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1736 }
1737 /* Verify our define is large enough */
1738 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1739 {
1740 log_err("FAIL: Pre-calculated buffer size is too small\n");
1741 }
1742 /* Verify we can use this run-time calculated size */
1743 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1744 {
1745 log_err("FAIL: Converter can't be cloned with run-time size\n");
1746 }
1747 if (cnv2) {
1748 ucnv_close(cnv2);
1749 }
1750
1751 /* size one byte too small - should allocate & let us know */
1752 --bufferSize;
1753 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1754 {
1755 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1756 }
1757 if (cnv2) {
1758 ucnv_close(cnv2);
1759 }
1760
1761 err = U_ZERO_ERROR;
1762 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1763
1764 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1765 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1766 {
1767 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1768 }
1769 if (cnv2) {
1770 ucnv_close(cnv2);
1771 }
1772
1773 err = U_ZERO_ERROR;
1774
1775 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1776 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1777 {
1778 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1779 }
1780
1781 ucnv_close(cnv);
1782 }
1783
1784 maxBufferSize = 0;
1785 maxName = "";
1786
1787 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1788
1789 for(j = 0; j < LENGTHOF(bufferSizes); ++j) {
1790 for (index = 0; index < LENGTHOF(names); index++)
1791 {
1792 err = U_ZERO_ERROR;
1793 cnv = ucnv_open(names[index], &err);
1794 if(U_FAILURE(err)) {
1795 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err));
1796 continue;
1797 }
1798
1799 if(j == 0) {
1800 /* preflight to get maxBufferSize */
1801 actualSizes[index] = 0;
1802 ucnv_safeClone(cnv, NULL, &actualSizes[index], &err);
1803 if(actualSizes[index] > maxBufferSize) {
1804 maxBufferSize = actualSizes[index];
1805 maxName = names[index];
1806 }
1807 }
1808
1809 memset(buffer, 0xaa, sizeof(buffer));
1810
1811 bufferSize = bufferSizes[j];
1812 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1813
1814 /* close the original immediately to make sure that the clone works by itself */
1815 ucnv_close(cnv);
1816
1817 if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) &&
1818 err == U_SAFECLONE_ALLOCATED_WARNING
1819 ) {
1820 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]);
1821 }
1822
1823 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1824 if(bufferSize <= bufferSizes[j]) {
1825 /* used the stack buffer */
1826 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1827 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1828 ) {
1829 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1830 names[index], bufferSize, bufferSizes[j]);
1831 }
1832 } else {
1833 /* heap-allocated the clone */
1834 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1835 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1836 names[index], bufferSize, bufferSizes[j]);
1837 }
1838 }
1839
1840 pCharBuffer = charBuffer;
1841 pUniBuffer = uniBuffer;
1842
1843 ucnv_fromUnicode(cnv2,
1844 &pCharBuffer,
1845 charBufferLimit,
1846 &pUniBuffer,
1847 uniBufferLimit,
1848 NULL,
1849 TRUE,
1850 &err);
1851 if(U_FAILURE(err)){
1852 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1853 }
1854 ucnv_toUnicode(cnv2,
1855 &pUCharTarget,
1856 pUCharTargetLimit,
1857 &pCharSource,
1858 pCharSourceLimit,
1859 NULL,
1860 TRUE,
1861 &err
1862 );
1863
1864 if(U_FAILURE(err)){
1865 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1866 }
1867
1868 pConstCharBuffer = charBuffer;
1869 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1870 {
1871 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
1872 }
1873 ucnv_close(cnv2);
1874 }
1875 }
1876
1877 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1878 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1879 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1880 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1881 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1882 }
1883}
1884
1885static void TestCCSID() {
1886#if !UCONFIG_NO_LEGACY_CONVERSION
1887 UConverter *cnv;
1888 UErrorCode errorCode;
1889 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
1890 int32_t i, ccsid;
1891
1892 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) {
1893 ccsid=ccsids[i];
1894
1895 errorCode=U_ZERO_ERROR;
1896 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
1897 if(U_FAILURE(errorCode)) {
1898 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
1899 continue;
1900 }
1901
1902 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
1903 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
1904 }
1905
1906 /* skip gb18030(ccsid 1392) */
1907 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
1908 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
1909 }
1910
1911 ucnv_close(cnv);
1912 }
1913#endif
1914}
1915
1916/* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
1917
1918/* CHUNK_SIZE defined in common\ucnv.c: */
1919#define CHUNK_SIZE 1024
1920
1921static void bug1(void);
1922static void bug2(void);
1923static void bug3(void);
1924
1925static void
1926TestJ932(void)
1927{
1928 bug1(); /* Unicode intermediate buffer straddle bug */
1929 bug2(); /* pre-flighting size incorrect caused by simple overflow */
1930 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
1931}
1932
1933/*
1934 * jitterbug 932: test chunking boundary conditions in
1935
1936 int32_t ucnv_convert(const char *toConverterName,
1937 const char *fromConverterName,
1938 char *target,
1939 int32_t targetSize,
1940 const char *source,
1941 int32_t sourceSize,
1942 UErrorCode * err)
1943
1944 * See discussions on the icu mailing list in
1945 * 2001-April with the subject "converter 'flush' question".
1946 *
1947 * Bug report and test code provided by Edward J. Batutis.
1948 */
1949static void bug1()
1950{
1951#if !UCONFIG_NO_LEGACY_CONVERSION
1952 char char_in[CHUNK_SIZE+32];
1953 char char_out[CHUNK_SIZE*2];
1954
1955 /* GB 18030 equivalent of U+10000 is 90308130 */
1956 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
1957
1958 UErrorCode err = U_ZERO_ERROR;
1959 int32_t i, test_seq_len = sizeof(test_seq);
1960
1961 /*
1962 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
1963 * until the straddle bug appears. I didn't want to hard-code everything so this test could
1964 * be expanded - however this is the only type of straddle bug I can think of at the moment -
1965 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
1966 * other Unicode sequences cause a bug since combining sequences are not supported by the
1967 * converters.
1968 */
1969
1970 for (i = test_seq_len; i >= 0; i--) {
1971 /* put character sequence into input buffer */
1972 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
1973 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
1974
1975 /* do the conversion */
1976 ucnv_convert("us-ascii", /* out */
1977 "gb18030", /* in */
1978 char_out,
1979 sizeof(char_out),
1980 char_in,
1981 sizeof(char_in),
1982 &err);
1983
1984 /* bug1: */
1985 if (err == U_TRUNCATED_CHAR_FOUND) {
1986 /* this happens when surrogate pair straddles the intermediate buffer in
1987 * T_UConverter_fromCodepageToCodepage */
1988 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
1989 }
1990 }
1991#endif
1992}
1993
1994/* bug2: pre-flighting loop bug: simple overflow causes bug */
1995static void bug2()
1996{
1997 /* US-ASCII "1234567890" */
1998 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
1999 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2000 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2001 0x00, 0x00, 0x00, 0x31,
2002 0x00, 0x00, 0x00, 0x32,
2003 0x00, 0x00, 0x00, 0x33,
2004 0x00, 0x00, 0x00, 0x34,
2005 0x00, 0x00, 0x00, 0x35,
2006 0x00, 0x00, 0x00, 0x36,
2007 0x00, 0x00, 0x00, 0x37,
2008 0x00, 0x00, 0x00, 0x38,
2009 0x00, 0x00, (char)0xf0, 0x00};
2010 static char target[5];
2011
2012 UErrorCode err = U_ZERO_ERROR;
2013 int32_t size;
2014
2015 /* do the conversion */
2016 size = ucnv_convert("iso-8859-1", /* out */
2017 "us-ascii", /* in */
2018 target,
2019 sizeof(target),
2020 source,
2021 sizeof(source),
2022 &err);
2023
2024 if ( size != 10 ) {
2025 /* bug2: size is 5, should be 10 */
2026 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2027 }
2028
2029 err = U_ZERO_ERROR;
2030 /* do the conversion */
2031 size = ucnv_convert("UTF-32BE", /* out */
2032 "UTF-8", /* in */
2033 target,
2034 sizeof(target),
2035 sourceUTF8,
2036 sizeof(sourceUTF8),
2037 &err);
2038
2039 if ( size != 32 ) {
2040 /* bug2: size is 5, should be 32 */
2041 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2042 }
2043
2044 err = U_ZERO_ERROR;
2045 /* do the conversion */
2046 size = ucnv_convert("UTF-8", /* out */
2047 "UTF-32BE", /* in */
2048 target,
2049 sizeof(target),
2050 sourceUTF32,
2051 sizeof(sourceUTF32),
2052 &err);
2053
2054 if ( size != 12 ) {
2055 /* bug2: size is 5, should be 12 */
2056 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2057 }
2058}
2059
2060/*
2061 * bug3: when the characters expand going from source to target codepage
2062 * you get bug3 in addition to bug2
2063 */
2064static void bug3()
2065{
2066#if !UCONFIG_NO_LEGACY_CONVERSION
2067 char char_in[CHUNK_SIZE*4];
2068 char target[5];
2069 UErrorCode err = U_ZERO_ERROR;
2070 int32_t size;
2071
2072 /*
2073 * first get the buggy size from bug2 then
2074 * compare it to buggy size with an expansion
2075 */
2076 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2077
2078 /* do the conversion */
2079 size = ucnv_convert("lmbcs", /* out */
2080 "us-ascii", /* in */
2081 target,
2082 sizeof(target),
2083 char_in,
2084 sizeof(char_in),
2085 &err);
2086
2087 if ( size != sizeof(char_in) ) {
2088 /*
2089 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2090 * in the converter?), should be CHUNK_SIZE*4
2091 *
2092 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2093 */
2094 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2095 }
2096
2097 /*
2098 * now do the conversion with expansion
2099 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2100 */
2101 memset(char_in, 8, sizeof(char_in));
2102 err = U_ZERO_ERROR;
2103
2104 /* do the conversion */
2105 size = ucnv_convert("lmbcs", /* out */
2106 "us-ascii", /* in */
2107 target,
2108 sizeof(target),
2109 char_in,
2110 sizeof(char_in),
2111 &err);
2112
2113 /* expect 2X expansion */
2114 if ( size != sizeof(char_in) * 2 ) {
2115 /*
2116 * bug3:
2117 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2118 */
2119 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
2120 }
2121#endif
2122}
2123
2124static void
2125convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2126 const char *src, int32_t srcLength,
2127 const char *expectTarget, int32_t expectTargetLength,
2128 int32_t chunkSize,
2129 const char *testName,
2130 UErrorCode expectCode) {
2131 UChar pivotBuffer[CHUNK_SIZE];
2132 UChar *pivotSource, *pivotTarget;
2133 const UChar *pivotLimit;
2134
2135 char targetBuffer[CHUNK_SIZE];
2136 char *target;
2137 const char *srcLimit, *finalSrcLimit, *targetLimit;
2138
2139 int32_t targetLength;
2140
2141 UBool flush;
2142
2143 UErrorCode errorCode;
2144
2145 /* setup */
2146 if(chunkSize>CHUNK_SIZE) {
2147 chunkSize=CHUNK_SIZE;
2148 }
2149
2150 pivotSource=pivotTarget=pivotBuffer;
2151 pivotLimit=pivotBuffer+chunkSize;
2152
2153 finalSrcLimit=src+srcLength;
2154 target=targetBuffer;
2155 targetLimit=targetBuffer+chunkSize;
2156
2157 ucnv_resetToUnicode(srcCnv);
2158 ucnv_resetFromUnicode(targetCnv);
2159
2160 errorCode=U_ZERO_ERROR;
2161 flush=FALSE;
2162
2163 /* convert, streaming-style (both converters and pivot keep state) */
2164 for(;;) {
2165 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2166 if(src+chunkSize<=finalSrcLimit) {
2167 srcLimit=src+chunkSize;
2168 } else {
2169 srcLimit=finalSrcLimit;
2170 }
2171 ucnv_convertEx(targetCnv, srcCnv,
2172 &target, targetLimit,
2173 &src, srcLimit,
2174 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2175 FALSE, flush, &errorCode);
2176 targetLength=(int32_t)(target-targetBuffer);
2177 if(target>targetLimit) {
2178 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2179 testName, chunkSize, target, targetLimit);
2180 break; /* TODO: major problem! */
2181 }
2182 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2183 /* continue converting another chunk */
2184 errorCode=U_ZERO_ERROR;
2185 if(targetLength+chunkSize<=sizeof(targetBuffer)) {
2186 targetLimit=target+chunkSize;
2187 } else {
2188 targetLimit=targetBuffer+sizeof(targetBuffer);
2189 }
2190 } else if(U_FAILURE(errorCode)) {
2191 /* failure */
2192 break;
2193 } else if(flush) {
2194 /* all done */
2195 break;
2196 } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2197 /* all consumed, now flush without input (separate from conversion for testing) */
2198 flush=TRUE;
2199 }
2200 }
2201
2202 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2203 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2204 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2205 } else if(targetLength!=expectTargetLength) {
2206 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2207 testName, chunkSize, targetLength, expectTargetLength);
2208 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2209 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2210 testName, chunkSize);
2211 }
2212}
2213
2214static void
2215convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2216 const char *src, int32_t srcLength,
2217 const char *expectTarget, int32_t expectTargetLength,
2218 const char *testName,
2219 UErrorCode expectCode) {
2220 convertExStreaming(srcCnv, targetCnv,
2221 src, srcLength,
2222 expectTarget, expectTargetLength,
2223 1, testName, expectCode);
2224 convertExStreaming(srcCnv, targetCnv,
2225 src, srcLength,
2226 expectTarget, expectTargetLength,
2227 3, testName, expectCode);
2228 convertExStreaming(srcCnv, targetCnv,
2229 src, srcLength,
2230 expectTarget, expectTargetLength,
2231 7, testName, expectCode);
2232}
2233
2234static void TestConvertEx() {
2235#if !UCONFIG_NO_LEGACY_CONVERSION
2236 static const uint8_t
2237 utf8[]={
2238 /* 4e00 30a1 ff61 0410 */
2239 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2240 },
2241 shiftJIS[]={
2242 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2243 },
2244 errorTarget[]={
2245 /*
2246 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2247 * SUB, SUB, 0x40, SUB, SUB, 0x40
2248 */
2249 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2250 };
2251
2252 char srcBuffer[100], targetBuffer[100];
2253
2254 const char *src;
2255 char *target;
2256
2257 UChar pivotBuffer[100];
2258 UChar *pivotSource, *pivotTarget;
2259
2260 UConverter *cnv1, *cnv2;
2261 UErrorCode errorCode;
2262
2263 errorCode=U_ZERO_ERROR;
2264 cnv1=ucnv_open("UTF-8", &errorCode);
2265 if(U_FAILURE(errorCode)) {
2266 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2267 return;
2268 }
2269
2270 cnv2=ucnv_open("Shift-JIS", &errorCode);
2271 if(U_FAILURE(errorCode)) {
2272 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2273 ucnv_close(cnv1);
2274 return;
2275 }
2276
2277 /* test ucnv_convertEx() with streaming conversion style */
2278 convertExMultiStreaming(cnv1, cnv2,
2279 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2280 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2281
2282 convertExMultiStreaming(cnv2, cnv1,
2283 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2284 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2285
2286 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2287 convertExMultiStreaming(cnv1, cnv2,
2288 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2289 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2290
2291 /* test some simple conversions */
2292
2293 /* NUL-terminated source and target */
2294 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2295 memcpy(srcBuffer, utf8, sizeof(utf8));
2296 srcBuffer[sizeof(utf8)]=0;
2297 src=srcBuffer;
2298 target=targetBuffer;
2299 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2300 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2301 if( errorCode!=U_ZERO_ERROR ||
2302 target-targetBuffer!=sizeof(shiftJIS) ||
2303 *target!=0 ||
2304 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2305 ) {
2306 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2307 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2308 }
2309
2310 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2311 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2312 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2313 src=srcBuffer;
2314 target=targetBuffer;
2315 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2316 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2317 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2318 target-targetBuffer!=sizeof(shiftJIS) ||
2319 *target!=(char)0xff ||
2320 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2321 ) {
2322 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2323 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2324 }
2325
2326 /* bad arguments */
2327 errorCode=U_MESSAGE_PARSE_ERROR;
2328 src=srcBuffer;
2329 target=targetBuffer;
2330 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2331 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode);
2332 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2333 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2334 }
2335
2336 /* pivotLimit==pivotStart */
2337 errorCode=U_ZERO_ERROR;
2338 pivotSource=pivotTarget=pivotBuffer;
2339 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2340 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode);
2341 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2342 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2343 }
2344
2345 /* *pivotSource==NULL */
2346 errorCode=U_ZERO_ERROR;
2347 pivotSource=NULL;
2348 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2349 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2350 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2351 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
2352 }
2353
2354 /* *source==NULL */
2355 errorCode=U_ZERO_ERROR;
2356 src=NULL;
2357 pivotSource=pivotBuffer;
2358 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2359 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode);
2360 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2361 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2362 }
2363
2364 /* streaming conversion without a pivot buffer */
2365 errorCode=U_ZERO_ERROR;
2366 src=srcBuffer;
2367 pivotSource=pivotBuffer;
2368 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2369 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode);
2370 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2371 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2372 }
2373
2374 ucnv_close(cnv1);
2375 ucnv_close(cnv2);
2376#endif
2377}
2378
/*
 * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
 *
 * Each entry is one NUL-terminated byte sequence that is not valid UTF-8.
 * The entries are grouped by kind and, for truncated sequences, by length.
 */
static const char *const badUTF8[]={
    /* trail byte */
    "\x80",

    /* truncated multi-byte sequences: lead byte only */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* truncated after two bytes */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0", "\xee\x80",
    "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80", "\xf4\x90",
    "\xf8\x80", "\xfc\x80",

    /* truncated after three bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80",
    "\xf4\x80\x80", "\xf4\x90\x80",
    "\xf8\x80\x80", "\xfc\x80\x80",

    /* truncated after four bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* truncated after five bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2434
2435#define ARG_CHAR_ARR_SIZE 8
2436
2437/* get some character that can be converted and convert it */
2438static UBool getTestChar(UConverter *cnv, const char *converterName,
2439 char charUTF8[4], int32_t *pCharUTF8Length,
2440 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2441 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2442 UChar utf16[U16_MAX_LENGTH];
2443 int32_t utf16Length;
2444
2445 const UChar *utf16Source;
2446 char *target;
2447
2448 USet *set;
2449 UChar32 c;
2450 UErrorCode errorCode;
2451
2452 errorCode=U_ZERO_ERROR;
2453 set=uset_open(1, 0);
2454 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2455 c=uset_charAt(set, uset_size(set)/2);
2456 uset_close(set);
2457
2458 utf16Length=0;
2459 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2460 *pCharUTF8Length=0;
2461 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
2462
2463 utf16Source=utf16;
2464 target=char0;
2465 ucnv_fromUnicode(cnv,
2466 &target, char0+ARG_CHAR_ARR_SIZE,
2467 &utf16Source, utf16+utf16Length,
2468 NULL, FALSE, &errorCode);
2469 *pChar0Length=(int32_t)(target-char0);
2470
2471 utf16Source=utf16;
2472 target=char1;
2473 ucnv_fromUnicode(cnv,
2474 &target, char1+ARG_CHAR_ARR_SIZE,
2475 &utf16Source, utf16+utf16Length,
2476 NULL, FALSE, &errorCode);
2477 *pChar1Length=(int32_t)(target-char1);
2478
2479 if(U_FAILURE(errorCode)) {
2480 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
2481 return FALSE;
2482 }
2483 return TRUE;
2484}
2485
2486static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2487 char charUTF8[4], int32_t charUTF8Length,
2488 char char0[8], int32_t char0Length,
2489 char char1[8], int32_t char1Length) {
2490 char utf8[16];
2491 int32_t utf8Length;
2492
2493 char output[16];
2494 int32_t outputLength;
2495
2496 char invalidChars[8];
2497 int8_t invalidLength;
2498
2499 const char *source;
2500 char *target;
2501
2502 UChar pivotBuffer[8];
2503 UChar *pivotSource, *pivotTarget;
2504
2505 UErrorCode errorCode;
2506 int32_t i;
2507
2508 /* test truncated sequences */
2509 errorCode=U_ZERO_ERROR;
2510 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2511
2512 memcpy(utf8, charUTF8, charUTF8Length);
2513
2514 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2515 /* truncated sequence? */
2516 int32_t length=strlen(badUTF8[i]);
2517 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) {
2518 continue;
2519 }
2520
2521 /* assemble a string with the test character and the truncated sequence */
2522 memcpy(utf8+charUTF8Length, badUTF8[i], length);
2523 utf8Length=charUTF8Length+length;
2524
2525 /* convert and check the invalidChars */
2526 source=utf8;
2527 target=output;
2528 pivotSource=pivotTarget=pivotBuffer;
2529 errorCode=U_ZERO_ERROR;
2530 ucnv_convertEx(cnv, utf8Cnv,
2531 &target, output+sizeof(output),
2532 &source, utf8+utf8Length,
2533 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2534 TRUE, TRUE, /* reset & flush */
2535 &errorCode);
2536 outputLength=(int32_t)(target-output);
2537 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) {
2538 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
2539 continue;
2540 }
2541
2542 errorCode=U_ZERO_ERROR;
2543 invalidLength=(int8_t)sizeof(invalidChars);
2544 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
2545 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) {
2546 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
2547 }
2548 }
2549}
2550
2551static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
2552 char charUTF8[4], int32_t charUTF8Length,
2553 char char0[8], int32_t char0Length,
2554 char char1[8], int32_t char1Length) {
2555 char utf8[600], expect[600];
2556 int32_t utf8Length, expectLength;
2557
2558 char testName[32];
2559
2560 UErrorCode errorCode;
2561 int32_t i;
2562
2563 errorCode=U_ZERO_ERROR;
2564 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);
2565
2566 /*
2567 * assemble an input string with the test character between each
2568 * bad sequence,
2569 * and an expected string with repeated test character output
2570 */
2571 memcpy(utf8, charUTF8, charUTF8Length);
2572 utf8Length=charUTF8Length;
2573
2574 memcpy(expect, char0, char0Length);
2575 expectLength=char0Length;
2576
2577 for(i=0; i<LENGTHOF(badUTF8); ++i) {
2578 int32_t length=strlen(badUTF8[i]);
2579 memcpy(utf8+utf8Length, badUTF8[i], length);
2580 utf8Length+=length;
2581
2582 memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
2583 utf8Length+=charUTF8Length;
2584
2585 memcpy(expect+expectLength, char1, char1Length);
2586 expectLength+=char1Length;
2587 }
2588
2589 /* expect that each bad UTF-8 sequence is detected and skipped */
2590 strcpy(testName, "from bad UTF-8 to ");
2591 strcat(testName, converterName);
2592
2593 convertExMultiStreaming(utf8Cnv, cnv,
2594 utf8, utf8Length,
2595 expect, expectLength,
2596 testName,
2597 U_ZERO_ERROR);
2598}
2599
2600/* Test illegal UTF-8 input. */
2601static void TestConvertExFromUTF8() {
2602 static const char *const converterNames[]={
2603#if !UCONFIG_NO_LEGACY_CONVERSION
2604 "windows-1252",
2605 "shift-jis",
2606#endif
2607 "us-ascii",
2608 "iso-8859-1",
2609 "utf-8"
2610 };
2611
2612 UConverter *utf8Cnv, *cnv;
2613 UErrorCode errorCode;
2614 int32_t i;
2615
2616 /* fromUnicode versions of some character, from initial state and later */
2617 char charUTF8[4], char0[8], char1[8];
2618 int32_t charUTF8Length, char0Length, char1Length;
2619
2620 errorCode=U_ZERO_ERROR;
2621 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2622 if(U_FAILURE(errorCode)) {
2623 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2624 return;
2625 }
2626
2627 for(i=0; i<LENGTHOF(converterNames); ++i) {
2628 errorCode=U_ZERO_ERROR;
2629 cnv=ucnv_open(converterNames[i], &errorCode);
2630 if(U_FAILURE(errorCode)) {
2631 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2632 continue;
2633 }
2634 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2635 continue;
2636 }
2637 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2638 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2639 ucnv_close(cnv);
2640 }
2641 ucnv_close(utf8Cnv);
2642}
2643
2644static void TestConvertExFromUTF8_C5F0() {
2645 static const char *const converterNames[]={
2646#if !UCONFIG_NO_LEGACY_CONVERSION
2647 "windows-1251",
2648 "shift-jis",
2649#endif
2650 "us-ascii",
2651 "iso-8859-1",
2652 "utf-8"
2653 };
2654
2655 UConverter *utf8Cnv, *cnv;
2656 UErrorCode errorCode;
2657 int32_t i;
2658
2659 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2660 /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2661 static const char twoNCRs[16]={
2662 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2663 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2664 };
2665 static const char twoFFFD[6]={
2666 (char)0xef, (char)0xbf, (char)0xbd,
2667 (char)0xef, (char)0xbf, (char)0xbd
2668 };
2669 const char *expected;
2670 int32_t expectedLength;
2671 char dest[20]; /* longer than longest expectedLength */
2672
2673 const char *src;
2674 char *target;
2675
2676 UChar pivotBuffer[128];
2677 UChar *pivotSource, *pivotTarget;
2678
2679 errorCode=U_ZERO_ERROR;
2680 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2681 if(U_FAILURE(errorCode)) {
2682 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2683 return;
2684 }
2685
2686 for(i=0; i<LENGTHOF(converterNames); ++i) {
2687 errorCode=U_ZERO_ERROR;
2688 cnv=ucnv_open(converterNames[i], &errorCode);
2689 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2690 NULL, NULL, &errorCode);
2691 if(U_FAILURE(errorCode)) {
2692 log_data_err("unable to open %s converter - %s\n",
2693 converterNames[i], u_errorName(errorCode));
2694 continue;
2695 }
2696 src=bad_utf8;
2697 target=dest;
2698 uprv_memset(dest, 9, sizeof(dest));
2699 if(i==LENGTHOF(converterNames)-1) {
2700 /* conversion to UTF-8 yields two U+FFFD directly */
2701 expected=twoFFFD;
2702 expectedLength=6;
2703 } else {
2704 /* conversion to a non-Unicode charset yields two NCRs */
2705 expected=twoNCRs;
2706 expectedLength=16;
2707 }
2708 pivotBuffer[0]=0;
2709 pivotBuffer[1]=1;
2710 pivotBuffer[2]=2;
2711 pivotSource=pivotTarget=pivotBuffer;
2712 ucnv_convertEx(
2713 cnv, utf8Cnv,
2714 &target, dest+expectedLength,
2715 &src, bad_utf8+sizeof(bad_utf8),
2716 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer),
2717 TRUE, TRUE, &errorCode);
2718 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2719 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2720 dest[expectedLength]!=9
2721 ) {
2722 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2723 }
2724 ucnv_close(cnv);
2725 }
2726 ucnv_close(utf8Cnv);
2727}
2728
2729static void
2730TestConvertAlgorithmic() {
2731#if !UCONFIG_NO_LEGACY_CONVERSION
2732 static const uint8_t
2733 utf8[]={
2734 /* 4e00 30a1 ff61 0410 */
2735 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2736 },
2737 shiftJIS[]={
2738 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2739 },
2740 /*errorTarget[]={*/
2741 /*
2742 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2743 * SUB, SUB, 0x40, SUB, SUB, 0x40
2744 */
2745 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2746 /*},*/
2747 utf16[]={
2748 0xfe, 0xff /* BOM only, no text */
2749 },
2750 utf32[]={
2751 0xff, 0xfe, 0, 0 /* BOM only, no text */
2752 };
2753
2754 char target[100], utf8NUL[100], shiftJISNUL[100];
2755
2756 UConverter *cnv;
2757 UErrorCode errorCode;
2758
2759 int32_t length;
2760
2761 errorCode=U_ZERO_ERROR;
2762 cnv=ucnv_open("Shift-JIS", &errorCode);
2763 if(U_FAILURE(errorCode)) {
2764 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2765 ucnv_close(cnv);
2766 return;
2767 }
2768
2769 memcpy(utf8NUL, utf8, sizeof(utf8));
2770 utf8NUL[sizeof(utf8)]=0;
2771 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2772 shiftJISNUL[sizeof(shiftJIS)]=0;
2773
2774 /*
2775 * The to/from algorithmic convenience functions share a common implementation,
2776 * so we need not test all permutations of them.
2777 */
2778
2779 /* length in, not terminated out */
2780 errorCode=U_ZERO_ERROR;
2781 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2782 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2783 length!=sizeof(shiftJIS) ||
2784 memcmp(target, shiftJIS, length)!=0
2785 ) {
2786 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2787 u_errorName(errorCode), length, sizeof(shiftJIS));
2788 }
2789
2790 /* terminated in and out */
2791 memset(target, 0x55, sizeof(target));
2792 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2793 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2794 if( errorCode!=U_ZERO_ERROR ||
2795 length!=sizeof(utf8) ||
2796 memcmp(target, utf8, length)!=0
2797 ) {
2798 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2799 u_errorName(errorCode), length, sizeof(shiftJIS));
2800 }
2801
2802 /* empty string, some target buffer */
2803 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2804 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2805 if( errorCode!=U_ZERO_ERROR ||
2806 length!=0
2807 ) {
2808 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2809 u_errorName(errorCode), length);
2810 }
2811
2812 /* pseudo-empty string, no target buffer */
2813 errorCode=U_ZERO_ERROR;
2814 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2815 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2816 length!=0
2817 ) {
2818 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2819 u_errorName(errorCode), length);
2820 }
2821
2822 errorCode=U_ZERO_ERROR;
2823 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
2824 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2825 length!=0
2826 ) {
2827 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
2828 u_errorName(errorCode), length);
2829 }
2830
2831 /* bad arguments */
2832 errorCode=U_MESSAGE_PARSE_ERROR;
2833 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
2834 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2835 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2836 }
2837
2838 /* source==NULL */
2839 errorCode=U_ZERO_ERROR;
2840 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
2841 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2842 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
2843 }
2844
2845 /* illegal alg. type */
2846 errorCode=U_ZERO_ERROR;
2847 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
2848 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2849 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
2850 }
2851ucnv_close(cnv);
2852#endif
2853}
2854
2855static void TestLMBCSMaxChar(void) {
2856 static const struct {
2857 int8_t maxSize;
2858 const char *name;
2859 } converter[] = {
2860 /* some non-LMBCS converters - perfect test setup here */
2861 { 1, "US-ASCII"},
2862 { 1, "ISO-8859-1"},
2863
2864 { 2, "UTF-16"},
2865 { 2, "UTF-16BE"},
2866 { 3, "UTF-8"},
2867 { 3, "CESU-8"},
2868 { 3, "SCSU"},
2869 { 4, "UTF-32"},
2870 { 4, "UTF-7"},
2871 { 4, "IMAP-mailbox-name"},
2872 { 4, "BOCU-1"},
2873
2874 { 1, "windows-1256"},
2875 { 2, "Shift-JIS"},
2876 { 2, "ibm-16684"},
2877 { 3, "ibm-930"},
2878 { 3, "ibm-1390"},
2879 { 4, "*test3"},
2880 { 16,"*test4"},
2881
2882 { 4, "ISCII"},
2883 { 4, "HZ"},
2884
2885 { 3, "ISO-2022"},
2886 { 3, "ISO-2022-KR"},
2887 { 6, "ISO-2022-JP"},
2888 { 8, "ISO-2022-CN"},
2889
2890 /* LMBCS */
2891 { 3, "LMBCS-1"},
2892 { 3, "LMBCS-2"},
2893 { 3, "LMBCS-3"},
2894 { 3, "LMBCS-4"},
2895 { 3, "LMBCS-5"},
2896 { 3, "LMBCS-6"},
2897 { 3, "LMBCS-8"},
2898 { 3, "LMBCS-11"},
2899 { 3, "LMBCS-16"},
2900 { 3, "LMBCS-17"},
2901 { 3, "LMBCS-18"},
2902 { 3, "LMBCS-19"}
2903 };
2904 int32_t idx;
2905
2906 for (idx = 0; idx < LENGTHOF(converter); idx++) {
2907 UErrorCode status = U_ZERO_ERROR;
2908 UConverter *cnv = cnv_open(converter[idx].name, &status);
2909 if (U_FAILURE(status)) {
2910 continue;
2911 }
2912 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
2913 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
2914 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
2915 }
2916 ucnv_close(cnv);
2917 }
2918
2919 /* mostly test that the macro compiles */
2920 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
2921 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
2922 }
2923}
2924
2925
2926static void TestJ1968(void) {
2927 UErrorCode err = U_ZERO_ERROR;
2928 UConverter *cnv;
2929 char myConvName[] = "My really really really really really really really really really really really"
2930 " really really really really really really really really really really really"
2931 " really really really really really really really really long converter name";
2932 UChar myConvNameU[sizeof(myConvName)];
2933
2934 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
2935
2936 err = U_ZERO_ERROR;
2937 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
2938 cnv = ucnv_openU(myConvNameU, &err);
2939 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2940 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2941 }
2942
2943 err = U_ZERO_ERROR;
2944 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2945 cnv = ucnv_openU(myConvNameU, &err);
2946 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2947 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2948 }
2949
2950 err = U_ZERO_ERROR;
2951 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
2952 cnv = ucnv_openU(myConvNameU, &err);
2953 if (cnv || err != U_FILE_ACCESS_ERROR) {
2954 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2955 }
2956
2957
2958
2959
2960 err = U_ZERO_ERROR;
2961 cnv = ucnv_open(myConvName, &err);
2962 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2963 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2964 }
2965
2966 err = U_ZERO_ERROR;
2967 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
2968 cnv = ucnv_open(myConvName, &err);
2969 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2970 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2971 }
2972
2973 err = U_ZERO_ERROR;
2974 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2975 cnv = ucnv_open(myConvName, &err);
2976 if (cnv || err != U_FILE_ACCESS_ERROR) {
2977 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2978 }
2979
2980 err = U_ZERO_ERROR;
2981 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
2982 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
2983 cnv = ucnv_open(myConvName, &err);
2984 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
2985 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
2986 }
2987
2988 /* The comma isn't really a part of the converter name. */
2989 err = U_ZERO_ERROR;
2990 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
2991 cnv = ucnv_open(myConvName, &err);
2992 if (cnv || err != U_FILE_ACCESS_ERROR) {
2993 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
2994 }
2995
2996 err = U_ZERO_ERROR;
2997 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
2998 cnv = ucnv_open(myConvName, &err);
2999 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3000 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3001 }
3002
3003 err = U_ZERO_ERROR;
3004 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3005 cnv = ucnv_open(myConvName, &err);
3006 if (cnv || err != U_FILE_ACCESS_ERROR) {
3007 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3008 }
3009
3010}
3011
3012#if !UCONFIG_NO_LEGACY_CONVERSION
3013static void
3014testSwap(const char *name, UBool swap) {
3015 /*
3016 * Test Unicode text.
3017 * Contains characters that are the highest for some of the
3018 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3019 * tables copies the entire tables.
3020 */
3021 static const UChar text[]={
3022 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3023 };
3024
3025 UChar uNormal[32], uSwapped[32];
3026 char normal[32], swapped[32];
3027 const UChar *pcu;
3028 UChar *pu;
3029 char *pc;
3030 int32_t i, normalLength, swappedLength;
3031 UChar u;
3032 char c;
3033
3034 const char *swappedName;
3035 UConverter *cnv, *swapCnv;
3036 UErrorCode errorCode;
3037
3038 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */
3039
3040 /* open both the normal and the LF/NL-swapping converters */
3041 strcpy(swapped, name);
3042 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3043
3044 errorCode=U_ZERO_ERROR;
3045 swapCnv=ucnv_open(swapped, &errorCode);
3046 cnv=ucnv_open(name, &errorCode);
3047 if(U_FAILURE(errorCode)) {
3048 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3049 goto cleanup;
3050 }
3051
3052 /* the name must contain the swap option if and only if we expect the converter to swap */
3053 swappedName=ucnv_getName(swapCnv, &errorCode);
3054 if(U_FAILURE(errorCode)) {
3055 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3056 goto cleanup;
3057 }
3058
3059 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3060 if(swap != (pc!=NULL)) {
3061 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3062 goto cleanup;
3063 }
3064
3065 /* convert to EBCDIC */
3066 pcu=text;
3067 pc=normal;
3068 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3069 normalLength=(int32_t)(pc-normal);
3070
3071 pcu=text;
3072 pc=swapped;
3073 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode);
3074 swappedLength=(int32_t)(pc-swapped);
3075
3076 if(U_FAILURE(errorCode)) {
3077 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3078 goto cleanup;
3079 }
3080
3081 /* compare EBCDIC output */
3082 if(normalLength!=swappedLength) {
3083 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3084 goto cleanup;
3085 }
3086 for(i=0; i<normalLength; ++i) {
3087 /* swap EBCDIC LF/NL for comparison */
3088 c=normal[i];
3089 if(swap) {
3090 if(c==0x15) {
3091 c=0x25;
3092 } else if(c==0x25) {
3093 c=0x15;
3094 }
3095 }
3096
3097 if(c!=swapped[i]) {
3098 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3099 goto cleanup;
3100 }
3101 }
3102
3103 /* convert back to Unicode (may not roundtrip) */
3104 pc=normal;
3105 pu=uNormal;
3106 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode);
3107 normalLength=(int32_t)(pu-uNormal);
3108
3109 pc=normal;
3110 pu=uSwapped;
3111 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode);
3112 swappedLength=(int32_t)(pu-uSwapped);
3113
3114 if(U_FAILURE(errorCode)) {
3115 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3116 goto cleanup;
3117 }
3118
3119 /* compare EBCDIC output */
3120 if(normalLength!=swappedLength) {
3121 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3122 goto cleanup;
3123 }
3124 for(i=0; i<normalLength; ++i) {
3125 /* swap EBCDIC LF/NL for comparison */
3126 u=uNormal[i];
3127 if(swap) {
3128 if(u==0xa) {
3129 u=0x85;
3130 } else if(u==0x85) {
3131 u=0xa;
3132 }
3133 }
3134
3135 if(u!=uSwapped[i]) {
3136 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3137 goto cleanup;
3138 }
3139 }
3140
3141 /* clean up */
3142cleanup:
3143 ucnv_close(cnv);
3144 ucnv_close(swapCnv);
3145}
3146
3147static void
3148TestEBCDICSwapLFNL() {
3149 static const struct {
3150 const char *name;
3151 UBool swap;
3152 } tests[]={
3153 { "ibm-37", TRUE },
3154 { "ibm-1047", TRUE },
3155 { "ibm-1140", TRUE },
3156 { "ibm-930", TRUE },
3157 { "iso-8859-3", FALSE }
3158 };
3159
3160 int i;
3161
3162 for(i=0; i<LENGTHOF(tests); ++i) {
3163 testSwap(tests[i].name, tests[i].swap);
3164 }
3165}
3166#else
/* Variant for builds without legacy conversion: there is nothing to test. */
static void
TestEBCDICSwapLFNL() {
}
3171#endif
3172
3173static const UVersionInfo ICU_34 = {3,4,0,0};
3174
3175static void TestFromUCountPending(){
3176#if !UCONFIG_NO_LEGACY_CONVERSION
3177 UErrorCode status = U_ZERO_ERROR;
3178/* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3179 static const struct {
3180 UChar input[6];
3181 int32_t len;
3182 int32_t exp;
3183 }fromUnicodeTests[] = {
3184 /*m:n conversion*/
3185 {{0xdbc4},1,1},
3186 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3187 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3188 };
3189 int i;
3190 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3191 if(U_FAILURE(status)){
3192 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3193 return;
3194 }
3195 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) {
3196 char tgt[10];
3197 char* target = tgt;
3198 char* targetLimit = target + 10;
3199 const UChar* source = fromUnicodeTests[i].input;
3200 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3201 int32_t len = 0;
3202 ucnv_reset(cnv);
3203 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3204 len = ucnv_fromUCountPending(cnv, &status);
3205 if(U_FAILURE(status)){
3206 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3207 status = U_ZERO_ERROR;
3208 continue;
3209 }
3210 if(len != fromUnicodeTests[i].exp){
3211 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n");
3212 }
3213 }
3214 status = U_ZERO_ERROR;
3215 {
3216 /*
3217 * The converter has to read the tail before it knows that
3218 * only head alone matches.
3219 * At the end, the output for head will overflow the target,
3220 * middle will be pending, and tail will not have been consumed.
3221 */
3222 /*
3223 \U00101234 -> x (<U101234> \x07 |0)
3224 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3225 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3226 \U00060007 -> unassigned
3227 */
3228 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3229 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3230 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3231 char tgt[10];
3232 char* target = tgt;
3233 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3234 const UChar* source = head;
3235 const UChar* sourceLimit = source + u_strlen(head);
3236 int32_t len = 0;
3237 ucnv_reset(cnv);
3238 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3239 len = ucnv_fromUCountPending(cnv, &status);
3240 if(U_FAILURE(status)){
3241 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3242 status = U_ZERO_ERROR;
3243 }
3244 if(len!=4){
3245 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3246 }
3247 source = middle;
3248 sourceLimit = source + u_strlen(middle);
3249 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3250 len = ucnv_fromUCountPending(cnv, &status);
3251 if(U_FAILURE(status)){
3252 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3253 status = U_ZERO_ERROR;
3254 }
3255 if(len!=5){
3256 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3257 }
3258 source = tail;
3259 sourceLimit = source + u_strlen(tail);
3260 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3261 if(status != U_BUFFER_OVERFLOW_ERROR){
3262 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3263 }
3264 status = U_ZERO_ERROR;
3265 len = ucnv_fromUCountPending(cnv, &status);
3266 /* middle[1] is pending, tail has not been consumed */
3267 if(U_FAILURE(status)){
3268 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3269 }
3270 if(len!=1){
3271 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
3272 }
3273 }
3274 ucnv_close(cnv);
3275#endif
3276}
3277
3278static void
3279TestToUCountPending(){
3280#if !UCONFIG_NO_LEGACY_CONVERSION
3281 UErrorCode status = U_ZERO_ERROR;
3282 static const struct {
3283 char input[6];
3284 int32_t len;
3285 int32_t exp;
3286 }toUnicodeTests[] = {
3287 /*m:n conversion*/
3288 {{0x05, 0x01, 0x02},3,3},
3289 {{0x01, 0x02},2,2},
3290 {{0x07, 0x00, 0x01, 0x02},4,4},
3291 };
3292
3293 int i;
3294 UConverterToUCallback *oldToUAction= NULL;
3295 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3296 if(U_FAILURE(status)){
3297 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3298 return;
3299 }
3300 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3301 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) {
3302 UChar tgt[20];
3303 UChar* target = tgt;
3304 UChar* targetLimit = target + 20;
3305 const char* source = toUnicodeTests[i].input;
3306 const char* sourceLimit = source + toUnicodeTests[i].len;
3307 int32_t len = 0;
3308 ucnv_reset(cnv);
3309 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3310 len = ucnv_toUCountPending(cnv,&status);
3311 if(U_FAILURE(status)){
3312 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3313 status = U_ZERO_ERROR;
3314 continue;
3315 }
3316 if(len != toUnicodeTests[i].exp){
3317 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n");
3318 }
3319 }
3320 status = U_ZERO_ERROR;
3321 ucnv_close(cnv);
3322
3323 {
3324 /*
3325 * The converter has to read the tail before it knows that
3326 * only head alone matches.
3327 * At the end, the output for head will overflow the target,
3328 * mid will be pending, and tail will not have been consumed.
3329 */
3330 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3331 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3332 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3333 /*
3334 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3335 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3336 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3337 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3338 */
3339 UChar tgt[10];
3340 UChar* target = tgt;
3341 UChar* targetLimit = target + 1; /* expect overflow from converting */
3342 const char* source = head;
3343 const char* sourceLimit = source + strlen(head);
3344 int32_t len = 0;
3345 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3346 if(U_FAILURE(status)){
3347 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3348 return;
3349 }
3350 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3351 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3352 len = ucnv_toUCountPending(cnv,&status);
3353 if(U_FAILURE(status)){
3354 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3355 }
3356 if(len != 4){
3357 log_err("Did not get the expected len for head.\n");
3358 }
3359 source=mid;
3360 sourceLimit = source+strlen(mid);
3361 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3362 len = ucnv_toUCountPending(cnv,&status);
3363 if(U_FAILURE(status)){
3364 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3365 }
3366 if(len != 8){
3367 log_err("Did not get the expected len for mid.\n");
3368 }
3369
3370 source=tail;
3371 sourceLimit = source+strlen(tail);
3372 targetLimit = target;
3373 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status);
3374 if(status != U_BUFFER_OVERFLOW_ERROR){
3375 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3376 }
3377 status = U_ZERO_ERROR;
3378 len = ucnv_toUCountPending(cnv,&status);
3379 /* mid[4] is pending, tail has not been consumed */
3380 if(U_FAILURE(status)){
3381 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3382 }
3383 if(len != 4){
3384 log_err("Did not get the expected len for tail.\n");
3385 }
3386 ucnv_close(cnv);
3387 }
3388#endif
3389}
3390
3391static void TestOneDefaultNameChange(const char *name, const char *expected) {
3392 UErrorCode status = U_ZERO_ERROR;
3393 UConverter *cnv;
3394 ucnv_setDefaultName(name);
3395 if(strcmp(ucnv_getDefaultName(), expected)==0)
3396 log_verbose("setDefaultName of %s works.\n", name);
3397 else
3398 log_err("setDefaultName of %s failed\n", name);
3399 cnv=ucnv_open(NULL, &status);
3400 if (U_FAILURE(status) || cnv == NULL) {
3401 log_err("opening the default converter of %s failed\n", name);
3402 return;
3403 }
3404 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3405 log_verbose("ucnv_getName of %s works.\n", name);
3406 else
3407 log_err("ucnv_getName of %s failed\n", name);
3408 ucnv_close(cnv);
3409}
3410
3411static void TestDefaultName(void) {
3412 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3413 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3414 strcpy(defaultName, ucnv_getDefaultName());
3415
3416 log_verbose("getDefaultName returned %s\n", defaultName);
3417
3418 /*change the default name by setting it */
3419 TestOneDefaultNameChange("UTF-8", "UTF-8");
3420#if U_CHARSET_IS_UTF8
3421 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3422 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3423 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3424#else
3425# if !UCONFIG_NO_LEGACY_CONVERSION
3426 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3427 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3428# endif
3429 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3430#endif
3431
3432 /*set the default name back*/
3433 ucnv_setDefaultName(defaultName);
3434}
3435
3436/* Test that ucnv_compareNames() matches names according to spec. ----------- */
3437
/*
 * Returns the signum of n: -1 for negative values, 0 for zero and
 * 1 for positive values.
 */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (n>0) - (n<0);
}
3448
3449static void
3450compareNames(const char **names) {
3451 const char *relation, *name1, *name2;
3452 int rel, result;
3453
3454 relation=*names++;
3455 if(*relation=='=') {
3456 rel = 0;
3457 } else if(*relation=='<') {
3458 rel = -1;
3459 } else {
3460 rel = 1;
3461 }
3462
3463 name1=*names++;
3464 if(name1==NULL) {
3465 return;
3466 }
3467 while((name2=*names++)!=NULL) {
3468 result=ucnv_compareNames(name1, name2);
3469 if(sign(result)!=rel) {
3470 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel);
3471 }
3472 name1=name2;
3473 }
3474}
3475
3476static void
3477TestCompareNames() {
3478 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
3479 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
3480 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
3481 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
3482
3483 compareNames(equalUTF8);
3484 compareNames(equalIBM);
3485 compareNames(lessMac);
3486 compareNames(lessUTF080);
3487}
3488
3489static void
3490TestSubstString() {
3491 static const UChar surrogate[1]={ 0xd900 };
3492 char buffer[16];
3493
3494 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3495 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3496 UConverter *cnv;
3497 UErrorCode errorCode;
3498 int32_t length;
3499 int8_t len8;
3500
3501 /* UTF-16/32: test that the BOM is output before the sub character */
3502 errorCode=U_ZERO_ERROR;
3503 cnv=ucnv_open("UTF-16", &errorCode);
3504 if(U_FAILURE(errorCode)) {
3505 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3506 return;
3507 }
3508 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3509 ucnv_close(cnv);
3510 if(U_FAILURE(errorCode) ||
3511 length!=4 ||
3512 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3513 ) {
3514 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3515 }
3516
3517 errorCode=U_ZERO_ERROR;
3518 cnv=ucnv_open("UTF-32", &errorCode);
3519 if(U_FAILURE(errorCode)) {
3520 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3521 return;
3522 }
3523 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3524 ucnv_close(cnv);
3525 if(U_FAILURE(errorCode) ||
3526 length!=8 ||
3527 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3528 ) {
3529 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3530 }
3531
3532 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3533 errorCode=U_ZERO_ERROR;
3534 cnv=ucnv_open("ISO-8859-1", &errorCode);
3535 if(U_FAILURE(errorCode)) {
3536 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3537 return;
3538 }
3539 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3540 if(U_FAILURE(errorCode)) {
3541 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
3542 } else {
3543 len8 = sizeof(buffer);
3544 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3545 /* Stateless converter, we expect the string converted to charset bytes. */
3546 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3547 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
3548 }
3549 }
3550 ucnv_close(cnv);
3551
3552#if !UCONFIG_NO_LEGACY_CONVERSION
3553 errorCode=U_ZERO_ERROR;
3554 cnv=ucnv_open("HZ", &errorCode);
3555 if(U_FAILURE(errorCode)) {
3556 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3557 return;
3558 }
3559 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode);
3560 if(U_FAILURE(errorCode)) {
3561 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3562 } else {
3563 len8 = sizeof(buffer);
3564 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3565 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3566 if(U_FAILURE(errorCode) || len8!=0) {
3567 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3568 }
3569 }
3570 ucnv_close(cnv);
3571#endif
3572 /*
3573 * Further testing of ucnv_setSubstString() is done via intltest convert.
3574 * We do not test edge cases of illegal arguments and similar because the
3575 * function implementation uses all of its parameters in calls to other
3576 * functions with UErrorCode parameters.
3577 */
3578}
3579
3580static void
3581InvalidArguments() {
3582 UConverter *cnv;
3583 UErrorCode errorCode;
3584 char charBuffer[2] = {1, 1};
3585 char ucharAsCharBuffer[2] = {2, 2};
3586 char *charsPtr = charBuffer;
3587 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
3588 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3589
3590 errorCode=U_ZERO_ERROR;
3591 cnv=ucnv_open("UTF-8", &errorCode);
3592 if(U_FAILURE(errorCode)) {
3593 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3594 return;
3595 }
3596
3597 errorCode=U_ZERO_ERROR;
3598 /* This one should fail because an incomplete UChar is being passed in */
3599 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode);
3600 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3601 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3602 }
3603
3604 errorCode=U_ZERO_ERROR;
3605 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3606 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode);
3607 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3608 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3609 }
3610
3611 errorCode=U_ZERO_ERROR;
3612 /* This one should fail because an incomplete UChar is being passed in */
3613 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3614 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3615 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3616 }
3617
3618 errorCode=U_ZERO_ERROR;
3619 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3620 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode);
3621 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3622 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3623 }
3624
3625 if (charBuffer[0] != 1 || charBuffer[1] != 1
3626 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3627 {
3628 log_err("Data was incorrectly written to buffers\n");
3629 }
3630
3631 ucnv_close(cnv);
3632}
3633
3634static void TestGetName() {
3635 static const char *const names[] = {
3636 "Unicode", "UTF-16",
3637 "UnicodeBigUnmarked", "UTF-16BE",
3638 "UnicodeBig", "UTF-16BE,version=1",
3639 "UnicodeLittleUnmarked", "UTF-16LE",
3640 "UnicodeLittle", "UTF-16LE,version=1",
3641 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
3642 };
3643 int32_t i;
3644 for(i = 0; i < LENGTHOF(names); i += 2) {
3645 UErrorCode errorCode = U_ZERO_ERROR;
3646 UConverter *cnv = ucnv_open(names[i], &errorCode);
3647 if(U_SUCCESS(errorCode)) {
3648 const char *name = ucnv_getName(cnv, &errorCode);
3649 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3650 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3651 names[i], name, names[i+1], u_errorName(errorCode));
3652 }
3653 ucnv_close(cnv);
3654 }
3655 }
3656}
3657
3658static void TestUTFBOM() {
3659 static const UChar a16[] = { 0x61 };
3660 static const char *const names[] = {
3661 "UTF-16",
3662 "UTF-16,version=1",
3663 "UTF-16BE",
3664 "UnicodeBig",
3665 "UTF-16LE",
3666 "UnicodeLittle"
3667 };
3668 static const uint8_t expected[][5] = {
3669#if U_IS_BIG_ENDIAN
3670 { 4, 0xfe, 0xff, 0, 0x61 },
3671 { 4, 0xfe, 0xff, 0, 0x61 },
3672#else
3673 { 4, 0xff, 0xfe, 0x61, 0 },
3674 { 4, 0xff, 0xfe, 0x61, 0 },
3675#endif
3676
3677 { 2, 0, 0x61 },
3678 { 4, 0xfe, 0xff, 0, 0x61 },
3679
3680 { 2, 0x61, 0 },
3681 { 4, 0xff, 0xfe, 0x61, 0 }
3682 };
3683
3684 char bytes[10];
3685 int32_t i;
3686
3687 for(i = 0; i < LENGTHOF(names); ++i) {
3688 UErrorCode errorCode = U_ZERO_ERROR;
3689 UConverter *cnv = ucnv_open(names[i], &errorCode);
3690 int32_t length = 0;
3691 const uint8_t *exp = expected[i];
3692 if (U_FAILURE(errorCode)) {
3693 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3694 continue;
3695 }
3696 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3697
3698 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3699 log_err("unexpected %s BOM writing behavior -- %s\n",
3700 names[i], u_errorName(errorCode));
3701 }
3702 ucnv_close(cnv);
3703 }
3704}