]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/nucnvtst.c
ICU-8.11.2.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
d5d484b0 3 * Copyright (c) 1997-2006,2008 International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
73c04bcf 6/*******************************************************************************
b75a7d8f
A
7*
8* File CCONVTST.C
9*
10* Modification History:
11* Name Description
12* Steven R. Loomis 7/8/1999 Adding input buffer test
73c04bcf 13********************************************************************************
b75a7d8f
A
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "cintltst.h"
21#include "unicode/utypes.h"
22#include "unicode/ustring.h"
23#include "unicode/ucol.h"
24#include "cmemory.h"
25
374ca955 26static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
b75a7d8f
A
27static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
28#if !UCONFIG_NO_COLLATION
29static void TestJitterbug981(void);
30#endif
31static void TestJitterbug1293(void);
32static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
33static void TestConverterTypesAndStarters(void);
34static void TestAmbiguous(void);
35static void TestSignatureDetection(void);
36static void TestUTF7(void);
37static void TestIMAP(void);
38static void TestUTF8(void);
39static void TestCESU8(void);
40static void TestUTF16(void);
41static void TestUTF16BE(void);
42static void TestUTF16LE(void);
43static void TestUTF32(void);
44static void TestUTF32BE(void);
45static void TestUTF32LE(void);
46static void TestLATIN1(void);
73c04bcf
A
47
48#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
49static void TestSBCS(void);
50static void TestDBCS(void);
51static void TestMBCS(void);
73c04bcf 52
374ca955 53#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 54static void TestISO_2022(void);
374ca955 55#endif
73c04bcf 56
b75a7d8f
A
57static void TestISO_2022_JP(void);
58static void TestISO_2022_JP_1(void);
59static void TestISO_2022_JP_2(void);
60static void TestISO_2022_KR(void);
61static void TestISO_2022_KR_1(void);
62static void TestISO_2022_CN(void);
63static void TestISO_2022_CN_EXT(void);
64static void TestJIS(void);
65static void TestHZ(void);
73c04bcf
A
66#endif
67
b75a7d8f 68static void TestSCSU(void);
73c04bcf
A
69
70#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
71static void TestEBCDIC_STATEFUL(void);
72static void TestGB18030(void);
73static void TestLMBCS(void);
74static void TestJitterbug255(void);
75static void TestEBCDICUS4XML(void);
76static void TestJitterbug915(void);
77static void TestISCII(void);
73c04bcf
A
78
79static void TestCoverageMBCS(void);
80static void TestJitterbug2346(void);
81static void TestJitterbug2411(void);
d5d484b0 82static void TestJitterbug6175(void);
73c04bcf
A
83#endif
84
85static void TestRoundTrippingAllUTF(void);
b75a7d8f
A
86static void TestConv(const uint16_t in[],
87 int len,
88 const char* conv,
89 const char* lang,
90 char byteArr[],
91 int byteArrLen);
b75a7d8f
A
92void addTestNewConvert(TestNode** root);
93
94/* open a converter, using test data if it begins with '@' */
95static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
96
97
/* Element count of the scratch output/offset buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/*
 * Chunk sizes handed to the converter per call; both start at NEW_MAX_BUFFER
 * and are overwritten by TestNewConvertWithBufferSizes().
 */
static int32_t  gInBufferSize  = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;

/* Description of the conversion currently under test, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
103
/*
 * Smaller of x and y.
 * Every argument is parenthesized so that operands containing low-precedence
 * operators (?:, &, |, ...) compare as whole expressions.
 * Note: the selected argument is evaluated twice; do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
105
106static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
107{
108 if(cnv && cnv[0] == '@') {
374ca955 109 return ucnv_openPackage(loadTestData(err), cnv+1, err);
b75a7d8f
A
110 } else {
111 return ucnv_open(cnv, err);
112 }
113}
114
/* Write the first len bytes of a to the verbose log as "{0xNN 0xNN ... }" plus a newline. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx=0; idx<len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
123
124static void printUSeq(const UChar* a, int len)
125{
126 int i=0;
127 log_verbose("{U+");
128 while (i<len) log_verbose("0x%04x ", a[i++]);
129 log_verbose("}\n");
130}
131
/* Write the first len bytes of a to stderr as "{0xNN 0xNN ... }" plus a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx=0; idx<len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
140
141static void printUSeqErr(const UChar* a, int len)
142{
143 int i=0;
144 fprintf(stderr, "{U+");
145 while (i<len)
146 fprintf(stderr, "0x%04x ", a[i++]);
147 fprintf(stderr,"}\n");
148}
149
150static void
374ca955 151TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
b75a7d8f
A
152{
153 const char* s0;
154 const char* s=(char*)source;
374ca955 155 const int32_t *r=results;
b75a7d8f 156 UErrorCode errorCode=U_ZERO_ERROR;
374ca955 157 UChar32 c;
b75a7d8f
A
158
159 while(s<limit) {
160 s0=s;
161 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
162 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
163 break; /* no more significant input */
164 } else if(U_FAILURE(errorCode)) {
165 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
166 break;
374ca955
A
167 } else if(
168 /* test the expected number of input bytes only if >=0 */
169 (*r>=0 && (int32_t)(s-s0)!=*r) ||
170 c!=*(r+1)
171 ) {
b75a7d8f
A
172 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
173 message, c, (s-s0), *(r+1), *r);
174 break;
175 }
176 r+=2;
177 }
178}
179
180static void
181TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
182{
183 const char* s=(char*)source;
184 UErrorCode errorCode=U_ZERO_ERROR;
185 uint32_t c;
186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187 if(errorCode != expected){
188 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
189 }
190 if(c != 0xFFFD && c != 0xffff){
191 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
192 }
193
194}
195
196static void TestInBufSizes(void)
197{
198 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
199#if 1
200 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
201 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
202 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
203 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
204 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
205 TestNewConvertWithBufferSizes(1,1);
206 TestNewConvertWithBufferSizes(2,3);
207 TestNewConvertWithBufferSizes(3,2);
208#endif
209}
210
211static void TestOutBufSizes(void)
212{
213#if 1
214 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
215 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
216 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
217 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
218 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
219 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
220
221#endif
222}
223
224
225void addTestNewConvert(TestNode** root)
226{
227 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
228 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
229 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
230 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
231 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
232 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
233 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
234 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
374ca955
A
235
236 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
b75a7d8f
A
237 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
238 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
239 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
240 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
241 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
242 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
243 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
73c04bcf
A
244
245#if !UCONFIG_NO_LEGACY_CONVERSION
374ca955 246 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
73c04bcf 247#endif
374ca955 248
b75a7d8f 249 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
73c04bcf
A
250
251#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
252 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
253 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
254 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
73c04bcf 255
374ca955 256#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 257 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
374ca955 258#endif
73c04bcf 259
b75a7d8f
A
260 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
261 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
262 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
263 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
264 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
265 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
266 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
267 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
268 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
269 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
73c04bcf
A
270#endif
271
b75a7d8f 272 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
73c04bcf
A
273
274#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
275 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
276 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
b75a7d8f
A
277 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
278 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
279 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
73c04bcf 280
b75a7d8f
A
281#if !UCONFIG_NO_COLLATION
282 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
283#endif
73c04bcf 284
b75a7d8f 285 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
73c04bcf
A
286#endif
287
288
289#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 290 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
73c04bcf
A
291#endif
292
b75a7d8f 293 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
73c04bcf
A
294
295#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
296 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
297 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
d5d484b0 298 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
73c04bcf 299#endif
b75a7d8f
A
300}
301
302
303/* Note that this test already makes use of statics, so it's not really
304 multithread safe.
305 This convenience function lets us make the error messages actually useful.
306*/
307
308static void setNuConvTestName(const char *codepage, const char *direction)
309{
374ca955
A
310 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
311 codepage,
312 direction,
313 (int)gInBufferSize,
314 (int)gOutBufferSize);
b75a7d8f
A
315}
316
/* Outcome of testConvertFromU() / testConvertToU(). */
typedef enum
{
    /* test was OK */
    TC_OK = 0,
    /* Match failed - err was printed */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
323
324/* Note: This function uses global variables and it will not do offset
325checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
326static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
327 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
328{
329 UErrorCode status = U_ZERO_ERROR;
330 UConverter *conv = 0;
73c04bcf 331 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f 332 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf 333 char *p;
b75a7d8f 334 const UChar *src;
73c04bcf
A
335 char *end;
336 char *targ;
b75a7d8f
A
337 int32_t *offs;
338 int i;
339 int32_t realBufferSize;
73c04bcf 340 char *realBufferEnd;
b75a7d8f
A
341 const UChar *realSourceEnd;
342 const UChar *sourceLimit;
343 UBool checkOffsets = TRUE;
344 UBool doFlush;
345
346 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 347 junkout[i] = (char)0xF0;
b75a7d8f
A
348 for(i=0;i<NEW_MAX_BUFFER;i++)
349 junokout[i] = 0xFF;
350
351 setNuConvTestName(codepage, "FROM");
352
353 log_verbose("\n========= %s\n", gNuConvTestName);
354
355 conv = my_ucnv_open(codepage, &status);
356
357 if(U_FAILURE(status))
358 {
359 log_data_err("Couldn't open converter %s\n",codepage);
360 return TC_FAIL;
361 }
362 if(useFallback){
363 ucnv_setFallback(conv,useFallback);
364 }
365
366 log_verbose("Converter opened..\n");
367
368 src = source;
369 targ = junkout;
370 offs = junokout;
371
372 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
373 realBufferEnd = junkout + realBufferSize;
374 realSourceEnd = source + sourceLen;
375
376 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
377 checkOffsets = FALSE;
378
379 do
380 {
381 end = nct_min(targ + gOutBufferSize, realBufferEnd);
382 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
383
384 doFlush = (UBool)(sourceLimit == realSourceEnd);
385
386 if(targ == realBufferEnd) {
387 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
388 return TC_FAIL;
389 }
390 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
391
392
393 status = U_ZERO_ERROR;
394
395 ucnv_fromUnicode (conv,
73c04bcf
A
396 &targ,
397 end,
b75a7d8f
A
398 &src,
399 sourceLimit,
400 checkOffsets ? offs : NULL,
401 doFlush, /* flush if we're at the end of the input data */
402 &status);
403 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
404
405 if(U_FAILURE(status)) {
406 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
407 return TC_FAIL;
408 }
409
410 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
411 sourceLen, targ-junkout);
412
413 if(VERBOSITY)
414 {
415 char junk[9999];
416 char offset_str[9999];
73c04bcf 417 char *ptr;
b75a7d8f
A
418
419 junk[0] = 0;
420 offset_str[0] = 0;
421 for(ptr = junkout;ptr<targ;ptr++) {
422 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
423 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
424 }
425
426 log_verbose(junk);
427 printSeq((const uint8_t *)expect, expectLen);
428 if ( checkOffsets ) {
429 log_verbose("\nOffsets:");
430 log_verbose(offset_str);
431 }
432 log_verbose("\n");
433 }
434 ucnv_close(conv);
435
436 if(expectLen != targ-junkout) {
437 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
438 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
439 printf("\nGot:");
73c04bcf 440 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
441 printf("\nExpected:");
442 printSeqErr((const unsigned char*)expect, expectLen);
443 return TC_MISMATCH;
444 }
445
446 if (checkOffsets && (expectOffsets != 0) ) {
447 log_verbose("comparing %d offsets..\n", targ-junkout);
448 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
449 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
73c04bcf 450 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
451 log_err("\n");
452 log_err("Got : ");
453 for(p=junkout;p<targ;p++) {
454 log_err("%d,", junokout[p-junkout]);
455 }
456 log_err("\n");
457 log_err("Expected: ");
458 for(i=0; i<(targ-junkout); i++) {
459 log_err("%d,", expectOffsets[i]);
460 }
461 log_err("\n");
462 }
463 }
464
465 log_verbose("comparing..\n");
466 if(!memcmp(junkout, expect, expectLen)) {
467 log_verbose("Matches!\n");
468 return TC_OK;
469 } else {
470 log_err("String does not match u->%s\n", gNuConvTestName);
471 printUSeqErr(source, sourceLen);
472 printf("\nGot:");
473 printSeqErr((const unsigned char *)junkout, expectLen);
474 printf("\nExpected:");
475 printSeqErr((const unsigned char *)expect, expectLen);
476
477 return TC_MISMATCH;
478 }
479}
480
481/* Note: This function uses global variables and it will not do offset
482checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
483static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
484 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
485{
486 UErrorCode status = U_ZERO_ERROR;
487 UConverter *conv = 0;
488 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
489 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
490 const char *src;
491 const char *realSourceEnd;
492 const char *srcLimit;
b75a7d8f
A
493 UChar *p;
494 UChar *targ;
495 UChar *end;
496 int32_t *offs;
497 int i;
498 UBool checkOffsets = TRUE;
499
500 int32_t realBufferSize;
501 UChar *realBufferEnd;
502
503
504 for(i=0;i<NEW_MAX_BUFFER;i++)
505 junkout[i] = 0xFFFE;
506
507 for(i=0;i<NEW_MAX_BUFFER;i++)
508 junokout[i] = -1;
509
510 setNuConvTestName(codepage, "TO");
511
512 log_verbose("\n========= %s\n", gNuConvTestName);
513
514 conv = my_ucnv_open(codepage, &status);
515
516 if(U_FAILURE(status))
517 {
518 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
519 return TC_FAIL;
520 }
521 if(useFallback){
522 ucnv_setFallback(conv,useFallback);
523 }
524 log_verbose("Converter opened..\n");
525
73c04bcf 526 src = (const char *)source;
b75a7d8f
A
527 targ = junkout;
528 offs = junokout;
529
530 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
531 realBufferEnd = junkout + realBufferSize;
532 realSourceEnd = src + sourcelen;
533
534 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
535 checkOffsets = FALSE;
536
537 do
538 {
539 end = nct_min( targ + gOutBufferSize, realBufferEnd);
540 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
541
542 if(targ == realBufferEnd)
543 {
544 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
545 return TC_FAIL;
546 }
547 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
548
549 /* oldTarg = targ; */
550
551 status = U_ZERO_ERROR;
552
553 ucnv_toUnicode (conv,
554 &targ,
555 end,
73c04bcf
A
556 &src,
557 srcLimit,
b75a7d8f
A
558 checkOffsets ? offs : NULL,
559 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
560 &status);
561
562 /* offs += (targ-oldTarg); */
563
564 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
565
566 if(U_FAILURE(status))
567 {
568 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
569 return TC_FAIL;
570 }
571
572 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
573 sourcelen, targ-junkout);
574 if(VERBOSITY)
575 {
576 char junk[9999];
577 char offset_str[9999];
578 UChar *ptr;
579
580 junk[0] = 0;
581 offset_str[0] = 0;
582
583 for(ptr = junkout;ptr<targ;ptr++)
584 {
585 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
586 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
587 }
588
589 log_verbose(junk);
590 printUSeq(expect, expectlen);
591 if ( checkOffsets )
592 {
593 log_verbose("\nOffsets:");
594 log_verbose(offset_str);
595 }
596 log_verbose("\n");
597 }
598 ucnv_close(conv);
599
600 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
601
602 if (checkOffsets && (expectOffsets != 0))
603 {
604 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
605 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
606 log_err("Got: ");
607 for(p=junkout;p<targ;p++) {
608 log_err("%d,", junokout[p-junkout]);
609 }
610 log_err("\n");
611 log_err("Expected: ");
612 for(i=0; i<(targ-junkout); i++) {
613 log_err("%d,", expectOffsets[i]);
614 }
615 log_err("\n");
616 log_err("output: ");
617 for(i=0; i<(targ-junkout); i++) {
618 log_err("%X,", junkout[i]);
619 }
620 log_err("\n");
621 log_err("input: ");
73c04bcf 622 for(i=0; i<(src-(const char *)source); i++) {
b75a7d8f
A
623 log_err("%X,", (unsigned char)source[i]);
624 }
625 log_err("\n");
626 }
627 }
628
629 if(!memcmp(junkout, expect, expectlen*2))
630 {
631 log_verbose("Matches!\n");
632 return TC_OK;
633 }
634 else
635 {
636 log_err("String does not match. %s\n", gNuConvTestName);
637 log_verbose("String does not match. %s\n", gNuConvTestName);
638 printf("\nGot:");
639 printUSeqErr(junkout, expectlen);
640 printf("\nExpected:");
641 printUSeqErr(expect, expectlen);
642 return TC_MISMATCH;
643 }
644}
645
646
647static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
648{
649/** test chars #1 */
650 /* 1 2 3 1Han 2Han 3Han . */
73c04bcf
A
651 static const UChar sampleText[] =
652 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
b75a7d8f
A
653
654
73c04bcf 655 static const uint8_t expectedUTF8[] =
b75a7d8f 656 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
73c04bcf 657 static const int32_t toUTF8Offs[] =
b75a7d8f 658 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
73c04bcf 659 static const int32_t fmUTF8Offs[] =
b75a7d8f
A
660 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
661
374ca955 662#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 663 /* Same as UTF8, but with ^[%B preceeding */
73c04bcf 664 static const const uint8_t expectedISO2022[] =
b75a7d8f 665 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
73c04bcf 666 static const int32_t toISO2022Offs[] =
b75a7d8f
A
667 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
668 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
73c04bcf 669 static const int32_t fmISO2022Offs[] =
b75a7d8f 670 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
374ca955 671#endif
b75a7d8f
A
672
673 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
73c04bcf 674 static const uint8_t expectedIBM930[] =
b75a7d8f 675 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
73c04bcf 676 static const int32_t toIBM930Offs[] =
b75a7d8f 677 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
73c04bcf 678 static const int32_t fmIBM930Offs[] =
b75a7d8f
A
679 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
680
681 /* 1 2 3 0 h1 h2 h3 . MBCS*/
73c04bcf 682 static const uint8_t expectedIBM943[] =
b75a7d8f 683 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
73c04bcf 684 static const int32_t toIBM943Offs [] =
b75a7d8f 685 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
73c04bcf 686 static const int32_t fmIBM943Offs[] =
b75a7d8f
A
687 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
688
689 /* 1 2 3 0 h1 h2 h3 . DBCS*/
73c04bcf 690 static const uint8_t expectedIBM9027[] =
b75a7d8f 691 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
73c04bcf 692 static const int32_t toIBM9027Offs [] =
b75a7d8f
A
693 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
694
695 /* 1 2 3 0 <?> <?> <?> . SBCS*/
73c04bcf 696 static const uint8_t expectedIBM920[] =
b75a7d8f 697 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
73c04bcf 698 static const int32_t toIBM920Offs [] =
b75a7d8f
A
699 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
700
701 /* 1 2 3 0 <?> <?> <?> . SBCS*/
73c04bcf 702 static const uint8_t expectedISO88593[] =
b75a7d8f 703 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
73c04bcf 704 static const int32_t toISO88593Offs[] =
b75a7d8f
A
705 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
706
73c04bcf
A
707 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
708 static const uint8_t expectedLATIN1[] =
b75a7d8f 709 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
73c04bcf 710 static const int32_t toLATIN1Offs[] =
b75a7d8f
A
711 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
712
713
714 /* etc */
73c04bcf 715 static const uint8_t expectedUTF16BE[] =
b75a7d8f 716 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
73c04bcf 717 static const int32_t toUTF16BEOffs[]=
b75a7d8f 718 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
73c04bcf 719 static const int32_t fmUTF16BEOffs[] =
b75a7d8f
A
720 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
721
73c04bcf 722 static const uint8_t expectedUTF16LE[] =
b75a7d8f 723 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
73c04bcf 724 static const int32_t toUTF16LEOffs[]=
b75a7d8f 725 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
73c04bcf 726 static const int32_t fmUTF16LEOffs[] =
b75a7d8f
A
727 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
728
73c04bcf 729 static const uint8_t expectedUTF32BE[] =
b75a7d8f
A
730 { 0x00, 0x00, 0x00, 0x31,
731 0x00, 0x00, 0x00, 0x32,
732 0x00, 0x00, 0x00, 0x33,
733 0x00, 0x00, 0x00, 0x00,
734 0x00, 0x00, 0x4e, 0x00,
735 0x00, 0x00, 0x4e, 0x8c,
736 0x00, 0x00, 0x4e, 0x09,
737 0x00, 0x00, 0x00, 0x2e };
73c04bcf 738 static const int32_t toUTF32BEOffs[]=
b75a7d8f
A
739 { 0x00, 0x00, 0x00, 0x00,
740 0x01, 0x01, 0x01, 0x01,
741 0x02, 0x02, 0x02, 0x02,
742 0x03, 0x03, 0x03, 0x03,
743 0x04, 0x04, 0x04, 0x04,
744 0x05, 0x05, 0x05, 0x05,
745 0x06, 0x06, 0x06, 0x06,
746 0x07, 0x07, 0x07, 0x07,
747 0x08, 0x08, 0x08, 0x08 };
73c04bcf 748 static const int32_t fmUTF32BEOffs[] =
b75a7d8f
A
749 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
750
73c04bcf 751 static const uint8_t expectedUTF32LE[] =
b75a7d8f
A
752 { 0x31, 0x00, 0x00, 0x00,
753 0x32, 0x00, 0x00, 0x00,
754 0x33, 0x00, 0x00, 0x00,
755 0x00, 0x00, 0x00, 0x00,
756 0x00, 0x4e, 0x00, 0x00,
757 0x8c, 0x4e, 0x00, 0x00,
758 0x09, 0x4e, 0x00, 0x00,
759 0x2e, 0x00, 0x00, 0x00 };
73c04bcf 760 static const int32_t toUTF32LEOffs[]=
b75a7d8f
A
761 { 0x00, 0x00, 0x00, 0x00,
762 0x01, 0x01, 0x01, 0x01,
763 0x02, 0x02, 0x02, 0x02,
764 0x03, 0x03, 0x03, 0x03,
765 0x04, 0x04, 0x04, 0x04,
766 0x05, 0x05, 0x05, 0x05,
767 0x06, 0x06, 0x06, 0x06,
768 0x07, 0x07, 0x07, 0x07,
769 0x08, 0x08, 0x08, 0x08 };
73c04bcf 770 static const int32_t fmUTF32LEOffs[] =
b75a7d8f
A
771 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
772
773
774
775
776/** Test chars #2 **/
777
778 /* Sahha [health], slashed h's */
73c04bcf
A
779 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
780 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
b75a7d8f
A
781
782 /* LMBCS */
73c04bcf
A
783 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
784 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
785 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
786 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
b75a7d8f
A
787 /*********************************** START OF CODE finally *************/
788
73c04bcf
A
789 gInBufferSize = insize;
790 gOutBufferSize = outsize;
b75a7d8f 791
73c04bcf 792 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
b75a7d8f
A
793
794
b75a7d8f
A
795 /*UTF-8*/
796 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
797 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
798
799 log_verbose("Test surrogate behaviour for UTF8\n");
800 {
73c04bcf
A
801 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
802 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
b75a7d8f
A
803 0xf0, 0x90, 0x90, 0x81,
804 0xef, 0xbf, 0xbd
805 };
73c04bcf 806 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
b75a7d8f
A
807 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
808 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
809
810
811 }
73c04bcf
A
812
813#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f
A
814 /*ISO-2022*/
815 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
816 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
374ca955 817#endif
73c04bcf 818
b75a7d8f
A
819 /*UTF16 LE*/
820 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
821 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
822 /*UTF16 BE*/
823 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
824 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
825 /*UTF32 LE*/
826 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
827 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
828 /*UTF32 BE*/
829 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
830 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
73c04bcf 831
b75a7d8f
A
832 /*LATIN_1*/
833 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
834 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
73c04bcf
A
835
836#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
837 /*EBCDIC_STATEFUL*/
838 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
839 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
840
841 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
842 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
843
844 /*MBCS*/
845
846 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
847 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
848 /*DBCS*/
849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
850 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
851 /*SBCS*/
852 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
853 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
854 /*SBCS*/
855 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
856 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
73c04bcf 857#endif
b75a7d8f
A
858
859
860/****/
b75a7d8f 861
b75a7d8f
A
862 /*UTF-8*/
863 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
864 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
73c04bcf 865#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f
A
866 /*ISO-2022*/
867 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
868 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
374ca955 869#endif
73c04bcf 870
b75a7d8f
A
871 /*UTF16 LE*/
872 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
873 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
874 /*UTF16 BE*/
875 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
876 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
877 /*UTF32 LE*/
878 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
879 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
880 /*UTF32 BE*/
881 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
882 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
73c04bcf
A
883
884#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
885 /*EBCDIC_STATEFUL*/
886 testConvertToU(expectedIBM930, sizeof(expectedIBM930),
887 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
888 /*MBCS*/
889 testConvertToU(expectedIBM943, sizeof(expectedIBM943),
890 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
73c04bcf 891#endif
b75a7d8f
A
892
893 /* Try it again to make sure it still works */
894 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
895 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
896
73c04bcf 897#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
898 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
899 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
900
901 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
902 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
903
904 /*LMBCS*/
905 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
906 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
907 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
908 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
73c04bcf 909#endif
b75a7d8f
A
910
911 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
912 {
913 /* encode directly set D and set O */
914 static const uint8_t utf7[] = {
915 /*
916 Hi Mom -+Jjo--!
917 A+ImIDkQ.
918 +-
919 +ZeVnLIqe
920 */
921 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
922 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
923 0x2b, 0x2d,
924 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
925 };
926 static const UChar unicode[] = {
927 /*
928 Hi Mom -<WHITE SMILING FACE>-!
929 A<NOT IDENTICAL TO><ALPHA>.
930 +
931 [Japanese word "nihongo"]
932 */
933 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
934 0x41, 0x2262, 0x0391, 0x2e,
935 0x2b,
936 0x65e5, 0x672c, 0x8a9e
937 };
938 static const int32_t toUnicodeOffsets[] = {
939 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
940 15, 17, 19, 23,
941 24,
942 27, 29, 32
943 };
944 static const int32_t fromUnicodeOffsets[] = {
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
946 11, 12, 12, 12, 13, 13, 13, 13, 14,
947 15, 15,
948 16, 16, 16, 17, 17, 17, 18, 18, 18
949 };
950
951 /* same but escaping set O (the exclamation mark) */
952 static const uint8_t utf7Restricted[] = {
953 /*
954 Hi Mom -+Jjo--+ACE-
955 A+ImIDkQ.
956 +-
957 +ZeVnLIqe
958 */
959 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
960 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
961 0x2b, 0x2d,
962 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
963 };
964 static const int32_t toUnicodeOffsetsR[] = {
965 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
966 19, 21, 23, 27,
967 28,
968 31, 33, 36
969 };
970 static const int32_t fromUnicodeOffsetsR[] = {
971 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
972 11, 12, 12, 12, 13, 13, 13, 13, 14,
973 15, 15,
974 16, 16, 16, 17, 17, 17, 18, 18, 18
975 };
976
977 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
978
979 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
980
981 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
982
983 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
984 }
985
986 /*
987 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
988 * modified according to RFC 2060,
989 * and supplemented with the one example in RFC 2060 itself.
990 */
991 {
992 static const uint8_t imap[] = {
993 /* Hi Mom -&Jjo--!
994 A&ImIDkQ-.
995 &-
996 &ZeVnLIqe-
997 \
998 ~peter
999 /mail
1000 /&ZeVnLIqe-
1001 /&U,BTFw-
1002 */
1003 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1004 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1005 0x26, 0x2d,
1006 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1007 0x5c,
1008 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1009 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1010 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1011 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1012 };
1013 static const UChar unicode[] = {
1014 /* Hi Mom -<WHITE SMILING FACE>-!
1015 A<NOT IDENTICAL TO><ALPHA>.
1016 &
1017 [Japanese word "nihongo"]
1018 \
1019 ~peter
1020 /mail
1021 /<65e5, 672c, 8a9e>
1022 /<53f0, 5317>
1023 */
1024 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1025 0x41, 0x2262, 0x0391, 0x2e,
1026 0x26,
1027 0x65e5, 0x672c, 0x8a9e,
1028 0x5c,
1029 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1030 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1031 0x2f, 0x65e5, 0x672c, 0x8a9e,
1032 0x2f, 0x53f0, 0x5317
1033 };
1034 static const int32_t toUnicodeOffsets[] = {
1035 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1036 15, 17, 19, 24,
1037 25,
1038 28, 30, 33,
1039 37,
1040 38, 39, 40, 41, 42, 43,
1041 44, 45, 46, 47, 48,
1042 49, 51, 53, 56,
1043 60, 62, 64
1044 };
1045 static const int32_t fromUnicodeOffsets[] = {
1046 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1047 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1048 15, 15,
1049 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1050 19,
1051 20, 21, 22, 23, 24, 25,
1052 26, 27, 28, 29, 30,
1053 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1054 35, 36, 36, 36, 37, 37, 37, 37, 37
1055 };
1056
1057 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1058
1059 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1060 }
1061
1062 /* Test UTF-8 bad data handling*/
1063 {
1064 static const uint8_t utf8[]={
1065 0x61,
1066 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1067 0x00,
1068 0x62,
1069 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1070 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1071 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1072 0xdf, 0xbf, /* 7ff */
1073 0xbf, /* truncated tail */
1074 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1075 0x02
1076 };
1077
1078 static const uint16_t utf8Expected[]={
1079 0x0061,
1080 0xfffd,
1081 0x0000,
1082 0x0062,
1083 0xfffd,
1084 0xfffd,
1085 0xdbff, 0xdfff,
1086 0x07ff,
1087 0xfffd,
1088 0xfffd,
1089 0x0002
1090 };
1091
1092 static const int32_t utf8Offsets[]={
1093 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1094 };
1095 testConvertToU(utf8, sizeof(utf8),
1096 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1097
1098 }
1099
1100 /* Test UTF-32BE bad data handling*/
1101 {
1102 static const uint8_t utf32[]={
1103 0x00, 0x00, 0x00, 0x61,
1104 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1105 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1106 0x00, 0x00, 0x00, 0x62,
1107 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1108 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1109 0x00, 0x00, 0x01, 0x62,
1110 0x00, 0x00, 0x02, 0x62
1111 };
b75a7d8f
A
1112 static const uint16_t utf32Expected[]={
1113 0x0061,
1114 0xfffd, /* 0x110000 out of range */
1115 0xDBFF, /* 0x10FFFF in range */
1116 0xDFFF,
1117 0x0062,
1118 0xfffd, /* 0xffffffff out of range */
1119 0xfffd, /* 0x7fffffff out of range */
1120 0x0162,
1121 0x0262
1122 };
b75a7d8f
A
1123 static const int32_t utf32Offsets[]={
1124 0, 4, 8, 8, 12, 16, 20, 24, 28
1125 };
73c04bcf
A
1126 static const uint8_t utf32ExpectedBack[]={
1127 0x00, 0x00, 0x00, 0x61,
1128 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1129 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1130 0x00, 0x00, 0x00, 0x62,
1131 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1132 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1133 0x00, 0x00, 0x01, 0x62,
1134 0x00, 0x00, 0x02, 0x62
1135 };
1136 static const int32_t utf32OffsetsBack[]={
1137 0,0,0,0,
1138 1,1,1,1,
1139 2,2,2,2,
1140 4,4,4,4,
1141 5,5,5,5,
1142 6,6,6,6,
1143 7,7,7,7,
1144 8,8,8,8
1145 };
1146
b75a7d8f
A
1147 testConvertToU(utf32, sizeof(utf32),
1148 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
73c04bcf
A
1149 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1150 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
b75a7d8f
A
1151 }
1152
1153 /* Test UTF-32LE bad data handling*/
1154 {
1155 static const uint8_t utf32[]={
1156 0x61, 0x00, 0x00, 0x00,
1157 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1158 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1159 0x62, 0x00, 0x00, 0x00,
1160 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1161 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1162 0x62, 0x01, 0x00, 0x00,
1163 0x62, 0x02, 0x00, 0x00,
1164 };
1165
1166 static const uint16_t utf32Expected[]={
1167 0x0061,
1168 0xfffd, /* 0x110000 out of range */
1169 0xDBFF, /* 0x10FFFF in range */
1170 0xDFFF,
1171 0x0062,
1172 0xfffd, /* 0xffffffff out of range */
1173 0xfffd, /* 0x7fffffff out of range */
1174 0x0162,
1175 0x0262
1176 };
b75a7d8f
A
1177 static const int32_t utf32Offsets[]={
1178 0, 4, 8, 8, 12, 16, 20, 24, 28
1179 };
73c04bcf
A
1180 static const uint8_t utf32ExpectedBack[]={
1181 0x61, 0x00, 0x00, 0x00,
1182 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1183 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1184 0x62, 0x00, 0x00, 0x00,
1185 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1186 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1187 0x62, 0x01, 0x00, 0x00,
1188 0x62, 0x02, 0x00, 0x00
1189 };
1190 static const int32_t utf32OffsetsBack[]={
1191 0,0,0,0,
1192 1,1,1,1,
1193 2,2,2,2,
1194 4,4,4,4,
1195 5,5,5,5,
1196 6,6,6,6,
1197 7,7,7,7,
1198 8,8,8,8
1199 };
b75a7d8f
A
1200 testConvertToU(utf32, sizeof(utf32),
1201 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
73c04bcf
A
1202 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1203 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
b75a7d8f
A
1204 }
1205}
1206
1207static void TestCoverageMBCS(){
1208#if 0
1209 UErrorCode status = U_ZERO_ERROR;
1210 const char *directory = loadTestData(&status);
1211 char* tdpath = NULL;
1212 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1213 int len = strlen(directory);
1214 char* index=NULL;
1215
1216 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1217 uprv_strcpy(saveDirectory,u_getDataDirectory());
1218 log_verbose("Retrieved data directory %s \n",saveDirectory);
1219 uprv_strcpy(tdpath,directory);
1220 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1221
1222 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1223 *(index+1)=0;
1224 }
1225 u_setDataDirectory(tdpath);
1226 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1227#endif
1228
1229 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1230 which is test file for MBCS conversion with single-byte codepage data.*/
1231 {
1232
1233 /* MBCS with single byte codepage data test1.ucm*/
1234 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1235 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1236 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1237
b75a7d8f
A
1238 /*from Unicode*/
1239 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1240 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
b75a7d8f
A
1241 }
1242
1243 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1244 which is test file for MBCS conversion with three-byte codepage data.*/
1245 {
1246
1247 /* MBCS with three byte codepage data test3.ucm*/
1248 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1249 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1250 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1251
1252 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1253 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1254 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1255
1256 /*from Unicode*/
1257 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1258 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1259
1260 /*to Unicode*/
1261 testConvertToU(test3input, sizeof(test3input),
1262 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1263
1264 }
1265
1266 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1267 which is test file for MBCS conversion with four-byte codepage data.*/
1268 {
1269
1270 /* MBCS with three byte codepage data test4.ucm*/
1271 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1272 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1273 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1274
1275 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1276 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1277 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1278
1279 /*from Unicode*/
1280 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1281 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1282
1283 /*to Unicode*/
1284 testConvertToU(test4input, sizeof(test4input),
1285 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1286
1287 }
1288#if 0
1289 free(tdpath);
1290 /* restore the original data directory */
1291 log_verbose("Setting the data directory to %s \n", saveDirectory);
1292 u_setDataDirectory(saveDirectory);
1293 free(saveDirectory);
1294#endif
1295
1296}
1297
1298static void TestConverterType(const char *convName, UConverterType convType) {
1299 UConverter* myConverter;
1300 UErrorCode err = U_ZERO_ERROR;
1301
1302 myConverter = my_ucnv_open(convName, &err);
1303
1304 if (U_FAILURE(err)) {
1305 log_data_err("Failed to create an %s converter\n", convName);
1306 return;
1307 }
1308 else
1309 {
1310 if (ucnv_getType(myConverter)!=convType) {
1311 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1312 convName, convType);
1313 }
1314 else {
1315 log_verbose("ucnv_getType %s ok\n", convName);
1316 }
1317 }
1318 ucnv_close(myConverter);
1319}
1320
1321static void TestConverterTypesAndStarters()
1322{
73c04bcf 1323#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1324 UConverter* myConverter;
1325 UErrorCode err = U_ZERO_ERROR;
1326 UBool mystarters[256];
1327
1328/* const UBool expectedKSCstarters[256] = {
1329 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1330 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1331 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1332 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1333 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1334 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1335 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1336 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1337 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1338 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1339 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1340 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1341 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1342 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1343 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1344 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1345 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1346 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1347 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1348 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1349 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1350 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1351 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1352 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1353 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1354 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1355
1356
1357 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1358
1359 myConverter = ucnv_open("ksc", &err);
1360 if (U_FAILURE(err)) {
1361 log_data_err("Failed to create an ibm-ksc converter\n");
1362 return;
1363 }
1364 else
1365 {
1366 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1367 log_err("ucnv_getType Failed for ibm-949\n");
1368 else
1369 log_verbose("ucnv_getType ibm-949 ok\n");
1370
1371 if(myConverter!=NULL)
1372 ucnv_getStarters(myConverter, mystarters, &err);
1373
1374 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1375 log_err("Failed ucnv_getStarters for ksc\n");
1376 else
1377 log_verbose("ucnv_getStarters ok\n");*/
1378
1379 }
1380 ucnv_close(myConverter);
1381
1382 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1383 TestConverterType("ibm-878", UCNV_SBCS);
73c04bcf
A
1384#endif
1385
b75a7d8f 1386 TestConverterType("iso-8859-1", UCNV_LATIN_1);
73c04bcf 1387
b75a7d8f 1388 TestConverterType("ibm-1208", UCNV_UTF8);
73c04bcf 1389
b75a7d8f
A
1390 TestConverterType("utf-8", UCNV_UTF8);
1391 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1392 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1393 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1394 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
73c04bcf
A
1395
1396#if !UCONFIG_NO_LEGACY_CONVERSION
1397
1398#if defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f 1399 TestConverterType("iso-2022", UCNV_ISO_2022);
374ca955 1400#endif
73c04bcf 1401
b75a7d8f 1402 TestConverterType("hz", UCNV_HZ);
73c04bcf
A
1403#endif
1404
b75a7d8f 1405 TestConverterType("scsu", UCNV_SCSU);
73c04bcf
A
1406
1407#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 1408 TestConverterType("x-iscii-de", UCNV_ISCII);
73c04bcf
A
1409#endif
1410
b75a7d8f
A
1411 TestConverterType("ascii", UCNV_US_ASCII);
1412 TestConverterType("utf-7", UCNV_UTF7);
1413 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1414 TestConverterType("bocu-1", UCNV_BOCU1);
1415}
1416
1417static void
1418TestAmbiguousConverter(UConverter *cnv) {
1419 static const char inBytes[2]={ 0x61, 0x5c };
1420 UChar outUnicode[20]={ 0, 0, 0, 0 };
1421
1422 const char *s;
1423 UChar *u;
1424 UErrorCode errorCode;
1425 UBool isAmbiguous;
1426
1427 /* try to convert an 'a' and a US-ASCII backslash */
1428 errorCode=U_ZERO_ERROR;
1429 s=inBytes;
1430 u=outUnicode;
1431 ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1432 if(U_FAILURE(errorCode)) {
1433 /* we do not care about general failures in this test; the input may just not be mappable */
1434 return;
1435 }
1436
1437 if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1438 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1439 return;
1440 }
1441
1442 isAmbiguous=ucnv_isAmbiguous(cnv);
1443
1444 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1445 if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1446 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1447 ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1448 return;
1449 }
1450
1451 if(outUnicode[1]!=0x5c) {
1452 /* needs fixup, fix it */
1453 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1454 if(outUnicode[1]!=0x5c) {
1455 /* the fix failed */
1456 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1457 return;
1458 }
1459 }
1460}
1461
1462static void TestAmbiguous()
1463{
1464 UErrorCode status = U_ZERO_ERROR;
1465 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
73c04bcf 1466 static const char target[] = {
b75a7d8f
A
1467 /* "\\usr\\local\\share\\data\\icutest.txt" */
1468 0x5c, 0x75, 0x73, 0x72,
1469 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1470 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1471 0x5c, 0x64, 0x61, 0x74, 0x61,
1472 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1473 0
1474 };
1475 UChar asciiResult[200], sjisResult[200];
1476 int32_t asciiLength = 0, sjisLength = 0, i;
1477 const char *name;
1478
1479 /* enumerate all converters */
1480 status=U_ZERO_ERROR;
1481 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1482 cnv=ucnv_open(name, &status);
1483 if(U_SUCCESS(status)) {
1484 TestAmbiguousConverter(cnv);
1485 ucnv_close(cnv);
1486 } else {
1487 log_err("error: unable to open available converter \"%s\"\n", name);
1488 status=U_ZERO_ERROR;
1489 }
1490 }
1491
73c04bcf 1492#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1493 sjis_cnv = ucnv_open("ibm-943", &status);
1494 if (U_FAILURE(status))
1495 {
1496 log_data_err("Failed to create a SJIS converter\n");
1497 return;
1498 }
1499 ascii_cnv = ucnv_open("LATIN-1", &status);
1500 if (U_FAILURE(status))
1501 {
1502 log_data_err("Failed to create a LATIN-1 converter\n");
1503 ucnv_close(sjis_cnv);
1504 return;
1505 }
1506 /* convert target from SJIS to Unicode */
73c04bcf 1507 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
b75a7d8f
A
1508 if (U_FAILURE(status))
1509 {
1510 log_err("Failed to convert the SJIS string.\n");
1511 ucnv_close(sjis_cnv);
1512 ucnv_close(ascii_cnv);
1513 return;
1514 }
1515 /* convert target from Latin-1 to Unicode */
73c04bcf 1516 asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
b75a7d8f
A
1517 if (U_FAILURE(status))
1518 {
1519 log_err("Failed to convert the Latin-1 string.\n");
1520 free(sjisResult);
1521 ucnv_close(sjis_cnv);
1522 ucnv_close(ascii_cnv);
1523 return;
1524 }
1525 if (!ucnv_isAmbiguous(sjis_cnv))
1526 {
1527 log_err("SJIS converter should contain ambiguous character mappings.\n");
1528 free(sjisResult);
1529 free(asciiResult);
1530 ucnv_close(sjis_cnv);
1531 ucnv_close(ascii_cnv);
1532 return;
1533 }
1534 if (u_strcmp(sjisResult, asciiResult) == 0)
1535 {
1536 log_err("File separators for SJIS don't need to be fixed.\n");
1537 }
1538 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1539 if (u_strcmp(sjisResult, asciiResult) != 0)
1540 {
1541 log_err("Fixing file separator for SJIS failed.\n");
1542 }
1543 ucnv_close(sjis_cnv);
1544 ucnv_close(ascii_cnv);
73c04bcf 1545#endif
b75a7d8f
A
1546}
1547
1548static void
1549TestSignatureDetection(){
1550 /* with null terminated strings */
1551 {
1552 static const char* data[] = {
1553 "\xFE\xFF\x00\x00", /* UTF-16BE */
1554 "\xFF\xFE\x00\x00", /* UTF-16LE */
1555 "\xEF\xBB\xBF\x00", /* UTF-8 */
1556 "\x0E\xFE\xFF\x00", /* SCSU */
1557
1558 "\xFE\xFF", /* UTF-16BE */
1559 "\xFF\xFE", /* UTF-16LE */
1560 "\xEF\xBB\xBF", /* UTF-8 */
1561 "\x0E\xFE\xFF", /* SCSU */
1562
1563 "\xFE\xFF\x41\x42", /* UTF-16BE */
1564 "\xFF\xFE\x41\x41", /* UTF-16LE */
1565 "\xEF\xBB\xBF\x41", /* UTF-8 */
1566 "\x0E\xFE\xFF\x41", /* SCSU */
1567
1568 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1569 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1570 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1571 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
374ca955
A
1572 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1573
1574 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
b75a7d8f
A
1575 };
1576 static const char* expected[] = {
1577 "UTF-16BE",
1578 "UTF-16LE",
1579 "UTF-8",
1580 "SCSU",
1581
1582 "UTF-16BE",
1583 "UTF-16LE",
1584 "UTF-8",
1585 "SCSU",
1586
1587 "UTF-16BE",
1588 "UTF-16LE",
1589 "UTF-8",
1590 "SCSU",
1591
1592 "UTF-7",
1593 "UTF-7",
1594 "UTF-7",
1595 "UTF-7",
374ca955
A
1596 "UTF-7",
1597 "UTF-EBCDIC"
b75a7d8f
A
1598 };
1599 static const int32_t expectedLength[] ={
1600 2,
1601 2,
1602 3,
1603 3,
1604
1605 2,
1606 2,
1607 3,
1608 3,
1609
1610 2,
1611 2,
1612 3,
1613 3,
1614
1615 5,
1616 4,
1617 4,
1618 4,
374ca955 1619 4,
b75a7d8f
A
1620 4
1621 };
1622 int i=0;
1623 UErrorCode err;
1624 int32_t signatureLength = -1;
1625 const char* source = NULL;
1626 const char* enc = NULL;
1627 for( ; i<sizeof(data)/sizeof(char*); i++){
1628 err = U_ZERO_ERROR;
1629 source = data[i];
1630 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1631 if(U_FAILURE(err)){
1632 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1633 continue;
1634 }
1635 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1636 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1637 continue;
1638 }
1639 if(signatureLength != expectedLength[i]){
1640 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1641 }
1642 }
1643 }
1644 {
1645 static const char* data[] = {
1646 "\xFE\xFF\x00", /* UTF-16BE */
1647 "\xFF\xFE\x00", /* UTF-16LE */
1648 "\xEF\xBB\xBF\x00", /* UTF-8 */
1649 "\x0E\xFE\xFF\x00", /* SCSU */
1650 "\x00\x00\xFE\xFF", /* UTF-32BE */
1651 "\xFF\xFE\x00\x00", /* UTF-32LE */
1652 "\xFE\xFF", /* UTF-16BE */
1653 "\xFF\xFE", /* UTF-16LE */
1654 "\xEF\xBB\xBF", /* UTF-8 */
1655 "\x0E\xFE\xFF", /* SCSU */
1656 "\x00\x00\xFE\xFF", /* UTF-32BE */
1657 "\xFF\xFE\x00\x00", /* UTF-32LE */
1658 "\xFE\xFF\x41\x42", /* UTF-16BE */
1659 "\xFF\xFE\x41\x41", /* UTF-16LE */
1660 "\xEF\xBB\xBF\x41", /* UTF-8 */
1661 "\x0E\xFE\xFF\x41", /* SCSU */
1662 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1663 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1664 "\xFB\xEE\x28", /* BOCU-1 */
1665 "\xFF\x41\x42" /* NULL */
1666 };
1667 static const int len[] = {
1668 3,
1669 3,
1670 4,
1671 4,
1672 4,
1673 4,
1674 2,
1675 2,
1676 3,
1677 3,
1678 4,
1679 4,
1680 4,
1681 4,
1682 4,
1683 4,
1684 5,
1685 5,
1686 3,
1687 3
1688 };
1689
1690 static const char* expected[] = {
1691 "UTF-16BE",
1692 "UTF-16LE",
1693 "UTF-8",
1694 "SCSU",
1695 "UTF-32BE",
1696 "UTF-32LE",
1697 "UTF-16BE",
1698 "UTF-16LE",
1699 "UTF-8",
1700 "SCSU",
1701 "UTF-32BE",
1702 "UTF-32LE",
1703 "UTF-16BE",
1704 "UTF-16LE",
1705 "UTF-8",
1706 "SCSU",
1707 "UTF-32BE",
1708 "UTF-32LE",
1709 "BOCU-1",
1710 NULL
1711 };
1712 static const int32_t expectedLength[] ={
1713 2,
1714 2,
1715 3,
1716 3,
1717 4,
1718 4,
1719 2,
1720 2,
1721 3,
1722 3,
1723 4,
1724 4,
1725 2,
1726 2,
1727 3,
1728 3,
1729 4,
1730 4,
1731 3,
1732 0
1733 };
1734 int i=0;
1735 UErrorCode err;
1736 int32_t signatureLength = -1;
1737 int32_t sourceLength=-1;
1738 const char* source = NULL;
1739 const char* enc = NULL;
1740 for( ; i<sizeof(data)/sizeof(char*); i++){
1741 err = U_ZERO_ERROR;
1742 source = data[i];
1743 sourceLength = len[i];
1744 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1745 if(U_FAILURE(err)){
1746 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1747 continue;
1748 }
1749 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1750 if(expected[i] !=NULL){
1751 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1752 continue;
1753 }
1754 }
1755 if(signatureLength != expectedLength[i]){
1756 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1757 }
1758 }
1759 }
1760}
1761
1762void
1763static TestUTF7() {
1764 /* test input */
1765 static const uint8_t in[]={
1766 /* H - +Jjo- - ! +- +2AHcAQ */
1767 0x48,
1768 0x2d,
1769 0x2b, 0x4a, 0x6a, 0x6f,
1770 0x2d, 0x2d,
1771 0x21,
1772 0x2b, 0x2d,
1773 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1774 };
1775
1776 /* expected test results */
374ca955 1777 static const int32_t results[]={
b75a7d8f
A
1778 /* number of bytes read, code point */
1779 1, 0x48,
1780 1, 0x2d,
1781 4, 0x263a, /* <WHITE SMILING FACE> */
1782 2, 0x2d,
1783 1, 0x21,
1784 2, 0x2b,
1785 7, 0x10401
1786 };
1787
1788 const char *cnvName;
1789 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1790 UErrorCode errorCode=U_ZERO_ERROR;
1791 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1792 if(U_FAILURE(errorCode)) {
1793 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1794 return;
1795 }
1796 TestNextUChar(cnv, source, limit, results, "UTF-7");
1797 /* Test the condition when source >= sourceLimit */
1798 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1799 cnvName = ucnv_getName(cnv, &errorCode);
1800 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1801 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1802 }
1803 ucnv_close(cnv);
1804}
1805
1806void
1807static TestIMAP() {
1808 /* test input */
1809 static const uint8_t in[]={
1810 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1811 0x48,
1812 0x2d,
1813 0x26, 0x4a, 0x6a, 0x6f,
1814 0x2d, 0x2d,
1815 0x21,
1816 0x26, 0x2d,
1817 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1818 };
1819
1820 /* expected test results */
374ca955 1821 static const int32_t results[]={
b75a7d8f
A
1822 /* number of bytes read, code point */
1823 1, 0x48,
1824 1, 0x2d,
1825 4, 0x263a, /* <WHITE SMILING FACE> */
1826 2, 0x2d,
1827 1, 0x21,
1828 2, 0x26,
1829 7, 0x10401
1830 };
1831
1832 const char *cnvName;
1833 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1834 UErrorCode errorCode=U_ZERO_ERROR;
1835 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1836 if(U_FAILURE(errorCode)) {
1837 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1838 return;
1839 }
1840 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1841 /* Test the condition when source >= sourceLimit */
1842 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1843 cnvName = ucnv_getName(cnv, &errorCode);
1844 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1845 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1846 }
1847 ucnv_close(cnv);
1848}
1849
1850void
1851static TestUTF8() {
1852 /* test input */
1853 static const uint8_t in[]={
1854 0x61,
1855 0xc2, 0x80,
1856 0xe0, 0xa0, 0x80,
1857 0xf0, 0x90, 0x80, 0x80,
1858 0xf4, 0x84, 0x8c, 0xa1,
1859 0xf0, 0x90, 0x90, 0x81
1860 };
1861
1862 /* expected test results */
374ca955 1863 static const int32_t results[]={
b75a7d8f
A
1864 /* number of bytes read, code point */
1865 1, 0x61,
1866 2, 0x80,
1867 3, 0x800,
1868 4, 0x10000,
1869 4, 0x104321,
1870 4, 0x10401
1871 };
1872
1873 /* error test input */
1874 static const uint8_t in2[]={
1875 0x61,
1876 0xc0, 0x80, /* illegal non-shortest form */
1877 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1878 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1879 0xc0, 0xc0, /* illegal trail byte */
1880 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1881 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1882 0xfe, /* illegal byte altogether */
1883 0x62
1884 };
1885
1886 /* expected error test results */
374ca955 1887 static const int32_t results2[]={
b75a7d8f
A
1888 /* number of bytes read, code point */
1889 1, 0x61,
1890 22, 0x62
1891 };
1892
1893 UConverterToUCallback cb;
1894 const void *p;
1895
1896 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1897 UErrorCode errorCode=U_ZERO_ERROR;
1898 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1899 if(U_FAILURE(errorCode)) {
1900 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1901 return;
1902 }
1903 TestNextUChar(cnv, source, limit, results, "UTF-8");
1904 /* Test the condition when source >= sourceLimit */
1905 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1906
1907 /* test error behavior with a skip callback */
1908 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1909 source=(const char *)in2;
1910 limit=(const char *)(in2+sizeof(in2));
1911 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1912
1913 ucnv_close(cnv);
1914}
1915
1916void
1917static TestCESU8() {
1918 /* test input */
1919 static const uint8_t in[]={
1920 0x61,
1921 0xc2, 0x80,
1922 0xe0, 0xa0, 0x80,
1923 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1924 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1925 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1926 0xef, 0xbf, 0xbc
1927 };
1928
1929 /* expected test results */
374ca955 1930 static const int32_t results[]={
b75a7d8f
A
1931 /* number of bytes read, code point */
1932 1, 0x61,
1933 2, 0x80,
1934 3, 0x800,
1935 6, 0x10000,
1936 3, 0xdc01,
374ca955
A
1937 -1,0xd802, /* may read 3 or 6 bytes */
1938 -1,0x10ffff,/* may read 0 or 3 bytes */
b75a7d8f
A
1939 3, 0xfffc
1940 };
1941
1942 /* error test input */
1943 static const uint8_t in2[]={
1944 0x61,
1945 0xc0, 0x80, /* illegal non-shortest form */
1946 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1947 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1948 0xc0, 0xc0, /* illegal trail byte */
1949 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1950 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1951 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1952 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1953 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1954 0xfe, /* illegal byte altogether */
1955 0x62
1956 };
1957
1958 /* expected error test results */
374ca955 1959 static const int32_t results2[]={
b75a7d8f
A
1960 /* number of bytes read, code point */
1961 1, 0x61,
1962 34, 0x62
1963 };
1964
1965 UConverterToUCallback cb;
1966 const void *p;
1967
1968 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1969 UErrorCode errorCode=U_ZERO_ERROR;
1970 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1971 if(U_FAILURE(errorCode)) {
1972 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1973 return;
1974 }
1975 TestNextUChar(cnv, source, limit, results, "CESU-8");
1976 /* Test the condition when source >= sourceLimit */
1977 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1978
1979 /* test error behavior with a skip callback */
1980 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1981 source=(const char *)in2;
1982 limit=(const char *)(in2+sizeof(in2));
1983 TestNextUChar(cnv, source, limit, results2, "CESU-8");
1984
1985 ucnv_close(cnv);
1986}
1987
1988void
1989static TestUTF16() {
1990 /* test input */
1991 static const uint8_t in1[]={
1992 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1993 };
1994 static const uint8_t in2[]={
1995 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1996 };
1997 static const uint8_t in3[]={
1998 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1999 };
2000
2001 /* expected test results */
374ca955 2002 static const int32_t results1[]={
b75a7d8f
A
2003 /* number of bytes read, code point */
2004 4, 0x4e00,
2005 2, 0xfeff
2006 };
374ca955 2007 static const int32_t results2[]={
b75a7d8f
A
2008 /* number of bytes read, code point */
2009 4, 0x004e,
2010 2, 0xfffe
2011 };
374ca955 2012 static const int32_t results3[]={
b75a7d8f
A
2013 /* number of bytes read, code point */
2014 2, 0xfefe,
2015 2, 0x4e00,
2016 2, 0xfeff,
2017 4, 0x20001
2018 };
2019
2020 const char *source, *limit;
2021
2022 UErrorCode errorCode=U_ZERO_ERROR;
2023 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2024 if(U_FAILURE(errorCode)) {
2025 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2026 return;
2027 }
2028
2029 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2030 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2031
2032 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2033 ucnv_resetToUnicode(cnv);
2034 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2035
2036 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2037 ucnv_resetToUnicode(cnv);
2038 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2039
2040 /* Test the condition when source >= sourceLimit */
2041 ucnv_resetToUnicode(cnv);
2042 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2043
2044 ucnv_close(cnv);
2045}
2046
2047void
2048static TestUTF16BE() {
2049 /* test input */
2050 static const uint8_t in[]={
2051 0x00, 0x61,
2052 0x00, 0xc0,
2053 0x00, 0x31,
2054 0x00, 0xf4,
2055 0xce, 0xfe,
2056 0xd8, 0x01, 0xdc, 0x01
2057 };
2058
2059 /* expected test results */
374ca955 2060 static const int32_t results[]={
b75a7d8f
A
2061 /* number of bytes read, code point */
2062 2, 0x61,
2063 2, 0xc0,
2064 2, 0x31,
2065 2, 0xf4,
2066 2, 0xcefe,
2067 4, 0x10401
2068 };
2069
2070 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2071 UErrorCode errorCode=U_ZERO_ERROR;
2072 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2073 if(U_FAILURE(errorCode)) {
2074 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2075 return;
2076 }
2077 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2078 /* Test the condition when source >= sourceLimit */
2079 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2080 /*Test for the condition where there is an invalid character*/
2081 {
2082 static const uint8_t source2[]={0x61};
374ca955 2083 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2084 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2085 }
374ca955
A
2086#if 0
2087 /*
2088 * Test disabled because currently the UTF-16BE/LE converters are supposed
2089 * to not set errors for unpaired surrogates.
2090 * This may change with
2091 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2092 */
2093
b75a7d8f
A
2094 /*Test for the condition where there is a surrogate pair*/
2095 {
2096 const uint8_t source2[]={0xd8, 0x01};
2097 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2098 }
374ca955 2099#endif
b75a7d8f
A
2100 ucnv_close(cnv);
2101}
2102
2103static void
2104TestUTF16LE() {
2105 /* test input */
2106 static const uint8_t in[]={
2107 0x61, 0x00,
2108 0x31, 0x00,
2109 0x4e, 0x2e,
2110 0x4e, 0x00,
2111 0x01, 0xd8, 0x01, 0xdc
2112 };
2113
2114 /* expected test results */
374ca955 2115 static const int32_t results[]={
b75a7d8f
A
2116 /* number of bytes read, code point */
2117 2, 0x61,
2118 2, 0x31,
2119 2, 0x2e4e,
2120 2, 0x4e,
2121 4, 0x10401
2122 };
2123
2124 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2125 UErrorCode errorCode=U_ZERO_ERROR;
2126 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2127 if(U_FAILURE(errorCode)) {
2128 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2129 return;
2130 }
2131 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2132 /* Test the condition when source >= sourceLimit */
2133 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2134 /*Test for the condition where there is an invalid character*/
2135 {
2136 static const uint8_t source2[]={0x61};
374ca955 2137 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2138 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2139 }
374ca955
A
2140#if 0
2141 /*
2142 * Test disabled because currently the UTF-16BE/LE converters are supposed
2143 * to not set errors for unpaired surrogates.
2144 * This may change with
2145 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2146 */
2147
b75a7d8f
A
2148 /*Test for the condition where there is a surrogate character*/
2149 {
2150 static const uint8_t source2[]={0x01, 0xd8};
2151 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2152 }
374ca955 2153#endif
b75a7d8f
A
2154
2155 ucnv_close(cnv);
2156}
2157
2158void
2159static TestUTF32() {
2160 /* test input */
2161 static const uint8_t in1[]={
2162 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2163 };
2164 static const uint8_t in2[]={
2165 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2166 };
2167 static const uint8_t in3[]={
2168 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2169 };
2170
2171 /* expected test results */
374ca955 2172 static const int32_t results1[]={
b75a7d8f
A
2173 /* number of bytes read, code point */
2174 8, 0x100f00,
2175 4, 0xfeff
2176 };
374ca955 2177 static const int32_t results2[]={
b75a7d8f
A
2178 /* number of bytes read, code point */
2179 8, 0x0f1000,
2180 4, 0xfffe
2181 };
374ca955 2182 static const int32_t results3[]={
b75a7d8f
A
2183 /* number of bytes read, code point */
2184 4, 0xfefe,
2185 4, 0x100f00,
374ca955
A
2186 4, 0xfffd, /* unmatched surrogate */
2187 4, 0xfffd /* unmatched surrogate */
b75a7d8f
A
2188 };
2189
2190 const char *source, *limit;
2191
2192 UErrorCode errorCode=U_ZERO_ERROR;
2193 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2194 if(U_FAILURE(errorCode)) {
2195 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2196 return;
2197 }
2198
2199 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2200 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2201
2202 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2203 ucnv_resetToUnicode(cnv);
2204 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2205
2206 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2207 ucnv_resetToUnicode(cnv);
2208 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2209
2210 /* Test the condition when source >= sourceLimit */
2211 ucnv_resetToUnicode(cnv);
2212 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2213
2214 ucnv_close(cnv);
2215}
2216
2217static void
2218TestUTF32BE() {
2219 /* test input */
2220 static const uint8_t in[]={
2221 0x00, 0x00, 0x00, 0x61,
374ca955 2222 0x00, 0x00, 0x30, 0x61,
b75a7d8f
A
2223 0x00, 0x00, 0xdc, 0x00,
2224 0x00, 0x00, 0xd8, 0x00,
2225 0x00, 0x00, 0xdf, 0xff,
374ca955 2226 0x00, 0x00, 0xff, 0xfe,
b75a7d8f
A
2227 0x00, 0x10, 0xab, 0xcd,
2228 0x00, 0x10, 0xff, 0xff
2229 };
2230
2231 /* expected test results */
374ca955 2232 static const int32_t results[]={
b75a7d8f
A
2233 /* number of bytes read, code point */
2234 4, 0x61,
374ca955
A
2235 4, 0x3061,
2236 4, 0xfffd,
2237 4, 0xfffd,
b75a7d8f 2238 4, 0xfffd,
374ca955 2239 4, 0xfffe,
b75a7d8f
A
2240 4, 0x10abcd,
2241 4, 0x10ffff
2242 };
2243
2244 /* error test input */
2245 static const uint8_t in2[]={
2246 0x00, 0x00, 0x00, 0x61,
2247 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2248 0x00, 0x00, 0x00, 0x62,
2249 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2250 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2251 0x00, 0x00, 0x01, 0x62,
2252 0x00, 0x00, 0x02, 0x62
2253 };
2254
2255 /* expected error test results */
374ca955 2256 static const int32_t results2[]={
b75a7d8f
A
2257 /* number of bytes read, code point */
2258 4, 0x61,
2259 8, 0x62,
2260 12, 0x162,
2261 4, 0x262
2262 };
2263
2264 UConverterToUCallback cb;
2265 const void *p;
2266
2267 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2268 UErrorCode errorCode=U_ZERO_ERROR;
2269 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2270 if(U_FAILURE(errorCode)) {
2271 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2272 return;
2273 }
2274 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2275
2276 /* Test the condition when source >= sourceLimit */
2277 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2278
2279 /* test error behavior with a skip callback */
2280 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2281 source=(const char *)in2;
2282 limit=(const char *)(in2+sizeof(in2));
2283 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2284
2285 ucnv_close(cnv);
2286}
2287
2288static void
2289TestUTF32LE() {
2290 /* test input */
2291 static const uint8_t in[]={
2292 0x61, 0x00, 0x00, 0x00,
374ca955 2293 0x61, 0x30, 0x00, 0x00,
b75a7d8f
A
2294 0x00, 0xdc, 0x00, 0x00,
2295 0x00, 0xd8, 0x00, 0x00,
2296 0xff, 0xdf, 0x00, 0x00,
374ca955 2297 0xfe, 0xff, 0x00, 0x00,
b75a7d8f
A
2298 0xcd, 0xab, 0x10, 0x00,
2299 0xff, 0xff, 0x10, 0x00
2300 };
2301
2302 /* expected test results */
374ca955 2303 static const int32_t results[]={
b75a7d8f
A
2304 /* number of bytes read, code point */
2305 4, 0x61,
374ca955 2306 4, 0x3061,
b75a7d8f 2307 4, 0xfffd,
374ca955
A
2308 4, 0xfffd,
2309 4, 0xfffd,
2310 4, 0xfffe,
b75a7d8f
A
2311 4, 0x10abcd,
2312 4, 0x10ffff
2313 };
2314
2315 /* error test input */
2316 static const uint8_t in2[]={
2317 0x61, 0x00, 0x00, 0x00,
2318 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2319 0x62, 0x00, 0x00, 0x00,
2320 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2321 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2322 0x62, 0x01, 0x00, 0x00,
2323 0x62, 0x02, 0x00, 0x00,
2324 };
2325
2326 /* expected error test results */
374ca955 2327 static const int32_t results2[]={
b75a7d8f
A
2328 /* number of bytes read, code point */
2329 4, 0x61,
2330 8, 0x62,
2331 12, 0x162,
2332 4, 0x262,
2333 };
2334
2335 UConverterToUCallback cb;
2336 const void *p;
2337
2338 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2339 UErrorCode errorCode=U_ZERO_ERROR;
2340 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2341 if(U_FAILURE(errorCode)) {
2342 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2343 return;
2344 }
2345 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2346
2347 /* Test the condition when source >= sourceLimit */
2348 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2349
2350 /* test error behavior with a skip callback */
2351 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2352 source=(const char *)in2;
2353 limit=(const char *)(in2+sizeof(in2));
2354 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2355
2356 ucnv_close(cnv);
2357}
2358
2359static void
2360TestLATIN1() {
2361 /* test input */
2362 static const uint8_t in[]={
2363 0x61,
2364 0x31,
2365 0x32,
2366 0xc0,
2367 0xf0,
2368 0xf4,
2369 };
2370
2371 /* expected test results */
374ca955 2372 static const int32_t results[]={
b75a7d8f
A
2373 /* number of bytes read, code point */
2374 1, 0x61,
2375 1, 0x31,
2376 1, 0x32,
2377 1, 0xc0,
2378 1, 0xf0,
2379 1, 0xf4,
2380 };
2381 static const uint16_t in1[] = {
2382 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2383 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2384 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2385 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2386 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2387 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2388 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2389 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2390 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2391 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2392 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2393 0xcb, 0x82
2394 };
2395 static const uint8_t out1[] = {
2396 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2397 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2398 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2399 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2400 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2401 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2402 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2403 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2404 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2405 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2406 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2407 0xcb, 0x82
2408 };
2409 static const uint16_t in2[]={
2410 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2411 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2412 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2413 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2414 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2415 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2416 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2417 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2418 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2419 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2420 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2421 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2422 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2423 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2424 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2425 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2426 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2427 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2428 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2429 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2430 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2431 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2432 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2433 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2434 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2435 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2436 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2437 0x37, 0x20, 0x2A, 0x2F,
2438 };
2439 static const unsigned char out2[]={
2440 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2441 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2442 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2443 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2444 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2445 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2446 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2447 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2448 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2449 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2450 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2451 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2452 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2453 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2454 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2455 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2456 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2457 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2458 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2459 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2460 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2461 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2462 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2463 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2464 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2465 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2466 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2467 0x37, 0x20, 0x2A, 0x2F,
2468 };
2469 const char *source=(const char *)in;
2470 const char *limit=(const char *)in+sizeof(in);
2471
2472 UErrorCode errorCode=U_ZERO_ERROR;
2473 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2474 if(U_FAILURE(errorCode)) {
2475 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2476 return;
2477 }
2478 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2479 /* Test the condition when source >= sourceLimit */
2480 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2481 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2482 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2483
2484 ucnv_close(cnv);
2485}
2486
2487static void
2488TestSBCS() {
2489 /* test input */
2490 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2491 /* expected test results */
374ca955 2492 static const int32_t results[]={
b75a7d8f
A
2493 /* number of bytes read, code point */
2494 1, 0x61,
2495 1, 0xbf,
2496 1, 0xc4,
2497 1, 0x2021,
2498 1, 0xf8ff,
2499 1, 0x00d9
2500 };
2501
2502 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2503 UErrorCode errorCode=U_ZERO_ERROR;
374ca955 2504 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
b75a7d8f 2505 if(U_FAILURE(errorCode)) {
374ca955 2506 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2507 return;
2508 }
374ca955 2509 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
b75a7d8f
A
2510 /* Test the condition when source >= sourceLimit */
2511 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2512 /*Test for Illegal character */ /*
2513 {
2514 static const uint8_t input1[]={ 0xA1 };
2515 const char* illegalsource=(const char*)input1;
2516 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2517 }
2518 */
2519 ucnv_close(cnv);
2520}
2521
2522static void
2523TestDBCS() {
2524 /* test input */
2525 static const uint8_t in[]={
2526 0x44, 0x6a,
2527 0xc4, 0x9c,
2528 0x7a, 0x74,
2529 0x46, 0xab,
2530 0x42, 0x5b,
2531
2532 };
2533
2534 /* expected test results */
374ca955 2535 static const int32_t results[]={
b75a7d8f
A
2536 /* number of bytes read, code point */
2537 2, 0x00a7,
2538 2, 0xe1d2,
2539 2, 0x6962,
2540 2, 0xf842,
2541 2, 0xffe5,
2542 };
2543
2544 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2545 UErrorCode errorCode=U_ZERO_ERROR;
2546
2547 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2548 if(U_FAILURE(errorCode)) {
2549 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2550 return;
2551 }
2552 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2553 /* Test the condition when source >= sourceLimit */
2554 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
b75a7d8f
A
2555 /*Test for the condition where there is an invalid character*/
2556 {
2557 static const uint8_t source2[]={0x1a, 0x1b};
2558 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2559 }
374ca955
A
2560 /*Test for the condition where we have a truncated char*/
2561 {
2562 static const uint8_t source1[]={0xc4};
2563 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2564 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2565 }
b75a7d8f
A
2566 ucnv_close(cnv);
2567}
2568
2569static void
2570TestMBCS() {
2571 /* test input */
2572 static const uint8_t in[]={
2573 0x01,
2574 0xa6, 0xa3,
2575 0x00,
2576 0xa6, 0xa1,
2577 0x08,
2578 0xc2, 0x76,
2579 0xc2, 0x78,
2580
2581 };
2582
2583 /* expected test results */
374ca955 2584 static const int32_t results[]={
b75a7d8f
A
2585 /* number of bytes read, code point */
2586 1, 0x0001,
2587 2, 0x250c,
2588 1, 0x0000,
2589 2, 0x2500,
2590 1, 0x0008,
2591 2, 0xd60c,
2592 2, 0xd60e,
2593 };
2594
2595 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2596 UErrorCode errorCode=U_ZERO_ERROR;
2597
2598 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2599 if(U_FAILURE(errorCode)) {
2600 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2601 return;
2602 }
2603 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2604 /* Test the condition when source >= sourceLimit */
2605 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
b75a7d8f
A
2606 /*Test for the condition where there is an invalid character*/
2607 {
2608 static const uint8_t source2[]={0xa1, 0x01};
2609 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2610 }
374ca955
A
2611 /*Test for the condition where we have a truncated char*/
2612 {
2613 static const uint8_t source1[]={0xc4};
2614 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2615 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2616 }
b75a7d8f
A
2617 ucnv_close(cnv);
2618
2619}
2620
374ca955
A
2621#ifdef U_ENABLE_GENERIC_ISO_2022
2622
b75a7d8f
A
2623static void
2624TestISO_2022() {
2625 /* test input */
2626 static const uint8_t in[]={
374ca955
A
2627 0x1b, 0x25, 0x42,
2628 0x31,
b75a7d8f
A
2629 0x32,
2630 0x61,
2631 0xc2, 0x80,
2632 0xe0, 0xa0, 0x80,
2633 0xf0, 0x90, 0x80, 0x80
2634 };
2635
2636
2637
2638 /* expected test results */
374ca955 2639 static const int32_t results[]={
b75a7d8f 2640 /* number of bytes read, code point */
374ca955 2641 4, 0x0031, /* 4 bytes including the escape sequence */
b75a7d8f
A
2642 1, 0x0032,
2643 1, 0x61,
2644 2, 0x80,
2645 3, 0x800,
374ca955 2646 4, 0x10000
b75a7d8f
A
2647 };
2648
2649 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2650 UErrorCode errorCode=U_ZERO_ERROR;
2651 UConverter *cnv;
2652
2653 cnv=ucnv_open("ISO_2022", &errorCode);
2654 if(U_FAILURE(errorCode)) {
2655 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2656 return;
2657 }
2658 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2659
2660 /* Test the condition when source >= sourceLimit */
2661 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2662 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2663 /*Test for the condition where we have a truncated char*/
2664 {
2665 static const uint8_t source1[]={0xc4};
374ca955 2666 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2667 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2668 }
2669 /*Test for the condition where there is an invalid character*/
2670 {
2671 static const uint8_t source2[]={0xa1, 0x01};
374ca955 2672 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
b75a7d8f
A
2673 }
2674 ucnv_close(cnv);
2675}
2676
374ca955
A
2677#endif
2678
b75a7d8f
A
2679static void
2680TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2681 const UChar* uSource;
2682 const UChar* uSourceLimit;
2683 const char* cSource;
2684 const char* cSourceLimit;
2685 UChar *uTargetLimit =NULL;
2686 UChar *uTarget;
2687 char *cTarget;
2688 const char *cTargetLimit;
2689 char *cBuf;
2690 UChar *uBuf,*test;
2691 int32_t uBufSize = 120;
2692 int len=0;
2693 int i=2;
2694 UErrorCode errorCode=U_ZERO_ERROR;
2695 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2696 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2697 ucnv_reset(cnv);
2698 for(;--i>0; ){
2699 uSource = (UChar*) source;
2700 uSourceLimit=(const UChar*)sourceLimit;
2701 cTarget = cBuf;
2702 uTarget = uBuf;
2703 cSource = cBuf;
2704 cTargetLimit = cBuf;
2705 uTargetLimit = uBuf;
2706
2707 do{
2708
2709 cTargetLimit = cTargetLimit+ i;
2710 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2711 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2712 errorCode=U_ZERO_ERROR;
2713 continue;
2714 }
2715
2716 if(U_FAILURE(errorCode)){
2717 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2718 return;
2719 }
2720
2721 }while (uSource<uSourceLimit);
2722
2723 cSourceLimit =cTarget;
2724 do{
2725 uTargetLimit=uTargetLimit+i;
2726 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2727 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2728 errorCode=U_ZERO_ERROR;
2729 continue;
2730 }
2731 if(U_FAILURE(errorCode)){
2732 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2733 return;
2734 }
2735 }while(cSource<cSourceLimit);
2736
2737 uSource = source;
2738 test =uBuf;
2739 for(len=0;len<(int)(source - sourceLimit);len++){
2740 if(uBuf[len]!=uSource[len]){
2741 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2742 }
2743 }
2744 }
2745 free(uBuf);
2746 free(cBuf);
2747}
2748/* Test for Jitterbug 778 */
2749static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2750 const UChar* uSource;
2751 const UChar* uSourceLimit;
2752 const char* cSource;
2753 UChar *uTargetLimit =NULL;
2754 UChar *uTarget;
2755 char *cTarget;
2756 const char *cTargetLimit;
2757 char *cBuf;
2758 UChar *uBuf,*test;
2759 int32_t uBufSize = 120;
2760 int numCharsInTarget=0;
2761 UErrorCode errorCode=U_ZERO_ERROR;
2762 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2763 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2764 uSource = source;
2765 uSourceLimit=sourceLimit;
2766 cTarget = cBuf;
2767 cTargetLimit = cBuf +uBufSize*5;
2768 uTarget = uBuf;
2769 uTargetLimit = uBuf+ uBufSize*5;
2770 ucnv_reset(cnv);
73c04bcf 2771 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
b75a7d8f
A
2772 if(U_FAILURE(errorCode)){
2773 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2774 return;
2775 }
2776 cSource = cBuf;
2777 test =uBuf;
73c04bcf 2778 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
b75a7d8f
A
2779 if(U_FAILURE(errorCode)){
2780 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2781 return;
2782 }
2783 uSource = source;
2784 while(uSource<uSourceLimit){
2785 if(*test!=*uSource){
2786
2787 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2788 }
2789 uSource++;
2790 test++;
2791 }
2792 free(uBuf);
2793 free(cBuf);
2794}
2795
2796static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2797 const UChar* uSource;
2798 const UChar* uSourceLimit;
2799 const char* cSource;
2800 const char* cSourceLimit;
2801 UChar *uTargetLimit =NULL;
2802 UChar *uTarget;
2803 char *cTarget;
2804 const char *cTargetLimit;
2805 char *cBuf;
2806 UChar *uBuf,*test;
2807 int32_t uBufSize = 120;
2808 int len=0;
2809 int i=2;
2810 const UChar *temp = sourceLimit;
2811 UErrorCode errorCode=U_ZERO_ERROR;
2812 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2813 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2814
2815 ucnv_reset(cnv);
2816 for(;--i>0;){
2817 uSource = (UChar*) source;
2818 cTarget = cBuf;
2819 uTarget = uBuf;
2820 cSource = cBuf;
2821 cTargetLimit = cBuf;
2822 uTargetLimit = uBuf+uBufSize*5;
2823 cTargetLimit = cTargetLimit+uBufSize*10;
2824 uSourceLimit=uSource;
2825 do{
2826
2827 if (uSourceLimit < sourceLimit) {
2828 uSourceLimit = uSourceLimit+1;
2829 }
2830 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2831 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2832 errorCode=U_ZERO_ERROR;
2833 continue;
2834 }
2835
2836 if(U_FAILURE(errorCode)){
2837 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2838 return;
2839 }
2840
2841 }while (uSource<temp);
2842
2843 cSourceLimit =cBuf;
2844 do{
2845 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2846 cSourceLimit = cSourceLimit+1;
2847 }
2848 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2849 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2850 errorCode=U_ZERO_ERROR;
2851 continue;
2852 }
2853 if(U_FAILURE(errorCode)){
2854 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2855 return;
2856 }
2857 }while(cSource<cTarget);
2858
2859 uSource = source;
2860 test =uBuf;
2861 for(;len<(int)(source - sourceLimit);len++){
2862 if(uBuf[len]!=uSource[len]){
2863 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2864 }
2865 }
2866 }
2867 free(uBuf);
2868 free(cBuf);
2869}
2870static void
2871TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2872 const uint16_t results[], const char* message){
2873 const char* s0;
2874 const char* s=(char*)source;
2875 const uint16_t *r=results;
2876 UErrorCode errorCode=U_ZERO_ERROR;
2877 uint32_t c,exC;
2878 ucnv_reset(cnv);
2879 while(s<limit) {
2880 s0=s;
2881 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2882 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2883 break; /* no more significant input */
2884 } else if(U_FAILURE(errorCode)) {
2885 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2886 break;
2887 } else {
2888 if(UTF_IS_FIRST_SURROGATE(*r)){
2889 int i =0, len = 2;
2890 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2891 r++;
2892 }else{
2893 exC = *r;
2894 }
2895 if(c!=(uint32_t)(exC))
2896 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2897 }
2898 r++;
2899 }
2900}
2901
2902static int TestJitterbug930(const char* enc){
2903 UErrorCode err = U_ZERO_ERROR;
2904 UConverter*converter;
2905 char out[80];
2906 char*target = out;
2907 UChar in[4];
2908 const UChar*source = in;
2909 int32_t off[80];
2910 int32_t* offsets = off;
2911 int numOffWritten=0;
2912 UBool flush = 0;
2913 converter = my_ucnv_open(enc, &err);
2914
2915 in[0] = 0x41; /* 0x4E00;*/
2916 in[1] = 0x4E01;
2917 in[2] = 0x4E02;
2918 in[3] = 0x4E03;
2919
2920 memset(off, '*', sizeof(off));
2921
2922 ucnv_fromUnicode (converter,
2923 &target,
2924 target+2,
2925 &source,
2926 source+3,
2927 offsets,
2928 flush,
2929 &err);
2930
2931 /* writes three bytes into the output buffer: 41 1B 24
2932 * but offsets contains 0 1 1
2933 */
2934 while(*offsets< off[10]){
2935 numOffWritten++;
2936 offsets++;
2937 }
2938 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2939 if(numOffWritten!= (int)(target-out)){
2940 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2941 }
2942
2943 err = U_ZERO_ERROR;
2944
2945 memset(off,'*' , sizeof(off));
2946
2947 flush = 1;
2948 offsets=off;
2949 ucnv_fromUnicode (converter,
2950 &target,
2951 target+4,
2952 &source,
2953 source,
2954 offsets,
2955 flush,
2956 &err);
2957 numOffWritten=0;
2958 while(*offsets< off[10]){
2959 numOffWritten++;
2960 if(*offsets!= -1){
2961 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2962 }
2963 offsets++;
2964 }
2965
2966 /* writes 42 43 7A into output buffer,
2967 * offsets contains -1 -1 -1
2968 */
2969 ucnv_close(converter);
2970 return 0;
2971}
2972
2973static void
2974TestHZ() {
2975 /* test input */
2976 static const uint16_t in[]={
2977 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2978 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2979 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2980 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2981 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2982 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2983 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2984 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2985 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2986 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2987 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2988 0x005A, 0x005B, 0x005C, 0x000A
2989 };
2990 const UChar* uSource;
2991 const UChar* uSourceLimit;
2992 const char* cSource;
2993 const char* cSourceLimit;
2994 UChar *uTargetLimit =NULL;
2995 UChar *uTarget;
2996 char *cTarget;
2997 const char *cTargetLimit;
2998 char *cBuf;
2999 UChar *uBuf,*test;
3000 int32_t uBufSize = 120;
3001 UErrorCode errorCode=U_ZERO_ERROR;
3002 UConverter *cnv;
3003 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3004 int32_t* myOff= offsets;
3005 cnv=ucnv_open("HZ", &errorCode);
3006 if(U_FAILURE(errorCode)) {
3007 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3008 return;
3009 }
3010
3011 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3012 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
3013 uSource = (const UChar*)in;
3014 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
3015 cTarget = cBuf;
3016 cTargetLimit = cBuf +uBufSize*5;
3017 uTarget = uBuf;
3018 uTargetLimit = uBuf+ uBufSize*5;
3019 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3020 if(U_FAILURE(errorCode)){
3021 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3022 return;
3023 }
3024 cSource = cBuf;
3025 cSourceLimit =cTarget;
3026 test =uBuf;
3027 myOff=offsets;
3028 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3029 if(U_FAILURE(errorCode)){
3030 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3031 return;
3032 }
73c04bcf 3033 uSource = (const UChar*)in;
b75a7d8f
A
3034 while(uSource<uSourceLimit){
3035 if(*test!=*uSource){
3036
3037 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3038 }
3039 uSource++;
3040 test++;
3041 }
3042 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
73c04bcf
A
3043 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3044 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3045 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
3046 TestJitterbug930("csISO2022JP");
3047 ucnv_close(cnv);
3048 free(offsets);
3049 free(uBuf);
3050 free(cBuf);
3051}
3052
3053static void
3054TestISCII(){
3055 /* test input */
3056 static const uint16_t in[]={
3057 /* test full range of Devanagari */
3058 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3059 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3060 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3061 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3062 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3063 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3064 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3065 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3066 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3067 0x096D,0x096E,0x096F,
3068 /* test Soft halant*/
3069 0x0915,0x094d, 0x200D,
3070 /* test explicit halant */
3071 0x0915,0x094d, 0x200c,
3072 /* test double danda */
3073 0x965,
3074 /* test ASCII */
3075 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3076 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3077 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3078 /* tests from Lotus */
3079 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3080 0x0930,0x094D,0x200D,
3081 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3082 0x0915,0x0921,0x002B,0x095F,
3083 /* tamil range */
3084 0x0B86, 0xB87, 0xB88,
3085 /* telugu range */
3086 0x0C05, 0x0C02, 0x0C03,0x0c31,
3087 /* kannada range */
3088 0x0C85, 0xC82, 0x0C83,
3089 /* test Abbr sign and Anudatta */
3090 0x0970, 0x952,
3091 /* 0x0958,
3092 0x0959,
3093 0x095A,
3094 0x095B,
3095 0x095C,
3096 0x095D,
3097 0x095E,
3098 0x095F,*/
3099 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3100 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3101 0x090C ,
3102 0x0962,
3103 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3104 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3105 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3106 0x093D /* Avagraha 0xEA, 0xE9*/,
3107 0x0958,
3108 0x0959,
3109 0x095A,
3110 0x095B,
3111 0x095C,
3112 0x095D,
3113 0x095E,
3114 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3115 };
3116 static const unsigned char byteArr[]={
3117
3118 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3119 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3120 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3121 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3122 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3123 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3124 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3125 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3126 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3127 0xf8,0xf9,0xfa,
3128 /* test soft halant */
3129 0xb3, 0xE8, 0xE9,
3130 /* test explicit halant */
3131 0xb3, 0xE8, 0xE8,
3132 /* test double danda */
3133 0xea, 0xea,
3134 /* test ASCII */
3135 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3136 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3137 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3138 /* test ATR code */
3139
3140 /* tests from Lotus */
3141 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3142 0xEF,0x42,0xCF,0xE8,0xD9,
3143 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3144 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3145 /* tamil range */
3146 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3147 /* telugu range */
3148 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3149 /* kannada range */
3150 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3151 /* anudatta and abbreviation sign */
3152 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3153
3154
3155 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3156
3157 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3158
3159 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3160
3161 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3162
3163 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3164
3165 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3166
3167 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3168
3169 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3170
3171 0xB3, 0xE9, /* Ka + NUKTA */
3172
3173 0xB4, 0xE9, /* Kha + NUKTA */
3174
3175 0xB5, 0xE9, /* Ga + NUKTA */
3176
3177 0xBA, 0xE9,
3178
3179 0xBF, 0xE9,
3180
3181 0xC0, 0xE9,
3182
3183 0xC9, 0xE9,
3184 /* INV halant RA */
3185 0xD9, 0xE8, 0xCF,
3186 0x00, 0x00A0,
3187 /* just consume unhandled codepoints */
3188 0xEF, 0x30,
3189
3190 };
3191 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3192 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3193
3194}
3195
3196static void
3197TestISO_2022_JP() {
3198 /* test input */
3199 static const uint16_t in[]={
3200 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3201 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3202 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3203 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3204 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3205 0x201D, 0x3014, 0x000D, 0x000A,
3206 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3207 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3208 };
3209 const UChar* uSource;
3210 const UChar* uSourceLimit;
3211 const char* cSource;
3212 const char* cSourceLimit;
3213 UChar *uTargetLimit =NULL;
3214 UChar *uTarget;
3215 char *cTarget;
3216 const char *cTargetLimit;
3217 char *cBuf;
3218 UChar *uBuf,*test;
3219 int32_t uBufSize = 120;
3220 UErrorCode errorCode=U_ZERO_ERROR;
3221 UConverter *cnv;
3222 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3223 int32_t* myOff= offsets;
3224 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3225 if(U_FAILURE(errorCode)) {
374ca955 3226 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
3227 return;
3228 }
3229
3230 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3231 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
3232 uSource = (const UChar*)in;
3233 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
3234 cTarget = cBuf;
3235 cTargetLimit = cBuf +uBufSize*5;
3236 uTarget = uBuf;
3237 uTargetLimit = uBuf+ uBufSize*5;
3238 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3239 if(U_FAILURE(errorCode)){
3240 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3241 return;
3242 }
3243 cSource = cBuf;
3244 cSourceLimit =cTarget;
3245 test =uBuf;
3246 myOff=offsets;
3247 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3248 if(U_FAILURE(errorCode)){
3249 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3250 return;
3251 }
3252
73c04bcf 3253 uSource = (const UChar*)in;
b75a7d8f
A
3254 while(uSource<uSourceLimit){
3255 if(*test!=*uSource){
3256
3257 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3258 }
3259 uSource++;
3260 test++;
3261 }
3262
73c04bcf
A
3263 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3264 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f 3265 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
73c04bcf 3266 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
3267 TestJitterbug930("csISO2022JP");
3268 ucnv_close(cnv);
3269 free(uBuf);
3270 free(cBuf);
3271 free(offsets);
3272}
3273
3274static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3275 const UChar* uSource;
3276 const UChar* uSourceLimit;
3277 const char* cSource;
3278 const char* cSourceLimit;
3279 UChar *uTargetLimit =NULL;
3280 UChar *uTarget;
3281 char *cTarget;
3282 const char *cTargetLimit;
3283 char *cBuf;
3284 UChar *uBuf,*test;
3285 int32_t uBufSize = 120*10;
3286 UErrorCode errorCode=U_ZERO_ERROR;
3287 UConverter *cnv;
3288 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3289 int32_t* myOff= offsets;
3290 cnv=my_ucnv_open(conv, &errorCode);
3291 if(U_FAILURE(errorCode)) {
3292 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3293 return;
3294 }
3295
3296 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3297 cBuf =(char*)malloc(uBufSize * sizeof(char));
73c04bcf 3298 uSource = (const UChar*)in;
b75a7d8f
A
3299 uSourceLimit=uSource+len;
3300 cTarget = cBuf;
3301 cTargetLimit = cBuf +uBufSize;
3302 uTarget = uBuf;
3303 uTargetLimit = uBuf+ uBufSize;
3304 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3305 if(U_FAILURE(errorCode)){
3306 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3307 return;
3308 }
3309 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3310 cSource = cBuf;
3311 cSourceLimit =cTarget;
3312 test =uBuf;
3313 myOff=offsets;
3314 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3315 if(U_FAILURE(errorCode)){
3316 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3317 return;
3318 }
3319
73c04bcf 3320 uSource = (const UChar*)in;
b75a7d8f
A
3321 while(uSource<uSourceLimit){
3322 if(*test!=*uSource){
3323 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3324 }
3325 uSource++;
3326 test++;
3327 }
73c04bcf
A
3328 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3329 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
b75a7d8f
A
3330 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3331 if(byteArr && byteArrLen!=0){
3332 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
73c04bcf 3333 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
b75a7d8f
A
3334 {
3335 cSource = byteArr;
3336 cSourceLimit = cSource+byteArrLen;
3337 test=uBuf;
3338 myOff = offsets;
3339 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3340 if(U_FAILURE(errorCode)){
3341 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3342 return;
3343 }
3344
73c04bcf 3345 uSource = (const UChar*)in;
b75a7d8f
A
3346 while(uSource<uSourceLimit){
3347 if(*test!=*uSource){
3348 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3349 }
3350 uSource++;
3351 test++;
3352 }
3353 }
3354 }
3355
3356 ucnv_close(cnv);
3357 free(uBuf);
3358 free(cBuf);
3359 free(offsets);
3360}
3361static UChar U_CALLCONV
3362_charAt(int32_t offset, void *context) {
3363 return ((char*)context)[offset];
3364}
3365
3366static int32_t
3367unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3368 int32_t srcIndex=0;
3369 int32_t dstIndex=0;
3370 if(U_FAILURE(*status)){
3371 return 0;
3372 }
3373 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3374 *status = U_ILLEGAL_ARGUMENT_ERROR;
3375 return 0;
3376 }
3377 if(srcLen==-1){
73c04bcf 3378 srcLen = (int32_t)uprv_strlen(src);
b75a7d8f
A
3379 }
3380
3381 for (; srcIndex<srcLen; ) {
3382 UChar32 c = src[srcIndex++];
3383 if (c == 0x005C /*'\\'*/) {
3384 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3385 if (c == (UChar32)0xFFFFFFFF) {
3386 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3387 break; /* invalid escape sequence */
3388 }
3389 }
3390 if(dstIndex < dstLen){
3391 if(c>0xFFFF){
3392 dst[dstIndex++] = UTF16_LEAD(c);
3393 if(dstIndex<dstLen){
3394 dst[dstIndex]=UTF16_TRAIL(c);
3395 }else{
3396 *status=U_BUFFER_OVERFLOW_ERROR;
3397 }
3398 }else{
3399 dst[dstIndex]=(UChar)c;
3400 }
3401
3402 }else{
3403 *status = U_BUFFER_OVERFLOW_ERROR;
3404 }
3405 dstIndex++; /* for preflighting */
3406 }
3407 return dstIndex;
3408}
3409
3410static void
3411TestFullRoundtrip(const char* cp){
3412 UChar usource[10] ={0};
3413 UChar nsrc[10] = {0};
3414 uint32_t i=1;
3415 int len=0, ulen;
3416 nsrc[0]=0x0061;
3417 /* Test codepoint 0 */
3418 TestConv(usource,1,cp,"",NULL,0);
3419 TestConv(usource,2,cp,"",NULL,0);
3420 nsrc[2]=0x5555;
3421 TestConv(nsrc,3,cp,"",NULL,0);
3422
3423 for(;i<=0x10FFFF;i++){
3424 if(i==0xD800){
3425 i=0xDFFF;
3426 continue;
3427 }
3428 if(i<=0xFFFF){
3429 usource[0] =(UChar) i;
3430 len=1;
3431 }else{
3432 usource[0]=UTF16_LEAD(i);
3433 usource[1]=UTF16_TRAIL(i);
3434 len=2;
3435 }
3436 ulen=len;
3437 if(i==0x80) {
3438 usource[2]=0;
3439 }
3440 /* Test only single code points */
3441 TestConv(usource,ulen,cp,"",NULL,0);
3442 /* Test codepoint repeated twice */
3443 usource[ulen]=usource[0];
3444 usource[ulen+1]=usource[1];
3445 ulen+=len;
3446 TestConv(usource,ulen,cp,"",NULL,0);
3447 /* Test codepoint repeated 3 times */
3448 usource[ulen]=usource[0];
3449 usource[ulen+1]=usource[1];
3450 ulen+=len;
3451 TestConv(usource,ulen,cp,"",NULL,0);
3452 /* Test codepoint in between 2 codepoints */
3453 nsrc[1]=usource[0];
3454 nsrc[2]=usource[1];
3455 nsrc[len+1]=0x5555;
3456 TestConv(nsrc,len+2,cp,"",NULL,0);
3457 uprv_memset(usource,0,sizeof(UChar)*10);
3458 }
3459}
3460
3461static void
3462TestRoundTrippingAllUTF(void){
3463 if(!QUICK){
3464 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3465 TestFullRoundtrip("BOCU-1");
3466 log_verbose("Running exhaustive round trip test for SCSU\n");
3467 TestFullRoundtrip("SCSU");
3468 log_verbose("Running exhaustive round trip test for UTF-8\n");
3469 TestFullRoundtrip("UTF-8");
3470 log_verbose("Running exhaustive round trip test for CESU-8\n");
3471 TestFullRoundtrip("CESU-8");
3472 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3473 TestFullRoundtrip("UTF-16BE");
3474 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3475 TestFullRoundtrip("UTF-16LE");
3476 log_verbose("Running exhaustive round trip test for UTF-16\n");
3477 TestFullRoundtrip("UTF-16");
3478 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3479 TestFullRoundtrip("UTF-32BE");
3480 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3481 TestFullRoundtrip("UTF-32LE");
3482 log_verbose("Running exhaustive round trip test for UTF-32\n");
3483 TestFullRoundtrip("UTF-32");
3484 log_verbose("Running exhaustive round trip test for UTF-7\n");
3485 TestFullRoundtrip("UTF-7");
3486 log_verbose("Running exhaustive round trip test for UTF-7\n");
3487 TestFullRoundtrip("UTF-7,version=1");
3488 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3489 TestFullRoundtrip("IMAP-mailbox-name");
3490 log_verbose("Running exhaustive round trip test for GB18030\n");
3491 TestFullRoundtrip("GB18030");
3492 }
3493}
3494
3495static void
3496TestSCSU() {
3497
3498 static const uint16_t germanUTF16[]={
3499 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3500 };
3501
3502 static const uint8_t germanSCSU[]={
3503 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3504 };
3505
3506 static const uint16_t russianUTF16[]={
3507 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3508 };
3509
3510 static const uint8_t russianSCSU[]={
3511 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3512 };
3513
3514 static const uint16_t japaneseUTF16[]={
3515 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3516 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3517 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3518 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3519 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3520 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3521 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3522 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3523 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3524 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3525 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3526 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3527 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3528 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3529 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3530 };
3531
3532 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3533 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3534 static const uint8_t japaneseSCSU[]={
3535 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3536 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3537 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3538 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3539 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3540 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3541 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3542 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3543 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3544 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3545 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3546 0xcb, 0x82
3547 };
3548
3549 static const uint16_t allFeaturesUTF16[]={
3550 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3551 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3552 0x01df, 0xf000, 0xdbff, 0xdfff
3553 };
3554
3555 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3556 * result here (34B vs. 35B)
3557 */
3558 static const uint8_t allFeaturesSCSU[]={
3559 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3560 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3561 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3562 0xdf, 0x14, 0x80, 0x15, 0xff
3563 };
3564 static const uint16_t monkeyIn[]={
3565 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3566 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3567 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3568 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3569 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3570 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3571 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3572 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3573 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3574 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3575 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3576 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3577 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3578 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3579 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3580 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3581 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3582 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3583 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3584 /* test non-BMP code points */
3585 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3586 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3587 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3588 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3589 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3590 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3591 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3592 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3593 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3594 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3595 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3596
3597
3598 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3599 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3600 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3601 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3602 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3603 };
3604 static const char *fTestCases [] = {
3605 "\\ud800\\udc00", /* smallest surrogate*/
3606 "\\ud8ff\\udcff",
3607 "\\udBff\\udFff", /* largest surrogate pair*/
3608 "\\ud834\\udc00",
3609 "\\U0010FFFF",
3610 "Hello \\u9292 \\u9192 World!",
3611 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3612 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3613
3614 "\\u0648\\u06c8", /* catch missing reset*/
3615 "\\u0648\\u06c8",
3616
3617 "\\u4444\\uE001", /* lowest quotable*/
3618 "\\u4444\\uf2FF", /* highest quotable*/
3619 "\\u4444\\uf188\\u4444",
3620 "\\u4444\\uf188\\uf288",
3621 "\\u4444\\uf188abc\\u0429\\uf288",
3622 "\\u9292\\u2222",
3623 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3624 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3625 "Hello World!123456",
3626 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3627
3628 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3629 "abc\\u4411d", /* uses SQU*/
3630 "abc\\u4411\\u4412d",/* uses SCU*/
3631 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3632 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3633 "\\u9292\\u2222",
3634 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3635 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3636 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3637
3638 "", /* empty input*/
3639 "\\u0000", /* smallest BMP character*/
3640 "\\uFFFF", /* largest BMP character*/
3641
3642 /* regression tests*/
3643 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3644 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3645 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3646 "\\u0041\\u00df\\u0401\\u015f",
3647 "\\u9066\\u2123abc",
3648 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3649 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3650 };
3651 int i=0;
3652 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3653 const char* cSrc = fTestCases[i];
3654 UErrorCode status = U_ZERO_ERROR;
3655 int32_t cSrcLen,srcLen;
3656 UChar* src;
3657 /* UConverter* cnv = ucnv_open("SCSU",&status); */
73c04bcf 3658 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
b75a7d8f
A
3659 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3660 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3661 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3662 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3663 free(src);
3664 }
3665 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3666 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3667 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3668 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3669 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3670 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3671 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3672}
73c04bcf
A
3673
3674#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
3675static void TestJitterbug2346(){
3676 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3677 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3678 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3679
3680 UChar uTarget[500]={'\0'};
3681 UChar* utarget=uTarget;
3682 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3683
3684 char cTarget[500]={'\0'};
3685 char* ctarget=cTarget;
3686 char* ctargetLimit=cTarget+sizeof(cTarget);
3687 const char* csource=source;
3688 UChar* temp = expected;
3689 UErrorCode err=U_ZERO_ERROR;
3690
3691 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3692 if(U_FAILURE(err)) {
3693 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3694 return;
3695 }
3696 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3697 if(U_FAILURE(err)) {
3698 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3699 return;
3700 }
3701 utargetLimit=utarget;
3702 utarget = uTarget;
3703 while(utarget<utargetLimit){
3704 if(*temp!=*utarget){
3705
3706 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3707 }
3708 utarget++;
3709 temp++;
3710 }
3711 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3712 if(U_FAILURE(err)) {
3713 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3714 return;
3715 }
3716 ctargetLimit=ctarget;
3717 ctarget =cTarget;
3718 ucnv_close(conv);
3719
3720
3721}
73c04bcf 3722
b75a7d8f
A
3723static void
3724TestISO_2022_JP_1() {
3725 /* test input */
3726 static const uint16_t in[]={
3727 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3728 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3729 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3730 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3731 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3732 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3733 0x201D, 0x000D, 0x000A,
3734 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3735 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3736 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3737 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3738 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3739 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3740 };
3741 const UChar* uSource;
3742 const UChar* uSourceLimit;
3743 const char* cSource;
3744 const char* cSourceLimit;
3745 UChar *uTargetLimit =NULL;
3746 UChar *uTarget;
3747 char *cTarget;
3748 const char *cTargetLimit;
3749 char *cBuf;
3750 UChar *uBuf,*test;
3751 int32_t uBufSize = 120;
3752 UErrorCode errorCode=U_ZERO_ERROR;
3753 UConverter *cnv;
3754
3755 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3756 if(U_FAILURE(errorCode)) {
3757 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3758 return;
3759 }
3760
3761 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3762 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
3763 uSource = (const UChar*)in;
3764 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
3765 cTarget = cBuf;
3766 cTargetLimit = cBuf +uBufSize*5;
3767 uTarget = uBuf;
3768 uTargetLimit = uBuf+ uBufSize*5;
3769 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3770 if(U_FAILURE(errorCode)){
3771 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3772 return;
3773 }
3774 cSource = cBuf;
3775 cSourceLimit =cTarget;
3776 test =uBuf;
3777 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3778 if(U_FAILURE(errorCode)){
3779 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3780 return;
3781 }
73c04bcf 3782 uSource = (const UChar*)in;
b75a7d8f
A
3783 while(uSource<uSourceLimit){
3784 if(*test!=*uSource){
3785
3786 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3787 }
3788 uSource++;
3789 test++;
3790 }
3791 /*ucnv_close(cnv);
3792 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3793 /*Test for the condition where there is an invalid character*/
3794 ucnv_reset(cnv);
3795 {
3796 static const uint8_t source2[]={0x0e,0x24,0x053};
3797 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3798 }
73c04bcf
A
3799 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3800 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
3801 ucnv_close(cnv);
3802 free(uBuf);
3803 free(cBuf);
3804}
3805
3806static void
3807TestISO_2022_JP_2() {
3808 /* test input */
3809 static const uint16_t in[]={
3810 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3811 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3812 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3813 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3814 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3815 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3816 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3817 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3818 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3819 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3820 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3821 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3822 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3823 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3824 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3825 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3826 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3827 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3828 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3829 };
3830 const UChar* uSource;
3831 const UChar* uSourceLimit;
3832 const char* cSource;
3833 const char* cSourceLimit;
3834 UChar *uTargetLimit =NULL;
3835 UChar *uTarget;
3836 char *cTarget;
3837 const char *cTargetLimit;
3838 char *cBuf;
3839 UChar *uBuf,*test;
3840 int32_t uBufSize = 120;
3841 UErrorCode errorCode=U_ZERO_ERROR;
3842 UConverter *cnv;
3843 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3844 int32_t* myOff= offsets;
3845 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3846 if(U_FAILURE(errorCode)) {
3847 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3848 return;
3849 }
3850
3851 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3852 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
3853 uSource = (const UChar*)in;
3854 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
3855 cTarget = cBuf;
3856 cTargetLimit = cBuf +uBufSize*5;
3857 uTarget = uBuf;
3858 uTargetLimit = uBuf+ uBufSize*5;
3859 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3860 if(U_FAILURE(errorCode)){
3861 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3862 return;
3863 }
3864 cSource = cBuf;
3865 cSourceLimit =cTarget;
3866 test =uBuf;
3867 myOff=offsets;
3868 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3869 if(U_FAILURE(errorCode)){
3870 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3871 return;
3872 }
73c04bcf 3873 uSource = (const UChar*)in;
b75a7d8f
A
3874 while(uSource<uSourceLimit){
3875 if(*test!=*uSource){
3876
3877 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3878 }
3879 uSource++;
3880 test++;
3881 }
73c04bcf
A
3882 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3883 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3884 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
3885 /*Test for the condition where there is an invalid character*/
3886 ucnv_reset(cnv);
3887 {
3888 static const uint8_t source2[]={0x0e,0x24,0x053};
3889 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3890 }
3891 ucnv_close(cnv);
3892 free(uBuf);
3893 free(cBuf);
3894 free(offsets);
3895}
3896
3897static void
3898TestISO_2022_KR() {
3899 /* test input */
3900 static const uint16_t in[]={
3901 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3902 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3903 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3904 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3905 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3906 ,0x53E3,0x53E4,0x000A,0x000D};
3907 const UChar* uSource;
3908 const UChar* uSourceLimit;
3909 const char* cSource;
3910 const char* cSourceLimit;
3911 UChar *uTargetLimit =NULL;
3912 UChar *uTarget;
3913 char *cTarget;
3914 const char *cTargetLimit;
3915 char *cBuf;
3916 UChar *uBuf,*test;
3917 int32_t uBufSize = 120;
3918 UErrorCode errorCode=U_ZERO_ERROR;
3919 UConverter *cnv;
3920 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3921 int32_t* myOff= offsets;
3922 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3923 if(U_FAILURE(errorCode)) {
3924 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3925 return;
3926 }
3927
3928 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3929 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
3930 uSource = (const UChar*)in;
3931 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
3932 cTarget = cBuf;
3933 cTargetLimit = cBuf +uBufSize*5;
3934 uTarget = uBuf;
3935 uTargetLimit = uBuf+ uBufSize*5;
3936 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3937 if(U_FAILURE(errorCode)){
3938 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3939 return;
3940 }
3941 cSource = cBuf;
3942 cSourceLimit =cTarget;
3943 test =uBuf;
3944 myOff=offsets;
3945 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3946 if(U_FAILURE(errorCode)){
3947 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3948 return;
3949 }
73c04bcf 3950 uSource = (const UChar*)in;
b75a7d8f
A
3951 while(uSource<uSourceLimit){
3952 if(*test!=*uSource){
3953 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3954 }
3955 uSource++;
3956 test++;
3957 }
3958 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
73c04bcf
A
3959 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3960 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3961 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
3962 TestJitterbug930("csISO2022KR");
3963 /*Test for the condition where there is an invalid character*/
3964 ucnv_reset(cnv);
3965 {
3966 static const uint8_t source2[]={0x1b,0x24,0x053};
374ca955 3967 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
3968 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3969 }
3970 ucnv_close(cnv);
3971 free(uBuf);
3972 free(cBuf);
3973 free(offsets);
3974}
3975
3976static void
3977TestISO_2022_KR_1() {
3978 /* test input */
3979 static const uint16_t in[]={
3980 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3981 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3982 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3983 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3984 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3985 ,0x53E3,0x53E4,0x000A,0x000D};
3986 const UChar* uSource;
3987 const UChar* uSourceLimit;
3988 const char* cSource;
3989 const char* cSourceLimit;
3990 UChar *uTargetLimit =NULL;
3991 UChar *uTarget;
3992 char *cTarget;
3993 const char *cTargetLimit;
3994 char *cBuf;
3995 UChar *uBuf,*test;
3996 int32_t uBufSize = 120;
3997 UErrorCode errorCode=U_ZERO_ERROR;
3998 UConverter *cnv;
3999 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4000 int32_t* myOff= offsets;
4001 cnv=ucnv_open("ibm-25546", &errorCode);
4002 if(U_FAILURE(errorCode)) {
4003 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4004 return;
4005 }
4006
4007 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4008 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf
A
4009 uSource = (const UChar*)in;
4010 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
4011 cTarget = cBuf;
4012 cTargetLimit = cBuf +uBufSize*5;
4013 uTarget = uBuf;
4014 uTargetLimit = uBuf+ uBufSize*5;
4015 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4016 if(U_FAILURE(errorCode)){
4017 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4018 return;
4019 }
4020 cSource = cBuf;
4021 cSourceLimit =cTarget;
4022 test =uBuf;
4023 myOff=offsets;
4024 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4025 if(U_FAILURE(errorCode)){
4026 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4027 return;
4028 }
73c04bcf 4029 uSource = (const UChar*)in;
b75a7d8f
A
4030 while(uSource<uSourceLimit){
4031 if(*test!=*uSource){
4032 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4033 }
4034 uSource++;
4035 test++;
4036 }
4037 ucnv_reset(cnv);
4038 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
73c04bcf
A
4039 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4040 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f 4041 ucnv_reset(cnv);
73c04bcf 4042 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
4043 /*Test for the condition where there is an invalid character*/
4044 ucnv_reset(cnv);
4045 {
4046 static const uint8_t source2[]={0x1b,0x24,0x053};
374ca955 4047 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
4048 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4049 }
4050 ucnv_close(cnv);
4051 free(uBuf);
4052 free(cBuf);
4053 free(offsets);
4054}
4055
4056static void TestJitterbug2411(){
73c04bcf 4057 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
b75a7d8f
A
4058 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4059 UConverter* kr=NULL, *kr1=NULL;
4060 UErrorCode errorCode = U_ZERO_ERROR;
4061 UChar tgt[100]={'\0'};
4062 UChar* target = tgt;
4063 UChar* targetLimit = target+100;
4064 kr=ucnv_open("iso-2022-kr", &errorCode);
4065 if(U_FAILURE(errorCode)) {
4066 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4067 return;
4068 }
4069 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4070 if(U_FAILURE(errorCode)) {
4071 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4072 return;
4073 }
4074 kr1 = ucnv_open("ibm-25546", &errorCode);
4075 if(U_FAILURE(errorCode)) {
4076 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4077 return;
4078 }
4079 target = tgt;
4080 targetLimit = target+100;
4081 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4082
4083 if(U_FAILURE(errorCode)) {
4084 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4085 return;
4086 }
4087
4088 ucnv_close(kr);
4089 ucnv_close(kr1);
4090
4091}
4092
4093static void
4094TestJIS(){
374ca955 4095 /* From Unicode moved to testdata/conversion.txt */
b75a7d8f
A
4096 /*To Unicode*/
4097 {
73c04bcf 4098 static const uint8_t sampleTextJIS[] = {
b75a7d8f
A
4099 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4100 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4101 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4102 };
73c04bcf 4103 static const uint16_t expectedISO2022JIS[] = {
b75a7d8f
A
4104 0x0041, 0x0042,
4105 0xFF81, 0xFF82,
4106 0x3000
4107 };
73c04bcf 4108 static const int32_t toISO2022JISOffs[]={
b75a7d8f
A
4109 3,4,
4110 8,9,
4111 16
4112 };
4113
73c04bcf 4114 static const uint8_t sampleTextJIS7[] = {
b75a7d8f
A
4115 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4116 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4117 0x1b,0x24,0x42,0x21,0x21,
4118 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4119 0x21,0x22,
4120 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4121 };
73c04bcf 4122 static const uint16_t expectedISO2022JIS7[] = {
b75a7d8f
A
4123 0x0041, 0x0042,
4124 0xFF81, 0xFF82,
4125 0x3000,
4126 0xFF81, 0xFF82,
4127 0x3001,
4128 0x3000
4129 };
73c04bcf 4130 static const int32_t toISO2022JIS7Offs[]={
b75a7d8f
A
4131 3,4,
4132 8,9,
4133 13,16,
4134 17,
4135 19,27
4136 };
73c04bcf 4137 static const uint8_t sampleTextJIS8[] = {
b75a7d8f
A
4138 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4139 0xa1,0xc8,0xd9,/*Katakana Set*/
4140 0x1b,0x28,0x42,
4141 0x41,0x42,
4142 0xb1,0xc3, /*Katakana Set*/
4143 0x1b,0x24,0x42,0x21,0x21
4144 };
73c04bcf 4145 static const uint16_t expectedISO2022JIS8[] = {
b75a7d8f
A
4146 0x0041, 0x0042,
4147 0xff61, 0xff88, 0xff99,
4148 0x0041, 0x0042,
4149 0xff71, 0xff83,
4150 0x3000
4151 };
73c04bcf 4152 static const int32_t toISO2022JIS8Offs[]={
b75a7d8f
A
4153 3, 4, 5, 6,
4154 7, 11, 12, 13,
4155 14, 18,
4156 };
4157
4158 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4159 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4160 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4161 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4162 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4163 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4164 }
4165
4166}
4167
4168static void TestJitterbug915(){
4169/* tests for roundtripping of the below sequence
4170\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4171\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4172\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4173\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4174\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4175\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4176\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4177*/
73c04bcf 4178 static const char cSource[]={
b75a7d8f
A
4179 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4180 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4181 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4182 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4183 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4184 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
374ca955 4185 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
b75a7d8f
A
4186 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4187 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4188 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
374ca955 4189 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
b75a7d8f
A
4190 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4191 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4192 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
374ca955 4193 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
b75a7d8f
A
4194 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4195 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4196 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
374ca955 4197 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
b75a7d8f
A
4198 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4199 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4200 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
374ca955 4201 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
b75a7d8f
A
4202 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4203 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4204 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
374ca955
A
4205 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4206 0x37, 0x20, 0x2A, 0x2F
b75a7d8f
A
4207 };
4208 UChar uTarget[500]={'\0'};
4209 UChar* utarget=uTarget;
4210 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4211
4212 char cTarget[500]={'\0'};
4213 char* ctarget=cTarget;
4214 char* ctargetLimit=cTarget+sizeof(cTarget);
4215 const char* csource=cSource;
73c04bcf 4216 const char* tempSrc = cSource;
b75a7d8f
A
4217 UErrorCode err=U_ZERO_ERROR;
4218
4219 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4220 if(U_FAILURE(err)) {
4221 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4222 return;
4223 }
4224 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4225 if(U_FAILURE(err)) {
4226 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4227 return;
4228 }
4229 utargetLimit=utarget;
4230 utarget = uTarget;
4231 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4232 if(U_FAILURE(err)) {
4233 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4234 return;
4235 }
4236 ctargetLimit=ctarget;
4237 ctarget =cTarget;
4238 while(ctarget<ctargetLimit){
374ca955
A
4239 if(*ctarget != *tempSrc){
4240 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
b75a7d8f 4241 }
374ca955
A
4242 ++ctarget;
4243 ++tempSrc;
b75a7d8f
A
4244 }
4245
4246 ucnv_close(conv);
4247}
4248
4249static void
4250TestISO_2022_CN_EXT() {
4251 /* test input */
4252 static const uint16_t in[]={
4253 /* test Non-BMP code points */
4254 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4255 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4256 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4257 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4258 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4259 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4260 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4261 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4262 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4263 0xD869, 0xDED5,
4264
4265 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4266 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4267 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4268 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4269 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4270 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4271 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4272 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4273 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4274 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4275 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4276 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4277 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4278 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4279 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4280 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4281 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4282 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4283
4284 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4285
4286 };
4287
4288 const UChar* uSource;
4289 const UChar* uSourceLimit;
4290 const char* cSource;
4291 const char* cSourceLimit;
4292 UChar *uTargetLimit =NULL;
4293 UChar *uTarget;
4294 char *cTarget;
4295 const char *cTargetLimit;
4296 char *cBuf;
4297 UChar *uBuf,*test;
4298 int32_t uBufSize = 180;
4299 UErrorCode errorCode=U_ZERO_ERROR;
4300 UConverter *cnv;
4301 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4302 int32_t* myOff= offsets;
4303 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4304 if(U_FAILURE(errorCode)) {
4305 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4306 return;
4307 }
4308
4309 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4310 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
73c04bcf
A
4311 uSource = (const UChar*)in;
4312 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
4313 cTarget = cBuf;
4314 cTargetLimit = cBuf +uBufSize*5;
4315 uTarget = uBuf;
4316 uTargetLimit = uBuf+ uBufSize*5;
4317 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4318 if(U_FAILURE(errorCode)){
4319 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4320 return;
4321 }
4322 cSource = cBuf;
4323 cSourceLimit =cTarget;
4324 test =uBuf;
4325 myOff=offsets;
4326 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4327 if(U_FAILURE(errorCode)){
4328 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4329 return;
4330 }
73c04bcf 4331 uSource = (const UChar*)in;
b75a7d8f
A
4332 while(uSource<uSourceLimit){
4333 if(*test!=*uSource){
4334 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4335 }
4336 else{
4337 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4338 }
4339 uSource++;
4340 test++;
4341 }
73c04bcf
A
4342 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4343 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
4344 /*Test for the condition where there is an invalid character*/
4345 ucnv_reset(cnv);
4346 {
4347 static const uint8_t source2[]={0x0e,0x24,0x053};
4348 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4349 }
4350 ucnv_close(cnv);
4351 free(uBuf);
4352 free(cBuf);
4353 free(offsets);
4354}
4355
4356static void
4357TestISO_2022_CN() {
4358 /* test input */
4359 static const uint16_t in[]={
4360 /* jitterbug 951 */
4361 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4362 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4363 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4364 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4365 0x0020, 0x0045, 0x004e, 0x0044,
4366 /**/
4367 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4368 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4369 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4370 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4371 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4372 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4373 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4374 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4375 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4376 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4377 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4378 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4379 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4380 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4381 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4382 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4383 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4384
4385 };
4386 const UChar* uSource;
4387 const UChar* uSourceLimit;
4388 const char* cSource;
4389 const char* cSourceLimit;
4390 UChar *uTargetLimit =NULL;
4391 UChar *uTarget;
4392 char *cTarget;
4393 const char *cTargetLimit;
4394 char *cBuf;
4395 UChar *uBuf,*test;
4396 int32_t uBufSize = 180;
4397 UErrorCode errorCode=U_ZERO_ERROR;
4398 UConverter *cnv;
4399 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4400 int32_t* myOff= offsets;
4401 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4402 if(U_FAILURE(errorCode)) {
4403 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4404 return;
4405 }
4406
4407 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4408 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
73c04bcf
A
4409 uSource = (const UChar*)in;
4410 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
b75a7d8f
A
4411 cTarget = cBuf;
4412 cTargetLimit = cBuf +uBufSize*5;
4413 uTarget = uBuf;
4414 uTargetLimit = uBuf+ uBufSize*5;
4415 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4416 if(U_FAILURE(errorCode)){
4417 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4418 return;
4419 }
4420 cSource = cBuf;
4421 cSourceLimit =cTarget;
4422 test =uBuf;
4423 myOff=offsets;
4424 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4425 if(U_FAILURE(errorCode)){
4426 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4427 return;
4428 }
73c04bcf 4429 uSource = (const UChar*)in;
b75a7d8f
A
4430 while(uSource<uSourceLimit){
4431 if(*test!=*uSource){
4432 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4433 }
4434 else{
4435 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4436 }
4437 uSource++;
4438 test++;
4439 }
4440 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
73c04bcf
A
4441 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4442 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4443 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
b75a7d8f
A
4444 TestJitterbug930("csISO2022CN");
4445 /*Test for the condition where there is an invalid character*/
4446 ucnv_reset(cnv);
4447 {
4448 static const uint8_t source2[]={0x0e,0x24,0x053};
4449 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4450 }
4451
4452 ucnv_close(cnv);
4453 free(uBuf);
4454 free(cBuf);
4455 free(offsets);
4456}
4457
d5d484b0
A
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
typedef struct {
    const char * converterName;   /* converter name handed to ucnv_open() */
    const char * inputText;       /* raw input bytes fed to ucnv_toUnicode() */
    int          inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4464
4465/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4466static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4467 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4468 if (reason > UCNV_IRREGULAR)
4469 return;
4470 if (reason != UCNV_IRREGULAR)
4471 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4472 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4473 *err = U_ZERO_ERROR;
4474 ucnv_cbToUWriteSub(toArgs,0,err);
4475}
4476
4477enum { kEmptySegmentToUCharsMax = 64 };
4478static void TestJitterbug6175(void) {
4479 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4480 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4481 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4482 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4483 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4484 static const EmptySegmentTest emptySegmentTests[] = {
4485 /* converterName inputText inputTextLength */
4486 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4487 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4488 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4489 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4490 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4491 /* terminator: */
4492 { NULL, NULL, 0, }
4493 };
4494 const EmptySegmentTest * testPtr;
4495 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4496 UErrorCode err = U_ZERO_ERROR;
4497 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4498 if (U_FAILURE(err)) {
4499 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4500 return;
4501 }
4502 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4503 if (U_FAILURE(err)) {
4504 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4505 ucnv_close(cnv);
4506 return;
4507 }
4508 {
4509 UChar toUChars[kEmptySegmentToUCharsMax];
4510 UChar * toUCharsPtr = toUChars;
4511 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4512 const char * inCharsPtr = testPtr->inputText;
4513 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4514 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4515 }
4516 ucnv_close(cnv);
4517 }
4518}
4519
b75a7d8f
A
4520static void
4521TestEBCDIC_STATEFUL() {
4522 /* test input */
4523 static const uint8_t in[]={
4524 0x61,
4525 0x1a,
4526 0x0f, 0x4b,
4527 0x42,
4528 0x40,
4529 0x36,
4530 };
4531
4532 /* expected test results */
374ca955 4533 static const int32_t results[]={
b75a7d8f
A
4534 /* number of bytes read, code point */
4535 1, 0x002f,
4536 1, 0x0092,
4537 2, 0x002e,
4538 1, 0xff62,
4539 1, 0x0020,
4540 1, 0x0096,
4541
4542 };
4543 static const uint8_t in2[]={
4544 0x0f,
4545 0xa1,
4546 0x01
4547 };
4548
4549 /* expected test results */
374ca955 4550 static const int32_t results2[]={
b75a7d8f
A
4551 /* number of bytes read, code point */
4552 2, 0x203E,
4553 1, 0x0001,
4554 };
4555
4556 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4557 UErrorCode errorCode=U_ZERO_ERROR;
4558 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4559 if(U_FAILURE(errorCode)) {
4560 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4561 return;
4562 }
4563 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4564 ucnv_reset(cnv);
4565 /* Test the condition when source >= sourceLimit */
4566 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4567 ucnv_reset(cnv);
4568 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4569 {
4570 static const uint8_t source1[]={0x0f};
4571 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4572 }
4573 /*Test for the condition where there is an invalid character*/
4574 ucnv_reset(cnv);
4575 {
4576 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4577 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4578 }
4579 ucnv_reset(cnv);
4580 source=(const char*)in2;
4581 limit=(const char*)in2+sizeof(in2);
4582 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4583 ucnv_close(cnv);
4584
4585}
4586
4587static void
4588TestGB18030() {
4589 /* test input */
4590 static const uint8_t in[]={
4591 0x24,
4592 0x7f,
4593 0x81, 0x30, 0x81, 0x30,
4594 0xa8, 0xbf,
4595 0xa2, 0xe3,
4596 0xd2, 0xbb,
4597 0x82, 0x35, 0x8f, 0x33,
4598 0x84, 0x31, 0xa4, 0x39,
4599 0x90, 0x30, 0x81, 0x30,
4600 0xe3, 0x32, 0x9a, 0x35
4601#if 0
4602 /*
4603 * Feature removed markus 2000-oct-26
4604 * Only some codepages must match surrogate pairs into supplementary code points -
4605 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4606 * GB 18030 provides direct encodings for supplementary code points, therefore
4607 * it must not combine two single-encoded surrogates into one code point.
4608 */
4609 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4610#endif
4611 };
4612
4613 /* expected test results */
374ca955 4614 static const int32_t results[]={
b75a7d8f
A
4615 /* number of bytes read, code point */
4616 1, 0x24,
4617 1, 0x7f,
4618 4, 0x80,
4619 2, 0x1f9,
4620 2, 0x20ac,
4621 2, 0x4e00,
4622 4, 0x9fa6,
4623 4, 0xffff,
4624 4, 0x10000,
4625 4, 0x10ffff
4626#if 0
4627 /* Feature removed. See comment above. */
4628 8, 0x10000
4629#endif
4630 };
4631
4632/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4633 UErrorCode errorCode=U_ZERO_ERROR;
4634 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4635 if(U_FAILURE(errorCode)) {
4636 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4637 return;
4638 }
4639 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4640 ucnv_close(cnv);
4641}
4642
4643static void
4644TestLMBCS() {
4645 /* LMBCS-1 string */
4646 static const uint8_t pszLMBCS[]={
4647 0x61,
4648 0x01, 0x29,
4649 0x81,
4650 0xA0,
4651 0x0F, 0x27,
4652 0x0F, 0x91,
4653 0x14, 0x0a, 0x74,
4654 0x14, 0xF6, 0x02,
4655 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4656 0x10, 0x88, 0xA0,
4657 };
4658
4659 /* Unicode UChar32 equivalents */
4660 static const UChar32 pszUnicode32[]={
4661 /* code point */
4662 0x00000061,
4663 0x00002013,
4664 0x000000FC,
4665 0x000000E1,
4666 0x00000007,
4667 0x00000091,
4668 0x00000a74,
4669 0x00000200,
4670 0x00023456, /* code point for surrogate pair */
4671 0x00005516
4672 };
4673
4674/* Unicode UChar equivalents */
4675 static const UChar pszUnicode[]={
4676 /* code point */
4677 0x0061,
4678 0x2013,
4679 0x00FC,
4680 0x00E1,
4681 0x0007,
4682 0x0091,
4683 0x0a74,
4684 0x0200,
4685 0xD84D, /* low surrogate */
4686 0xDC56, /* high surrogate */
4687 0x5516
4688 };
4689
4690/* expected test results */
4691 static const int offsets32[]={
4692 /* number of bytes read, code point */
4693 0,
4694 1,
4695 3,
4696 4,
4697 5,
4698 7,
4699 9,
4700 12,
4701 15,
4702 21,
4703 24
4704 };
4705
4706/* expected test results */
4707 static const int offsets[]={
4708 /* number of bytes read, code point */
4709 0,
4710 1,
4711 3,
4712 4,
4713 5,
4714 7,
4715 9,
4716 12,
4717 15,
4718 18,
4719 21,
4720 24
4721 };
4722
4723
4724 UConverter *cnv;
4725
4726#define NAME_LMBCS_1 "LMBCS-1"
4727#define NAME_LMBCS_2 "LMBCS-2"
4728
4729
4730 /* Some basic open/close/property tests on some LMBCS converters */
4731 {
4732
4733 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4734 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4735 char get_subchars [1];
4736 const char * get_name;
4737 UConverter *cnv1;
4738 UConverter *cnv2;
4739
4740 int8_t len = sizeof(get_subchars);
4741
4742 UErrorCode errorCode=U_ZERO_ERROR;
4743
4744 /* Open */
4745 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4746 if(U_FAILURE(errorCode)) {
4747 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4748 return;
4749 }
4750 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4751 if(U_FAILURE(errorCode)) {
4752 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4753 return;
4754 }
4755
4756 /* Name */
4757 get_name = ucnv_getName (cnv1, &errorCode);
4758 if (strcmp(NAME_LMBCS_1,get_name)){
4759 log_err("Unexpected converter name: %s\n", get_name);
4760 }
4761 get_name = ucnv_getName (cnv2, &errorCode);
4762 if (strcmp(NAME_LMBCS_2,get_name)){
4763 log_err("Unexpected converter name: %s\n", get_name);
4764 }
4765
4766 /* substitution chars */
4767 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4768 if(U_FAILURE(errorCode)) {
4769 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4770 }
4771 if (len!=1){
4772 log_err("Unexpected length of sub chars\n");
4773 }
4774 if (get_subchars[0] != expected_subchars[0]){
4775 log_err("Unexpected value of sub chars\n");
4776 }
4777 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4778 if(U_FAILURE(errorCode)) {
4779 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4780 }
4781 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4782 if(U_FAILURE(errorCode)) {
4783 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4784 }
4785 if (len!=1){
4786 log_err("Unexpected length of sub chars\n");
4787 }
4788 if (get_subchars[0] != new_subchars[0]){
4789 log_err("Unexpected value of sub chars\n");
4790 }
4791 ucnv_close(cnv1);
4792 ucnv_close(cnv2);
4793
4794 }
4795
4796 /* LMBCS to Unicode - offsets */
4797 {
4798 UErrorCode errorCode=U_ZERO_ERROR;
4799
73c04bcf
A
4800 const char * pSource = (const char *)pszLMBCS;
4801 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
b75a7d8f
A
4802
4803 UChar Out [sizeof(pszUnicode) + 1];
4804 UChar * pOut = Out;
4805 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4806
4807 int32_t off [sizeof(offsets)];
4808
4809 /* last 'offset' in expected results is just the final size.
4810 (Makes other tests easier). Compensate here: */
4811
4812 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4813
4814
4815
4816 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4817 if(U_FAILURE(errorCode)) {
4818 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4819 return;
4820 }
4821
4822
4823
4824 ucnv_toUnicode (cnv,
4825 &pOut,
4826 OutLimit,
73c04bcf
A
4827 &pSource,
4828 sourceLimit,
b75a7d8f
A
4829 off,
4830 TRUE,
4831 &errorCode);
4832
4833
4834 if (memcmp(off,offsets,sizeof(offsets)))
4835 {
4836 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4837 }
4838 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4839 {
4840 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4841 }
4842 ucnv_close(cnv);
4843 }
4844 {
4845 /* LMBCS to Unicode - getNextUChar */
4846 const char * sourceStart;
4847 const char *source=(const char *)pszLMBCS;
4848 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4849 const UChar32 *results= pszUnicode32;
4850 const int *off = offsets32;
4851
4852 UErrorCode errorCode=U_ZERO_ERROR;
4853 UChar32 uniChar;
4854
4855 cnv=ucnv_open("LMBCS-1", &errorCode);
4856 if(U_FAILURE(errorCode)) {
4857 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4858 return;
4859 }
4860 else
4861 {
4862
4863 while(source<limit) {
4864 sourceStart=source;
4865 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4866 if(U_FAILURE(errorCode)) {
4867 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4868 break;
4869 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4870 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4871 uniChar, (source-sourceStart), *results, *off);
4872 break;
4873 }
4874 results++;
4875 off++;
4876 }
4877 }
4878 ucnv_close(cnv);
4879 }
4880 { /* test locale & optimization group operations: Unicode to LMBCS */
4881
4882 UErrorCode errorCode=U_ZERO_ERROR;
4883 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4884 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4885 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4886 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4887 const UChar * pUniOut = uniString;
4888 UChar * pUniIn = uniString;
4889 uint8_t lmbcsString [4];
73c04bcf
A
4890 const char * pLMBCSOut = (const char *)lmbcsString;
4891 char * pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
4892
4893 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4894 ucnv_fromUnicode (cnv16he,
73c04bcf 4895 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
b75a7d8f
A
4896 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4897 NULL, 1, &errorCode);
4898
4899 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4900 {
4901 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4902 }
4903
73c04bcf 4904 pLMBCSIn= (char *)lmbcsString;
b75a7d8f
A
4905 pUniOut = uniString;
4906 ucnv_fromUnicode (cnv01us,
73c04bcf 4907 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
b75a7d8f
A
4908 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4909 NULL, 1, &errorCode);
4910
4911 if (lmbcsString[0] != 0x9F)
4912 {
4913 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4914 }
4915
4916 /* single byte char from mbcs char set */
4917 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
73c04bcf 4918 pLMBCSOut = (const char *)lmbcsString;
b75a7d8f
A
4919 pUniIn = uniString;
4920 ucnv_toUnicode (cnv16jp,
4921 &pUniIn, pUniIn + 1,
73c04bcf 4922 &pLMBCSOut, (pLMBCSOut + 1),
b75a7d8f 4923 NULL, 1, &errorCode);
73c04bcf 4924 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
b75a7d8f
A
4925 {
4926 log_err("Unexpected results from LMBCS-16 single byte char\n");
4927 }
4928 /* convert to group 1: should be 3 bytes */
73c04bcf 4929 pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
4930 pUniOut = uniString;
4931 ucnv_fromUnicode (cnv01us,
73c04bcf 4932 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
b75a7d8f
A
4933 &pUniOut, pUniOut + 1,
4934 NULL, 1, &errorCode);
73c04bcf 4935 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
b75a7d8f
A
4936 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4937 {
4938 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4939 }
73c04bcf 4940 pLMBCSOut = (const char *)lmbcsString;
b75a7d8f
A
4941 pUniIn = uniString;
4942 ucnv_toUnicode (cnv01us,
4943 &pUniIn, pUniIn + 1,
73c04bcf 4944 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
b75a7d8f 4945 NULL, 1, &errorCode);
73c04bcf 4946 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
b75a7d8f
A
4947 {
4948 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4949 }
73c04bcf 4950 pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
4951 pUniOut = uniString;
4952 ucnv_fromUnicode (cnv16jp,
73c04bcf 4953 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
b75a7d8f
A
4954 &pUniOut, pUniOut + 1,
4955 NULL, 1, &errorCode);
73c04bcf 4956 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
b75a7d8f
A
4957 {
4958 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4959 }
4960 ucnv_close(cnv16he);
4961 ucnv_close(cnv16jp);
4962 ucnv_close(cnv01us);
4963 }
4964 {
4965 /* Small source buffer testing, LMBCS -> Unicode */
4966
4967 UErrorCode errorCode=U_ZERO_ERROR;
4968
73c04bcf
A
4969 const char * pSource = (const char *)pszLMBCS;
4970 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
b75a7d8f
A
4971 int codepointCount = 0;
4972
4973 UChar Out [sizeof(pszUnicode) + 1];
4974 UChar * pOut = Out;
4975 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4976
4977
4978 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4979 if(U_FAILURE(errorCode)) {
4980 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4981 return;
4982 }
4983
4984
4985 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4986 {
4987 ucnv_toUnicode (cnv,
4988 &pOut,
4989 OutLimit,
73c04bcf
A
4990 &pSource,
4991 (pSource+1), /* claim that this is a 1- byte buffer */
b75a7d8f
A
4992 NULL,
4993 FALSE, /* FALSE means there might be more chars in the next buffer */
4994 &errorCode);
4995
4996 if (U_SUCCESS (errorCode))
4997 {
73c04bcf 4998 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
b75a7d8f
A
4999 {
5000 /* we are on to the next code point: check value */
5001
5002 if (Out[0] != pszUnicode[codepointCount]){
5003 log_err("LMBCS->Uni result %lx should have been %lx \n",
5004 Out[0], pszUnicode[codepointCount]);
5005 }
5006
5007 pOut = Out; /* reset for accumulating next code point */
5008 codepointCount++;
5009 }
5010 }
5011 else
5012 {
5013 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5014 }
5015 }
5016 {
5017 /* limits & surrogate error testing */
73c04bcf
A
5018 char LIn [sizeof(pszLMBCS)];
5019 const char * pLIn = LIn;
b75a7d8f
A
5020
5021 char LOut [sizeof(pszLMBCS)];
5022 char * pLOut = LOut;
5023
5024 UChar UOut [sizeof(pszUnicode)];
5025 UChar * pUOut = UOut;
5026
5027 UChar UIn [sizeof(pszUnicode)];
5028 const UChar * pUIn = UIn;
5029
5030 int32_t off [sizeof(offsets)];
5031 UChar32 uniChar;
5032
5033 errorCode=U_ZERO_ERROR;
5034
5035 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5036 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
5037 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5038 {
5039 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5040 }
5041 errorCode=U_ZERO_ERROR;
5042 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5043 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5044 {
5045 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5046 }
5047 errorCode=U_ZERO_ERROR;
5048
5049 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5050 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5051 {
5052 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5053 }
5054 errorCode=U_ZERO_ERROR;
5055
5056 /* 0 byte source request - no error, no pointer movement */
5057 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5058 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5059 if(U_FAILURE(errorCode)) {
5060 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5061 }
5062 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5063 {
5064 log_err("Unexpected pointer move in 0 byte source request \n");
5065 }
5066 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5067 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
374ca955 5068 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
b75a7d8f
A
5069 {
5070 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5071 }
5072 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5073 {
5074 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5075 }
5076 errorCode = U_ZERO_ERROR;
5077
5078 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5079
5080 pUIn = pszUnicode;
5081 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5082 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5083 {
5084 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5085 }
5086
5087 errorCode = U_ZERO_ERROR;
5088
73c04bcf
A
5089 pLIn = (const char *)pszLMBCS;
5090 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5091 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
b75a7d8f
A
5092 {
5093 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5094 }
5095
5096 /* unpaired or chopped LMBCS surrogates */
5097
5098 /* OK high surrogate, Low surrogate is chopped */
73c04bcf
A
5099 LIn [0] = (char)0x14;
5100 LIn [1] = (char)0xD8;
5101 LIn [2] = (char)0x01;
5102 LIn [3] = (char)0x14;
5103 LIn [4] = (char)0xDC;
b75a7d8f
A
5104 pLIn = LIn;
5105 errorCode = U_ZERO_ERROR;
5106 pUOut = UOut;
5107
374ca955 5108 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
5109 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5110 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5111 {
5112 log_err("Unexpected results on chopped low surrogate\n");
5113 }
5114
5115 /* chopped at surrogate boundary */
73c04bcf
A
5116 LIn [0] = (char)0x14;
5117 LIn [1] = (char)0xD8;
5118 LIn [2] = (char)0x01;
b75a7d8f
A
5119 pLIn = LIn;
5120 errorCode = U_ZERO_ERROR;
5121 pUOut = UOut;
5122
5123 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5124 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5125 {
5126 log_err("Unexpected results on chopped at surrogate boundary \n");
5127 }
5128
5129 /* unpaired surrogate plus valid Unichar */
73c04bcf
A
5130 LIn [0] = (char)0x14;
5131 LIn [1] = (char)0xD8;
5132 LIn [2] = (char)0x01;
5133 LIn [3] = (char)0x14;
5134 LIn [4] = (char)0xC9;
5135 LIn [5] = (char)0xD0;
b75a7d8f
A
5136 pLIn = LIn;
5137 errorCode = U_ZERO_ERROR;
5138 pUOut = UOut;
5139
5140 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5141 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5142 {
5143 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5144 }
5145
5146 /* unpaired surrogate plus chopped Unichar */
73c04bcf
A
5147 LIn [0] = (char)0x14;
5148 LIn [1] = (char)0xD8;
5149 LIn [2] = (char)0x01;
5150 LIn [3] = (char)0x14;
5151 LIn [4] = (char)0xC9;
b75a7d8f
A
5152
5153 pLIn = LIn;
5154 errorCode = U_ZERO_ERROR;
5155 pUOut = UOut;
5156
5157 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5158 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5159 {
5160 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5161 }
5162
5163 /* unpaired surrogate plus valid non-Unichar */
73c04bcf
A
5164 LIn [0] = (char)0x14;
5165 LIn [1] = (char)0xD8;
5166 LIn [2] = (char)0x01;
5167 LIn [3] = (char)0x0F;
5168 LIn [4] = (char)0x3B;
b75a7d8f
A
5169
5170 pLIn = LIn;
5171 errorCode = U_ZERO_ERROR;
5172 pUOut = UOut;
5173
5174 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5175 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5176 {
5177 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5178 }
5179
5180 /* unpaired surrogate plus chopped non-Unichar */
73c04bcf
A
5181 LIn [0] = (char)0x14;
5182 LIn [1] = (char)0xD8;
5183 LIn [2] = (char)0x01;
5184 LIn [3] = (char)0x0F;
b75a7d8f
A
5185
5186 pLIn = LIn;
5187 errorCode = U_ZERO_ERROR;
5188 pUOut = UOut;
5189
5190 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5191
5192 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5193 {
5194 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5195 }
5196 }
5197 }
5198 ucnv_close(cnv); /* final cleanup */
5199}
5200
5201
5202static void TestJitterbug255()
5203{
73c04bcf
A
5204 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5205 const char *testBuffer = (const char *)testBytes;
5206 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
b75a7d8f
A
5207 UErrorCode status = U_ZERO_ERROR;
5208 UChar32 result;
5209 UConverter *cnv = 0;
5210
5211 cnv = ucnv_open("shift-jis", &status);
5212 if (U_FAILURE(status) || cnv == 0) {
5213 log_data_err("Failed to open the converter for SJIS.\n");
5214 return;
5215 }
5216 while (testBuffer != testEnd)
5217 {
73c04bcf 5218 result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
b75a7d8f
A
5219 if (U_FAILURE(status))
5220 {
5221 log_err("Failed to convert the next UChar for SJIS.\n");
5222 break;
5223 }
5224 }
5225 ucnv_close(cnv);
5226}
5227
5228static void TestEBCDICUS4XML()
5229{
5230 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5231 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5232 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5233 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5234 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5235 UChar *unicodes = unicodes_x;
5236 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5237 char *target = target_x;
5238 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5239 UErrorCode status = U_ZERO_ERROR;
5240 UConverter *cnv = 0;
5241
5242 cnv = ucnv_open("ebcdic-xml-us", &status);
5243 if (U_FAILURE(status) || cnv == 0) {
5244 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5245 return;
5246 }
5247 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5248 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5249 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5250 u_errorName(status));
5251 printUSeqErr(unicodes_x, 3);
5252 printUSeqErr(toUnicodeMaps, 3);
5253 }
5254 status = U_ZERO_ERROR;
5255 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5256 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5257 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5258 u_errorName(status));
5259 printSeqErr((const unsigned char*)target_x, 3);
5260 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5261 }
5262 ucnv_close(cnv);
5263}
73c04bcf 5264#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
b75a7d8f
A
5265
5266#if !UCONFIG_NO_COLLATION
5267
5268static void TestJitterbug981(){
374ca955
A
5269 const UChar* rules;
5270 int32_t rules_length, target_cap, bytes_needed, buff_size;
5271 UErrorCode status = U_ZERO_ERROR;
5272 UConverter *utf8cnv;
5273 UCollator* myCollator;
5274 char *buff;
5275 int numNeeded=0;
5276 utf8cnv = ucnv_open ("utf8", &status);
5277 if(U_FAILURE(status)){
5278 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5279 return;
5280 }
5281 myCollator = ucol_open("zh", &status);
5282 if(U_FAILURE(status)){
5283 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5284 return;
5285 }
b75a7d8f 5286
374ca955
A
5287 rules = ucol_getRules(myCollator, &rules_length);
5288 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5289 buff = malloc(buff_size);
b75a7d8f 5290
374ca955
A
5291 target_cap = 0;
5292 do {
5293 ucnv_reset(utf8cnv);
5294 status = U_ZERO_ERROR;
5295 if(target_cap >= buff_size) {
5296 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5297 return;
5298 }
5299 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5300 rules, rules_length, &status);
5301 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5302 if(numNeeded!=0 && numNeeded!= bytes_needed){
5303 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5304 }
5305 numNeeded = bytes_needed;
5306 } while (status == U_BUFFER_OVERFLOW_ERROR);
5307 ucol_close(myCollator);
5308 ucnv_close(utf8cnv);
5309 free(buff);
b75a7d8f
A
5310}
5311
5312#endif
5313
5314static void TestJitterbug1293(){
73c04bcf 5315 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
b75a7d8f
A
5316 char target[256];
5317 UErrorCode status = U_ZERO_ERROR;
5318 UConverter* conv=NULL;
5319 int32_t target_cap, bytes_needed, numNeeded = 0;
5320 conv = ucnv_open("shift-jis",&status);
5321 if(U_FAILURE(status)){
5322 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5323 return;
5324 }
5325
5326 do{
5327 target_cap =0;
5328 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5329 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5330 if(numNeeded!=0 && numNeeded!= bytes_needed){
5331 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5332 }
5333 numNeeded = bytes_needed;
5334 } while (status == U_BUFFER_OVERFLOW_ERROR);
5335 if(U_FAILURE(status)){
5336 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5337 return;
5338 }
5339 ucnv_close(conv);
5340}
5341