]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/cintltst/nucnvtst.c
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
... / ...
CommitLineData
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11* Name Description
12* Steven R. Loomis 7/8/1999 Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "cmemory.h"
26#include "nucnvtst.h"
27
28static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
29static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
30#if !UCONFIG_NO_COLLATION
31static void TestJitterbug981(void);
32#endif
33static void TestJitterbug1293(void);
34static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
35static void TestConverterTypesAndStarters(void);
36static void TestAmbiguous(void);
37static void TestSignatureDetection(void);
38static void TestUTF7(void);
39static void TestIMAP(void);
40static void TestUTF8(void);
41static void TestCESU8(void);
42static void TestUTF16(void);
43static void TestUTF16BE(void);
44static void TestUTF16LE(void);
45static void TestUTF32(void);
46static void TestUTF32BE(void);
47static void TestUTF32LE(void);
48static void TestLATIN1(void);
49
50#if !UCONFIG_NO_LEGACY_CONVERSION
51static void TestSBCS(void);
52static void TestDBCS(void);
53static void TestMBCS(void);
54#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
55static void TestICCRunout(void);
56#endif
57
58#ifdef U_ENABLE_GENERIC_ISO_2022
59static void TestISO_2022(void);
60#endif
61
62static void TestISO_2022_JP(void);
63static void TestISO_2022_JP_1(void);
64static void TestISO_2022_JP_2(void);
65static void TestISO_2022_KR(void);
66static void TestISO_2022_KR_1(void);
67static void TestISO_2022_CN(void);
68#if 0
69 /*
70 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
71 */
72static void TestISO_2022_CN_EXT(void);
73#endif
74static void TestJIS(void);
75static void TestHZ(void);
76#endif
77
78static void TestSCSU(void);
79
80#if !UCONFIG_NO_LEGACY_CONVERSION
81static void TestEBCDIC_STATEFUL(void);
82static void TestGB18030(void);
83static void TestLMBCS(void);
84static void TestJitterbug255(void);
85static void TestEBCDICUS4XML(void);
86#if 0
87 /*
88 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
89 */
90static void TestJitterbug915(void);
91#endif
92static void TestISCII(void);
93
94static void TestCoverageMBCS(void);
95static void TestJitterbug2346(void);
96static void TestJitterbug2411(void);
97static void TestJB5275(void);
98static void TestJB5275_1(void);
99static void TestJitterbug6175(void);
100#endif
101
102static void TestInBufSizes(void);
103
104static void TestRoundTrippingAllUTF(void);
105static void TestConv(const uint16_t in[],
106 int len,
107 const char* conv,
108 const char* lang,
109 char byteArr[],
110 int byteArrLen);
111
112/* open a converter, using test data if it begins with '@' */
113static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
114
115
/* Number of elements in the scratch output and offset buffers used by the
   conversion helpers; also the "unlimited" chunk size for the globals below. */
#define NEW_MAX_BUFFER 999

/* Maximum input / output chunk handed to the converter per call, and the
   human-readable label of the conversion currently under test. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Arguments and result are fully parenthesized so that
   operator precedence in the caller's expressions cannot change the meaning.
   Note that the selected argument is evaluated twice. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
123
124static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
125{
126 if(cnv && cnv[0] == '@') {
127 return ucnv_openPackage(loadTestData(err), cnv+1, err);
128 } else {
129 return ucnv_open(cnv, err);
130 }
131}
132
/* Writes the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
141
142static void printUSeq(const UChar* a, int len)
143{
144 int i=0;
145 log_verbose("{U+");
146 while (i<len) log_verbose("0x%04x ", a[i++]);
147 log_verbose("}\n");
148}
149
/* Same output as printSeq(), but written unconditionally to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
158
159static void printUSeqErr(const UChar* a, int len)
160{
161 int i=0;
162 fprintf(stderr, "{U+");
163 while (i<len)
164 fprintf(stderr, "0x%04x ", a[i++]);
165 fprintf(stderr,"}\n");
166}
167
168static void
169TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
170{
171 const char* s0;
172 const char* s=(char*)source;
173 const int32_t *r=results;
174 UErrorCode errorCode=U_ZERO_ERROR;
175 UChar32 c;
176
177 while(s<limit) {
178 s0=s;
179 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
181 break; /* no more significant input */
182 } else if(U_FAILURE(errorCode)) {
183 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
184 break;
185 } else if(
186 /* test the expected number of input bytes only if >=0 */
187 (*r>=0 && (int32_t)(s-s0)!=*r) ||
188 c!=*(r+1)
189 ) {
190 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
191 message, c, (s-s0), *(r+1), *r);
192 break;
193 }
194 r+=2;
195 }
196}
197
198static void
199TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
200{
201 const char* s=(char*)source;
202 UErrorCode errorCode=U_ZERO_ERROR;
203 uint32_t c;
204 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
205 if(errorCode != expected){
206 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
207 }
208 if(c != 0xFFFD && c != 0xffff){
209 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
210 }
211
212}
213
214static void TestInBufSizes(void)
215{
216 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
217#if 1
218 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
219 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
223 TestNewConvertWithBufferSizes(1,1);
224 TestNewConvertWithBufferSizes(2,3);
225 TestNewConvertWithBufferSizes(3,2);
226#endif
227}
228
229static void TestOutBufSizes(void)
230{
231#if 1
232 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
233 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
234 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
235 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
236 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
237 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
238
239#endif
240}
241
242
243void addTestNewConvert(TestNode** root)
244{
245#if !UCONFIG_NO_FILE_IO
246 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
247 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
248#endif
249 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
250 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
251 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
252 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
253 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
254 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
255
256 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
257 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
258 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
259 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
260 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
261 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
262 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
263 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
264
265#if !UCONFIG_NO_LEGACY_CONVERSION
266 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
267#endif
268
269 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
270
271#if !UCONFIG_NO_LEGACY_CONVERSION
272 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
273#if !UCONFIG_NO_FILE_IO
274 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
275 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
276#endif
277 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
278
279#ifdef U_ENABLE_GENERIC_ISO_2022
280 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
281#endif
282
283 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
284 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
285 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
286 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
287 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
288 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
289 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
290 /*
291 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
292 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
293 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
294 */
295 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
296#endif
297
298 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
299
300#if !UCONFIG_NO_LEGACY_CONVERSION
301 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
302 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
303 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
304 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
305 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
306 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
307 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
308#if !UCONFIG_NO_COLLATION
309 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
310#endif
311
312 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
313#endif
314
315
316#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
317 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
318#endif
319
320 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
321
322#if !UCONFIG_NO_LEGACY_CONVERSION
323 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
324 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
325 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
326#endif
327
328}
329
330
331/* Note that this test already makes use of statics, so it's not really
332 multithread safe.
333 This convenience function lets us make the error messages actually useful.
334*/
335
336static void setNuConvTestName(const char *codepage, const char *direction)
337{
338 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
339 codepage,
340 direction,
341 (int)gInBufferSize,
342 (int)gOutBufferSize);
343}
344
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* the conversion produced the expected result */
    TC_OK       = 0,
    /* the result did not match; an error has been printed */
    TC_MISMATCH = 1,
    /* the test itself failed; the error was already printed, so do not print another */
    TC_FAIL     = 2
} ETestConvertResult;
351
352/* Note: This function uses global variables and it will not do offset
353checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
354static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
355 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
356{
357 UErrorCode status = U_ZERO_ERROR;
358 UConverter *conv = 0;
359 char junkout[NEW_MAX_BUFFER]; /* FIX */
360 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
361 char *p;
362 const UChar *src;
363 char *end;
364 char *targ;
365 int32_t *offs;
366 int i;
367 int32_t realBufferSize;
368 char *realBufferEnd;
369 const UChar *realSourceEnd;
370 const UChar *sourceLimit;
371 UBool checkOffsets = TRUE;
372 UBool doFlush;
373
374 for(i=0;i<NEW_MAX_BUFFER;i++)
375 junkout[i] = (char)0xF0;
376 for(i=0;i<NEW_MAX_BUFFER;i++)
377 junokout[i] = 0xFF;
378
379 setNuConvTestName(codepage, "FROM");
380
381 log_verbose("\n========= %s\n", gNuConvTestName);
382
383 conv = my_ucnv_open(codepage, &status);
384
385 if(U_FAILURE(status))
386 {
387 log_data_err("Couldn't open converter %s\n",codepage);
388 return TC_FAIL;
389 }
390 if(useFallback){
391 ucnv_setFallback(conv,useFallback);
392 }
393
394 log_verbose("Converter opened..\n");
395
396 src = source;
397 targ = junkout;
398 offs = junokout;
399
400 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
401 realBufferEnd = junkout + realBufferSize;
402 realSourceEnd = source + sourceLen;
403
404 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
405 checkOffsets = FALSE;
406
407 do
408 {
409 end = nct_min(targ + gOutBufferSize, realBufferEnd);
410 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
411
412 doFlush = (UBool)(sourceLimit == realSourceEnd);
413
414 if(targ == realBufferEnd) {
415 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
416 return TC_FAIL;
417 }
418 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
419
420
421 status = U_ZERO_ERROR;
422
423 ucnv_fromUnicode (conv,
424 &targ,
425 end,
426 &src,
427 sourceLimit,
428 checkOffsets ? offs : NULL,
429 doFlush, /* flush if we're at the end of the input data */
430 &status);
431 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
432
433 if(U_FAILURE(status)) {
434 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
435 return TC_FAIL;
436 }
437
438 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
439 sourceLen, targ-junkout);
440
441 if(getTestOption(VERBOSITY_OPTION))
442 {
443 char junk[9999];
444 char offset_str[9999];
445 char *ptr;
446
447 junk[0] = 0;
448 offset_str[0] = 0;
449 for(ptr = junkout;ptr<targ;ptr++) {
450 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
451 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
452 }
453
454 log_verbose(junk);
455 printSeq((const uint8_t *)expect, expectLen);
456 if ( checkOffsets ) {
457 log_verbose("\nOffsets:");
458 log_verbose(offset_str);
459 }
460 log_verbose("\n");
461 }
462 ucnv_close(conv);
463
464 if(expectLen != targ-junkout) {
465 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
466 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
467 printf("\nGot:");
468 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
469 printf("\nExpected:");
470 printSeqErr((const unsigned char*)expect, expectLen);
471 return TC_MISMATCH;
472 }
473
474 if (checkOffsets && (expectOffsets != 0) ) {
475 log_verbose("comparing %d offsets..\n", targ-junkout);
476 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
477 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
478 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
479 log_err("\n");
480 log_err("Got : ");
481 for(p=junkout;p<targ;p++) {
482 log_err("%d,", junokout[p-junkout]);
483 }
484 log_err("\n");
485 log_err("Expected: ");
486 for(i=0; i<(targ-junkout); i++) {
487 log_err("%d,", expectOffsets[i]);
488 }
489 log_err("\n");
490 }
491 }
492
493 log_verbose("comparing..\n");
494 if(!memcmp(junkout, expect, expectLen)) {
495 log_verbose("Matches!\n");
496 return TC_OK;
497 } else {
498 log_err("String does not match u->%s\n", gNuConvTestName);
499 printUSeqErr(source, sourceLen);
500 printf("\nGot:");
501 printSeqErr((const unsigned char *)junkout, expectLen);
502 printf("\nExpected:");
503 printSeqErr((const unsigned char *)expect, expectLen);
504
505 return TC_MISMATCH;
506 }
507}
508
509/* Note: This function uses global variables and it will not do offset
510checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
511static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
512 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
513{
514 UErrorCode status = U_ZERO_ERROR;
515 UConverter *conv = 0;
516 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
517 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
518 const char *src;
519 const char *realSourceEnd;
520 const char *srcLimit;
521 UChar *p;
522 UChar *targ;
523 UChar *end;
524 int32_t *offs;
525 int i;
526 UBool checkOffsets = TRUE;
527
528 int32_t realBufferSize;
529 UChar *realBufferEnd;
530
531
532 for(i=0;i<NEW_MAX_BUFFER;i++)
533 junkout[i] = 0xFFFE;
534
535 for(i=0;i<NEW_MAX_BUFFER;i++)
536 junokout[i] = -1;
537
538 setNuConvTestName(codepage, "TO");
539
540 log_verbose("\n========= %s\n", gNuConvTestName);
541
542 conv = my_ucnv_open(codepage, &status);
543
544 if(U_FAILURE(status))
545 {
546 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
547 return TC_FAIL;
548 }
549 if(useFallback){
550 ucnv_setFallback(conv,useFallback);
551 }
552 log_verbose("Converter opened..\n");
553
554 src = (const char *)source;
555 targ = junkout;
556 offs = junokout;
557
558 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
559 realBufferEnd = junkout + realBufferSize;
560 realSourceEnd = src + sourcelen;
561
562 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
563 checkOffsets = FALSE;
564
565 do
566 {
567 end = nct_min( targ + gOutBufferSize, realBufferEnd);
568 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
569
570 if(targ == realBufferEnd)
571 {
572 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
573 return TC_FAIL;
574 }
575 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
576
577 /* oldTarg = targ; */
578
579 status = U_ZERO_ERROR;
580
581 ucnv_toUnicode (conv,
582 &targ,
583 end,
584 &src,
585 srcLimit,
586 checkOffsets ? offs : NULL,
587 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
588 &status);
589
590 /* offs += (targ-oldTarg); */
591
592 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
593
594 if(U_FAILURE(status))
595 {
596 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
597 return TC_FAIL;
598 }
599
600 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
601 sourcelen, targ-junkout);
602 if(getTestOption(VERBOSITY_OPTION))
603 {
604 char junk[9999];
605 char offset_str[9999];
606 UChar *ptr;
607
608 junk[0] = 0;
609 offset_str[0] = 0;
610
611 for(ptr = junkout;ptr<targ;ptr++)
612 {
613 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
614 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
615 }
616
617 log_verbose(junk);
618 printUSeq(expect, expectlen);
619 if ( checkOffsets )
620 {
621 log_verbose("\nOffsets:");
622 log_verbose(offset_str);
623 }
624 log_verbose("\n");
625 }
626 ucnv_close(conv);
627
628 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
629
630 if (checkOffsets && (expectOffsets != 0))
631 {
632 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
633 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
634 log_err("Got: ");
635 for(p=junkout;p<targ;p++) {
636 log_err("%d,", junokout[p-junkout]);
637 }
638 log_err("\n");
639 log_err("Expected: ");
640 for(i=0; i<(targ-junkout); i++) {
641 log_err("%d,", expectOffsets[i]);
642 }
643 log_err("\n");
644 log_err("output: ");
645 for(i=0; i<(targ-junkout); i++) {
646 log_err("%X,", junkout[i]);
647 }
648 log_err("\n");
649 log_err("input: ");
650 for(i=0; i<(src-(const char *)source); i++) {
651 log_err("%X,", (unsigned char)source[i]);
652 }
653 log_err("\n");
654 }
655 }
656
657 if(!memcmp(junkout, expect, expectlen*2))
658 {
659 log_verbose("Matches!\n");
660 return TC_OK;
661 }
662 else
663 {
664 log_err("String does not match. %s\n", gNuConvTestName);
665 log_verbose("String does not match. %s\n", gNuConvTestName);
666 printf("\nGot:");
667 printUSeqErr(junkout, expectlen);
668 printf("\nExpected:");
669 printUSeqErr(expect, expectlen);
670 return TC_MISMATCH;
671 }
672}
673
674
675static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
676{
677/** test chars #1 */
678 /* 1 2 3 1Han 2Han 3Han . */
679 static const UChar sampleText[] =
680 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
681 static const UChar sampleTextRoundTripUnmappable[] =
682 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
683
684
685 static const uint8_t expectedUTF8[] =
686 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
687 static const int32_t toUTF8Offs[] =
688 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
689 static const int32_t fmUTF8Offs[] =
690 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
691
692#ifdef U_ENABLE_GENERIC_ISO_2022
693 /* Same as UTF8, but with ^[%B preceeding */
694 static const const uint8_t expectedISO2022[] =
695 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
696 static const int32_t toISO2022Offs[] =
697 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
698 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
699 static const int32_t fmISO2022Offs[] =
700 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
701#endif
702
703 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
704 static const uint8_t expectedIBM930[] =
705 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
706 static const int32_t toIBM930Offs[] =
707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
708 static const int32_t fmIBM930Offs[] =
709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
710
711 /* 1 2 3 0 h1 h2 h3 . MBCS*/
712 static const uint8_t expectedIBM943[] =
713 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
714 static const int32_t toIBM943Offs [] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
716 static const int32_t fmIBM943Offs[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
718
719 /* 1 2 3 0 h1 h2 h3 . DBCS*/
720 static const uint8_t expectedIBM9027[] =
721 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
722 static const int32_t toIBM9027Offs [] =
723 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
724
725 /* 1 2 3 0 <?> <?> <?> . SBCS*/
726 static const uint8_t expectedIBM920[] =
727 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
728 static const int32_t toIBM920Offs [] =
729 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
730
731 /* 1 2 3 0 <?> <?> <?> . SBCS*/
732 static const uint8_t expectedISO88593[] =
733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
734 static const int32_t toISO88593Offs[] =
735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
736
737 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
738 static const uint8_t expectedLATIN1[] =
739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
740 static const int32_t toLATIN1Offs[] =
741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
742
743
744 /* etc */
745 static const uint8_t expectedUTF16BE[] =
746 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
747 static const int32_t toUTF16BEOffs[]=
748 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
749 static const int32_t fmUTF16BEOffs[] =
750 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
751
752 static const uint8_t expectedUTF16LE[] =
753 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
754 static const int32_t toUTF16LEOffs[]=
755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756 static const int32_t fmUTF16LEOffs[] =
757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
758
759 static const uint8_t expectedUTF32BE[] =
760 { 0x00, 0x00, 0x00, 0x31,
761 0x00, 0x00, 0x00, 0x32,
762 0x00, 0x00, 0x00, 0x33,
763 0x00, 0x00, 0x00, 0x00,
764 0x00, 0x00, 0x4e, 0x00,
765 0x00, 0x00, 0x4e, 0x8c,
766 0x00, 0x00, 0x4e, 0x09,
767 0x00, 0x00, 0x00, 0x2e,
768 0x00, 0x02, 0x00, 0x21 };
769 static const int32_t toUTF32BEOffs[]=
770 { 0x00, 0x00, 0x00, 0x00,
771 0x01, 0x01, 0x01, 0x01,
772 0x02, 0x02, 0x02, 0x02,
773 0x03, 0x03, 0x03, 0x03,
774 0x04, 0x04, 0x04, 0x04,
775 0x05, 0x05, 0x05, 0x05,
776 0x06, 0x06, 0x06, 0x06,
777 0x07, 0x07, 0x07, 0x07,
778 0x08, 0x08, 0x08, 0x08,
779 0x08, 0x08, 0x08, 0x08 };
780 static const int32_t fmUTF32BEOffs[] =
781 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
782
783 static const uint8_t expectedUTF32LE[] =
784 { 0x31, 0x00, 0x00, 0x00,
785 0x32, 0x00, 0x00, 0x00,
786 0x33, 0x00, 0x00, 0x00,
787 0x00, 0x00, 0x00, 0x00,
788 0x00, 0x4e, 0x00, 0x00,
789 0x8c, 0x4e, 0x00, 0x00,
790 0x09, 0x4e, 0x00, 0x00,
791 0x2e, 0x00, 0x00, 0x00,
792 0x21, 0x00, 0x02, 0x00 };
793 static const int32_t toUTF32LEOffs[]=
794 { 0x00, 0x00, 0x00, 0x00,
795 0x01, 0x01, 0x01, 0x01,
796 0x02, 0x02, 0x02, 0x02,
797 0x03, 0x03, 0x03, 0x03,
798 0x04, 0x04, 0x04, 0x04,
799 0x05, 0x05, 0x05, 0x05,
800 0x06, 0x06, 0x06, 0x06,
801 0x07, 0x07, 0x07, 0x07,
802 0x08, 0x08, 0x08, 0x08,
803 0x08, 0x08, 0x08, 0x08 };
804 static const int32_t fmUTF32LEOffs[] =
805 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
806
807
808
809
810/** Test chars #2 **/
811
812 /* Sahha [health], slashed h's */
813 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
814 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
815
816 /* LMBCS */
817 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
818 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
819 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
820 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
821 /*********************************** START OF CODE finally *************/
822
823 gInBufferSize = insize;
824 gOutBufferSize = outsize;
825
826 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
827
828
829 /*UTF-8*/
830 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
831 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
832
833 log_verbose("Test surrogate behaviour for UTF8\n");
834 {
835 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
836 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
837 0xf0, 0x90, 0x90, 0x81,
838 0xef, 0xbf, 0xbd
839 };
840 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
841 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
842 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
843
844
845 }
846
847#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
848 /*ISO-2022*/
849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
850 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
851#endif
852
853 /*UTF16 LE*/
854 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
855 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
856 /*UTF16 BE*/
857 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
858 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
859 /*UTF32 LE*/
860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
861 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
862 /*UTF32 BE*/
863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
864 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
865
866 /*LATIN_1*/
867 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
868 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
869
870#if !UCONFIG_NO_LEGACY_CONVERSION
871 /*EBCDIC_STATEFUL*/
872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
873 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
874
875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
876 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
877
878 /*MBCS*/
879
880 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
881 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
882 /*DBCS*/
883 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
884 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
885 /*SBCS*/
886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
887 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
888 /*SBCS*/
889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
890 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
891#endif
892
893
894/****/
895
896 /*UTF-8*/
897 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
898 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
899#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
900 /*ISO-2022*/
901 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
902 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
903#endif
904
905 /*UTF16 LE*/
906 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
907 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
908 /*UTF16 BE*/
909 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
910 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
911 /*UTF32 LE*/
912 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
913 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
914 /*UTF32 BE*/
915 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
917
918#if !UCONFIG_NO_LEGACY_CONVERSION
919 /*EBCDIC_STATEFUL*/
920 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
921 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
922 /*MBCS*/
923 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
924 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
925#endif
926
927 /* Try it again to make sure it still works */
928 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
929 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
930
931#if !UCONFIG_NO_LEGACY_CONVERSION
932 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
933 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
934
935 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
936 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
937
938 /*LMBCS*/
939 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
940 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
941 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
942 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
943#endif
944
945 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
946 {
947 /* encode directly set D and set O */
948 static const uint8_t utf7[] = {
949 /*
950 Hi Mom -+Jjo--!
951 A+ImIDkQ.
952 +-
953 +ZeVnLIqe
954 */
955 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
956 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
957 0x2b, 0x2d,
958 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
959 };
960 static const UChar unicode[] = {
961 /*
962 Hi Mom -<WHITE SMILING FACE>-!
963 A<NOT IDENTICAL TO><ALPHA>.
964 +
965 [Japanese word "nihongo"]
966 */
967 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
968 0x41, 0x2262, 0x0391, 0x2e,
969 0x2b,
970 0x65e5, 0x672c, 0x8a9e
971 };
972 static const int32_t toUnicodeOffsets[] = {
973 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
974 15, 17, 19, 23,
975 24,
976 27, 29, 32
977 };
978 static const int32_t fromUnicodeOffsets[] = {
979 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
980 11, 12, 12, 12, 13, 13, 13, 13, 14,
981 15, 15,
982 16, 16, 16, 17, 17, 17, 18, 18, 18
983 };
984
985 /* same but escaping set O (the exclamation mark) */
986 static const uint8_t utf7Restricted[] = {
987 /*
988 Hi Mom -+Jjo--+ACE-
989 A+ImIDkQ.
990 +-
991 +ZeVnLIqe
992 */
993 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
994 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
995 0x2b, 0x2d,
996 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
997 };
998 static const int32_t toUnicodeOffsetsR[] = {
999 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1000 19, 21, 23, 27,
1001 28,
1002 31, 33, 36
1003 };
1004 static const int32_t fromUnicodeOffsetsR[] = {
1005 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1006 11, 12, 12, 12, 13, 13, 13, 13, 14,
1007 15, 15,
1008 16, 16, 16, 17, 17, 17, 18, 18, 18
1009 };
1010
1011 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1012
1013 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1014
1015 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1016
1017 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1018 }
1019
1020 /*
1021 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1022 * modified according to RFC 2060,
1023 * and supplemented with the one example in RFC 2060 itself.
1024 */
1025 {
1026 static const uint8_t imap[] = {
1027 /* Hi Mom -&Jjo--!
1028 A&ImIDkQ-.
1029 &-
1030 &ZeVnLIqe-
1031 \
1032 ~peter
1033 /mail
1034 /&ZeVnLIqe-
1035 /&U,BTFw-
1036 */
1037 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1038 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1039 0x26, 0x2d,
1040 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1041 0x5c,
1042 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1043 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1044 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1045 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1046 };
1047 static const UChar unicode[] = {
1048 /* Hi Mom -<WHITE SMILING FACE>-!
1049 A<NOT IDENTICAL TO><ALPHA>.
1050 &
1051 [Japanese word "nihongo"]
1052 \
1053 ~peter
1054 /mail
1055 /<65e5, 672c, 8a9e>
1056 /<53f0, 5317>
1057 */
1058 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1059 0x41, 0x2262, 0x0391, 0x2e,
1060 0x26,
1061 0x65e5, 0x672c, 0x8a9e,
1062 0x5c,
1063 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1064 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1065 0x2f, 0x65e5, 0x672c, 0x8a9e,
1066 0x2f, 0x53f0, 0x5317
1067 };
1068 static const int32_t toUnicodeOffsets[] = {
1069 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1070 15, 17, 19, 24,
1071 25,
1072 28, 30, 33,
1073 37,
1074 38, 39, 40, 41, 42, 43,
1075 44, 45, 46, 47, 48,
1076 49, 51, 53, 56,
1077 60, 62, 64
1078 };
1079 static const int32_t fromUnicodeOffsets[] = {
1080 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1081 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1082 15, 15,
1083 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1084 19,
1085 20, 21, 22, 23, 24, 25,
1086 26, 27, 28, 29, 30,
1087 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1088 35, 36, 36, 36, 37, 37, 37, 37, 37
1089 };
1090
1091 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1092
1093 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1094 }
1095
1096 /* Test UTF-8 bad data handling*/
1097 {
1098 static const uint8_t utf8[]={
1099 0x61,
1100 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1101 0x00,
1102 0x62,
1103 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1104 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1105 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1106 0xdf, 0xbf, /* 7ff */
1107 0xbf, /* truncated tail */
1108 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1109 0x02
1110 };
1111
1112 static const uint16_t utf8Expected[]={
1113 0x0061,
1114 0xfffd,
1115 0x0000,
1116 0x0062,
1117 0xfffd,
1118 0xfffd,
1119 0xdbff, 0xdfff,
1120 0x07ff,
1121 0xfffd,
1122 0xfffd,
1123 0x0002
1124 };
1125
1126 static const int32_t utf8Offsets[]={
1127 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1128 };
1129 testConvertToU(utf8, sizeof(utf8),
1130 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1131
1132 }
1133
1134 /* Test UTF-32BE bad data handling*/
1135 {
1136 static const uint8_t utf32[]={
1137 0x00, 0x00, 0x00, 0x61,
1138 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1139 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1140 0x00, 0x00, 0x00, 0x62,
1141 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1142 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1143 0x00, 0x00, 0x01, 0x62,
1144 0x00, 0x00, 0x02, 0x62
1145 };
1146 static const uint16_t utf32Expected[]={
1147 0x0061,
1148 0xfffd, /* 0x110000 out of range */
1149 0xDBFF, /* 0x10FFFF in range */
1150 0xDFFF,
1151 0x0062,
1152 0xfffd, /* 0xffffffff out of range */
1153 0xfffd, /* 0x7fffffff out of range */
1154 0x0162,
1155 0x0262
1156 };
1157 static const int32_t utf32Offsets[]={
1158 0, 4, 8, 8, 12, 16, 20, 24, 28
1159 };
1160 static const uint8_t utf32ExpectedBack[]={
1161 0x00, 0x00, 0x00, 0x61,
1162 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1163 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1164 0x00, 0x00, 0x00, 0x62,
1165 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1166 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1167 0x00, 0x00, 0x01, 0x62,
1168 0x00, 0x00, 0x02, 0x62
1169 };
1170 static const int32_t utf32OffsetsBack[]={
1171 0,0,0,0,
1172 1,1,1,1,
1173 2,2,2,2,
1174 4,4,4,4,
1175 5,5,5,5,
1176 6,6,6,6,
1177 7,7,7,7,
1178 8,8,8,8
1179 };
1180
1181 testConvertToU(utf32, sizeof(utf32),
1182 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1183 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1184 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1185 }
1186
1187 /* Test UTF-32LE bad data handling*/
1188 {
1189 static const uint8_t utf32[]={
1190 0x61, 0x00, 0x00, 0x00,
1191 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1192 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1193 0x62, 0x00, 0x00, 0x00,
1194 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1195 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1196 0x62, 0x01, 0x00, 0x00,
1197 0x62, 0x02, 0x00, 0x00,
1198 };
1199
1200 static const uint16_t utf32Expected[]={
1201 0x0061,
1202 0xfffd, /* 0x110000 out of range */
1203 0xDBFF, /* 0x10FFFF in range */
1204 0xDFFF,
1205 0x0062,
1206 0xfffd, /* 0xffffffff out of range */
1207 0xfffd, /* 0x7fffffff out of range */
1208 0x0162,
1209 0x0262
1210 };
1211 static const int32_t utf32Offsets[]={
1212 0, 4, 8, 8, 12, 16, 20, 24, 28
1213 };
1214 static const uint8_t utf32ExpectedBack[]={
1215 0x61, 0x00, 0x00, 0x00,
1216 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1217 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1218 0x62, 0x00, 0x00, 0x00,
1219 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1220 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1221 0x62, 0x01, 0x00, 0x00,
1222 0x62, 0x02, 0x00, 0x00
1223 };
1224 static const int32_t utf32OffsetsBack[]={
1225 0,0,0,0,
1226 1,1,1,1,
1227 2,2,2,2,
1228 4,4,4,4,
1229 5,5,5,5,
1230 6,6,6,6,
1231 7,7,7,7,
1232 8,8,8,8
1233 };
1234 testConvertToU(utf32, sizeof(utf32),
1235 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1236 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1237 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1238 }
1239}
1240
1241static void TestCoverageMBCS(){
1242#if 0
1243 UErrorCode status = U_ZERO_ERROR;
1244 const char *directory = loadTestData(&status);
1245 char* tdpath = NULL;
1246 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1247 int len = strlen(directory);
1248 char* index=NULL;
1249
1250 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1251 uprv_strcpy(saveDirectory,u_getDataDirectory());
1252 log_verbose("Retrieved data directory %s \n",saveDirectory);
1253 uprv_strcpy(tdpath,directory);
1254 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1255
1256 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1257 *(index+1)=0;
1258 }
1259 u_setDataDirectory(tdpath);
1260 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1261#endif
1262
1263 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1264 which is test file for MBCS conversion with single-byte codepage data.*/
1265 {
1266
1267 /* MBCS with single byte codepage data test1.ucm*/
1268 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1269 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1270 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1271
1272 /*from Unicode*/
1273 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1274 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1275 }
1276
1277 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1278 which is test file for MBCS conversion with three-byte codepage data.*/
1279 {
1280
1281 /* MBCS with three byte codepage data test3.ucm*/
1282 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1283 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1284 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1285
1286 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1287 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1288 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1289
1290 /*from Unicode*/
1291 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1292 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1293
1294 /*to Unicode*/
1295 testConvertToU(test3input, sizeof(test3input),
1296 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1297
1298 }
1299
1300 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1301 which is test file for MBCS conversion with four-byte codepage data.*/
1302 {
1303
1304 /* MBCS with three byte codepage data test4.ucm*/
1305 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1306 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1307 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1308
1309 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1310 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1311 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1312
1313 /*from Unicode*/
1314 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1315 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1316
1317 /*to Unicode*/
1318 testConvertToU(test4input, sizeof(test4input),
1319 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1320
1321 }
1322#if 0
1323 free(tdpath);
1324 /* restore the original data directory */
1325 log_verbose("Setting the data directory to %s \n", saveDirectory);
1326 u_setDataDirectory(saveDirectory);
1327 free(saveDirectory);
1328#endif
1329
1330}
1331
1332static void TestConverterType(const char *convName, UConverterType convType) {
1333 UConverter* myConverter;
1334 UErrorCode err = U_ZERO_ERROR;
1335
1336 myConverter = my_ucnv_open(convName, &err);
1337
1338 if (U_FAILURE(err)) {
1339 log_data_err("Failed to create an %s converter\n", convName);
1340 return;
1341 }
1342 else
1343 {
1344 if (ucnv_getType(myConverter)!=convType) {
1345 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1346 convName, convType);
1347 }
1348 else {
1349 log_verbose("ucnv_getType %s ok\n", convName);
1350 }
1351 }
1352 ucnv_close(myConverter);
1353}
1354
1355static void TestConverterTypesAndStarters()
1356{
1357#if !UCONFIG_NO_LEGACY_CONVERSION
1358 UConverter* myConverter;
1359 UErrorCode err = U_ZERO_ERROR;
1360 UBool mystarters[256];
1361
1362/* const UBool expectedKSCstarters[256] = {
1363 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1364 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1365 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1366 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1367 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1368 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1378 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1379 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1380 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1381 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1382 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1383 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1389
1390
1391 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1392
1393 myConverter = ucnv_open("ksc", &err);
1394 if (U_FAILURE(err)) {
1395 log_data_err("Failed to create an ibm-ksc converter\n");
1396 return;
1397 }
1398 else
1399 {
1400 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1401 log_err("ucnv_getType Failed for ibm-949\n");
1402 else
1403 log_verbose("ucnv_getType ibm-949 ok\n");
1404
1405 if(myConverter!=NULL)
1406 ucnv_getStarters(myConverter, mystarters, &err);
1407
1408 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1409 log_err("Failed ucnv_getStarters for ksc\n");
1410 else
1411 log_verbose("ucnv_getStarters ok\n");*/
1412
1413 }
1414 ucnv_close(myConverter);
1415
1416 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1417 TestConverterType("ibm-878", UCNV_SBCS);
1418#endif
1419
1420 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1421
1422 TestConverterType("ibm-1208", UCNV_UTF8);
1423
1424 TestConverterType("utf-8", UCNV_UTF8);
1425 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1426 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1427 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1428 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1429
1430#if !UCONFIG_NO_LEGACY_CONVERSION
1431
1432#if defined(U_ENABLE_GENERIC_ISO_2022)
1433 TestConverterType("iso-2022", UCNV_ISO_2022);
1434#endif
1435
1436 TestConverterType("hz", UCNV_HZ);
1437#endif
1438
1439 TestConverterType("scsu", UCNV_SCSU);
1440
1441#if !UCONFIG_NO_LEGACY_CONVERSION
1442 TestConverterType("x-iscii-de", UCNV_ISCII);
1443#endif
1444
1445 TestConverterType("ascii", UCNV_US_ASCII);
1446 TestConverterType("utf-7", UCNV_UTF7);
1447 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1448 TestConverterType("bocu-1", UCNV_BOCU1);
1449}
1450
1451static void
1452TestAmbiguousConverter(UConverter *cnv) {
1453 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1454 UChar outUnicode[20]={ 0, 0, 0, 0 };
1455
1456 const char *s;
1457 UChar *u;
1458 UErrorCode errorCode;
1459 UBool isAmbiguous;
1460
1461 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1462 errorCode=U_ZERO_ERROR;
1463 s=inBytes;
1464 u=outUnicode;
1465 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1466 if(U_FAILURE(errorCode)) {
1467 /* we do not care about general failures in this test; the input may just not be mappable */
1468 return;
1469 }
1470
1471 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1472 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1473 /* There are some encodings that are partially ASCII based,
1474 like the ISO-7 and GSM series of codepages, which we ignore. */
1475 return;
1476 }
1477
1478 isAmbiguous=ucnv_isAmbiguous(cnv);
1479
1480 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1481 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1482 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1483 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1484 return;
1485 }
1486
1487 if(outUnicode[2]!=0x5c) {
1488 /* needs fixup, fix it */
1489 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1490 if(outUnicode[2]!=0x5c) {
1491 /* the fix failed */
1492 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1493 return;
1494 }
1495 }
1496}
1497
1498static void TestAmbiguous()
1499{
1500 UErrorCode status = U_ZERO_ERROR;
1501 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1502 static const char target[] = {
1503 /* "\\usr\\local\\share\\data\\icutest.txt" */
1504 0x5c, 0x75, 0x73, 0x72,
1505 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1506 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1507 0x5c, 0x64, 0x61, 0x74, 0x61,
1508 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1509 0
1510 };
1511 UChar asciiResult[200], sjisResult[200];
1512 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1513 const char *name;
1514
1515 /* enumerate all converters */
1516 status=U_ZERO_ERROR;
1517 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1518 cnv=ucnv_open(name, &status);
1519 if(U_SUCCESS(status)) {
1520 TestAmbiguousConverter(cnv);
1521 ucnv_close(cnv);
1522 } else {
1523 log_err("error: unable to open available converter \"%s\"\n", name);
1524 status=U_ZERO_ERROR;
1525 }
1526 }
1527
1528#if !UCONFIG_NO_LEGACY_CONVERSION
1529 sjis_cnv = ucnv_open("ibm-943", &status);
1530 if (U_FAILURE(status))
1531 {
1532 log_data_err("Failed to create a SJIS converter\n");
1533 return;
1534 }
1535 ascii_cnv = ucnv_open("LATIN-1", &status);
1536 if (U_FAILURE(status))
1537 {
1538 log_data_err("Failed to create a LATIN-1 converter\n");
1539 ucnv_close(sjis_cnv);
1540 return;
1541 }
1542 /* convert target from SJIS to Unicode */
1543 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1544 if (U_FAILURE(status))
1545 {
1546 log_err("Failed to convert the SJIS string.\n");
1547 ucnv_close(sjis_cnv);
1548 ucnv_close(ascii_cnv);
1549 return;
1550 }
1551 /* convert target from Latin-1 to Unicode */
1552 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1553 if (U_FAILURE(status))
1554 {
1555 log_err("Failed to convert the Latin-1 string.\n");
1556 ucnv_close(sjis_cnv);
1557 ucnv_close(ascii_cnv);
1558 return;
1559 }
1560 if (!ucnv_isAmbiguous(sjis_cnv))
1561 {
1562 log_err("SJIS converter should contain ambiguous character mappings.\n");
1563 ucnv_close(sjis_cnv);
1564 ucnv_close(ascii_cnv);
1565 return;
1566 }
1567 if (u_strcmp(sjisResult, asciiResult) == 0)
1568 {
1569 log_err("File separators for SJIS don't need to be fixed.\n");
1570 }
1571 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1572 if (u_strcmp(sjisResult, asciiResult) != 0)
1573 {
1574 log_err("Fixing file separator for SJIS failed.\n");
1575 }
1576 ucnv_close(sjis_cnv);
1577 ucnv_close(ascii_cnv);
1578#endif
1579}
1580
1581static void
1582TestSignatureDetection(){
1583 /* with null terminated strings */
1584 {
1585 static const char* data[] = {
1586 "\xFE\xFF\x00\x00", /* UTF-16BE */
1587 "\xFF\xFE\x00\x00", /* UTF-16LE */
1588 "\xEF\xBB\xBF\x00", /* UTF-8 */
1589 "\x0E\xFE\xFF\x00", /* SCSU */
1590
1591 "\xFE\xFF", /* UTF-16BE */
1592 "\xFF\xFE", /* UTF-16LE */
1593 "\xEF\xBB\xBF", /* UTF-8 */
1594 "\x0E\xFE\xFF", /* SCSU */
1595
1596 "\xFE\xFF\x41\x42", /* UTF-16BE */
1597 "\xFF\xFE\x41\x41", /* UTF-16LE */
1598 "\xEF\xBB\xBF\x41", /* UTF-8 */
1599 "\x0E\xFE\xFF\x41", /* SCSU */
1600
1601 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1602 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1603 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1604 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1605 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1606
1607 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1608 };
1609 static const char* expected[] = {
1610 "UTF-16BE",
1611 "UTF-16LE",
1612 "UTF-8",
1613 "SCSU",
1614
1615 "UTF-16BE",
1616 "UTF-16LE",
1617 "UTF-8",
1618 "SCSU",
1619
1620 "UTF-16BE",
1621 "UTF-16LE",
1622 "UTF-8",
1623 "SCSU",
1624
1625 "UTF-7",
1626 "UTF-7",
1627 "UTF-7",
1628 "UTF-7",
1629 "UTF-7",
1630 "UTF-EBCDIC"
1631 };
1632 static const int32_t expectedLength[] ={
1633 2,
1634 2,
1635 3,
1636 3,
1637
1638 2,
1639 2,
1640 3,
1641 3,
1642
1643 2,
1644 2,
1645 3,
1646 3,
1647
1648 5,
1649 4,
1650 4,
1651 4,
1652 4,
1653 4
1654 };
1655 int i=0;
1656 UErrorCode err;
1657 int32_t signatureLength = -1;
1658 const char* source = NULL;
1659 const char* enc = NULL;
1660 for( ; i<sizeof(data)/sizeof(char*); i++){
1661 err = U_ZERO_ERROR;
1662 source = data[i];
1663 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1664 if(U_FAILURE(err)){
1665 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1666 continue;
1667 }
1668 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1669 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1670 continue;
1671 }
1672 if(signatureLength != expectedLength[i]){
1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1674 }
1675 }
1676 }
1677 {
1678 static const char* data[] = {
1679 "\xFE\xFF\x00", /* UTF-16BE */
1680 "\xFF\xFE\x00", /* UTF-16LE */
1681 "\xEF\xBB\xBF\x00", /* UTF-8 */
1682 "\x0E\xFE\xFF\x00", /* SCSU */
1683 "\x00\x00\xFE\xFF", /* UTF-32BE */
1684 "\xFF\xFE\x00\x00", /* UTF-32LE */
1685 "\xFE\xFF", /* UTF-16BE */
1686 "\xFF\xFE", /* UTF-16LE */
1687 "\xEF\xBB\xBF", /* UTF-8 */
1688 "\x0E\xFE\xFF", /* SCSU */
1689 "\x00\x00\xFE\xFF", /* UTF-32BE */
1690 "\xFF\xFE\x00\x00", /* UTF-32LE */
1691 "\xFE\xFF\x41\x42", /* UTF-16BE */
1692 "\xFF\xFE\x41\x41", /* UTF-16LE */
1693 "\xEF\xBB\xBF\x41", /* UTF-8 */
1694 "\x0E\xFE\xFF\x41", /* SCSU */
1695 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1696 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1697 "\xFB\xEE\x28", /* BOCU-1 */
1698 "\xFF\x41\x42" /* NULL */
1699 };
1700 static const int len[] = {
1701 3,
1702 3,
1703 4,
1704 4,
1705 4,
1706 4,
1707 2,
1708 2,
1709 3,
1710 3,
1711 4,
1712 4,
1713 4,
1714 4,
1715 4,
1716 4,
1717 5,
1718 5,
1719 3,
1720 3
1721 };
1722
1723 static const char* expected[] = {
1724 "UTF-16BE",
1725 "UTF-16LE",
1726 "UTF-8",
1727 "SCSU",
1728 "UTF-32BE",
1729 "UTF-32LE",
1730 "UTF-16BE",
1731 "UTF-16LE",
1732 "UTF-8",
1733 "SCSU",
1734 "UTF-32BE",
1735 "UTF-32LE",
1736 "UTF-16BE",
1737 "UTF-16LE",
1738 "UTF-8",
1739 "SCSU",
1740 "UTF-32BE",
1741 "UTF-32LE",
1742 "BOCU-1",
1743 NULL
1744 };
1745 static const int32_t expectedLength[] ={
1746 2,
1747 2,
1748 3,
1749 3,
1750 4,
1751 4,
1752 2,
1753 2,
1754 3,
1755 3,
1756 4,
1757 4,
1758 2,
1759 2,
1760 3,
1761 3,
1762 4,
1763 4,
1764 3,
1765 0
1766 };
1767 int i=0;
1768 UErrorCode err;
1769 int32_t signatureLength = -1;
1770 int32_t sourceLength=-1;
1771 const char* source = NULL;
1772 const char* enc = NULL;
1773 for( ; i<sizeof(data)/sizeof(char*); i++){
1774 err = U_ZERO_ERROR;
1775 source = data[i];
1776 sourceLength = len[i];
1777 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1778 if(U_FAILURE(err)){
1779 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1780 continue;
1781 }
1782 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1783 if(expected[i] !=NULL){
1784 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1785 continue;
1786 }
1787 }
1788 if(signatureLength != expectedLength[i]){
1789 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1790 }
1791 }
1792 }
1793}
1794
1795static void TestUTF7() {
1796 /* test input */
1797 static const uint8_t in[]={
1798 /* H - +Jjo- - ! +- +2AHcAQ */
1799 0x48,
1800 0x2d,
1801 0x2b, 0x4a, 0x6a, 0x6f,
1802 0x2d, 0x2d,
1803 0x21,
1804 0x2b, 0x2d,
1805 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1806 };
1807
1808 /* expected test results */
1809 static const int32_t results[]={
1810 /* number of bytes read, code point */
1811 1, 0x48,
1812 1, 0x2d,
1813 4, 0x263a, /* <WHITE SMILING FACE> */
1814 2, 0x2d,
1815 1, 0x21,
1816 2, 0x2b,
1817 7, 0x10401
1818 };
1819
1820 const char *cnvName;
1821 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1822 UErrorCode errorCode=U_ZERO_ERROR;
1823 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1824 if(U_FAILURE(errorCode)) {
1825 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1826 return;
1827 }
1828 TestNextUChar(cnv, source, limit, results, "UTF-7");
1829 /* Test the condition when source >= sourceLimit */
1830 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1831 cnvName = ucnv_getName(cnv, &errorCode);
1832 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1833 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1834 }
1835 ucnv_close(cnv);
1836}
1837
1838static void TestIMAP() {
1839 /* test input */
1840 static const uint8_t in[]={
1841 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1842 0x48,
1843 0x2d,
1844 0x26, 0x4a, 0x6a, 0x6f,
1845 0x2d, 0x2d,
1846 0x21,
1847 0x26, 0x2d,
1848 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1849 };
1850
1851 /* expected test results */
1852 static const int32_t results[]={
1853 /* number of bytes read, code point */
1854 1, 0x48,
1855 1, 0x2d,
1856 4, 0x263a, /* <WHITE SMILING FACE> */
1857 2, 0x2d,
1858 1, 0x21,
1859 2, 0x26,
1860 7, 0x10401
1861 };
1862
1863 const char *cnvName;
1864 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1865 UErrorCode errorCode=U_ZERO_ERROR;
1866 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1867 if(U_FAILURE(errorCode)) {
1868 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1869 return;
1870 }
1871 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1872 /* Test the condition when source >= sourceLimit */
1873 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1874 cnvName = ucnv_getName(cnv, &errorCode);
1875 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1876 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1877 }
1878 ucnv_close(cnv);
1879}
1880
1881static void TestUTF8() {
1882 /* test input */
1883 static const uint8_t in[]={
1884 0x61,
1885 0xc2, 0x80,
1886 0xe0, 0xa0, 0x80,
1887 0xf0, 0x90, 0x80, 0x80,
1888 0xf4, 0x84, 0x8c, 0xa1,
1889 0xf0, 0x90, 0x90, 0x81
1890 };
1891
1892 /* expected test results */
1893 static const int32_t results[]={
1894 /* number of bytes read, code point */
1895 1, 0x61,
1896 2, 0x80,
1897 3, 0x800,
1898 4, 0x10000,
1899 4, 0x104321,
1900 4, 0x10401
1901 };
1902
1903 /* error test input */
1904 static const uint8_t in2[]={
1905 0x61,
1906 0xc0, 0x80, /* illegal non-shortest form */
1907 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1908 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1909 0xc0, 0xc0, /* illegal trail byte */
1910 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1911 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1912 0xfe, /* illegal byte altogether */
1913 0x62
1914 };
1915
1916 /* expected error test results */
1917 static const int32_t results2[]={
1918 /* number of bytes read, code point */
1919 1, 0x61,
1920 22, 0x62
1921 };
1922
1923 UConverterToUCallback cb;
1924 const void *p;
1925
1926 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1927 UErrorCode errorCode=U_ZERO_ERROR;
1928 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1929 if(U_FAILURE(errorCode)) {
1930 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1931 return;
1932 }
1933 TestNextUChar(cnv, source, limit, results, "UTF-8");
1934 /* Test the condition when source >= sourceLimit */
1935 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1936
1937 /* test error behavior with a skip callback */
1938 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1939 source=(const char *)in2;
1940 limit=(const char *)(in2+sizeof(in2));
1941 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1942
1943 ucnv_close(cnv);
1944}
1945
1946static void TestCESU8() {
1947 /* test input */
1948 static const uint8_t in[]={
1949 0x61,
1950 0xc2, 0x80,
1951 0xe0, 0xa0, 0x80,
1952 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1953 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1954 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1955 0xef, 0xbf, 0xbc
1956 };
1957
1958 /* expected test results */
1959 static const int32_t results[]={
1960 /* number of bytes read, code point */
1961 1, 0x61,
1962 2, 0x80,
1963 3, 0x800,
1964 6, 0x10000,
1965 3, 0xdc01,
1966 -1,0xd802, /* may read 3 or 6 bytes */
1967 -1,0x10ffff,/* may read 0 or 3 bytes */
1968 3, 0xfffc
1969 };
1970
1971 /* error test input */
1972 static const uint8_t in2[]={
1973 0x61,
1974 0xc0, 0x80, /* illegal non-shortest form */
1975 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1976 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1977 0xc0, 0xc0, /* illegal trail byte */
1978 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1979 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1980 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1981 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1982 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1983 0xfe, /* illegal byte altogether */
1984 0x62
1985 };
1986
1987 /* expected error test results */
1988 static const int32_t results2[]={
1989 /* number of bytes read, code point */
1990 1, 0x61,
1991 34, 0x62
1992 };
1993
1994 UConverterToUCallback cb;
1995 const void *p;
1996
1997 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1998 UErrorCode errorCode=U_ZERO_ERROR;
1999 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2000 if(U_FAILURE(errorCode)) {
2001 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2002 return;
2003 }
2004 TestNextUChar(cnv, source, limit, results, "CESU-8");
2005 /* Test the condition when source >= sourceLimit */
2006 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2007
2008 /* test error behavior with a skip callback */
2009 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2010 source=(const char *)in2;
2011 limit=(const char *)(in2+sizeof(in2));
2012 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2013
2014 ucnv_close(cnv);
2015}
2016
2017static void TestUTF16() {
2018 /* test input */
2019 static const uint8_t in1[]={
2020 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2021 };
2022 static const uint8_t in2[]={
2023 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2024 };
2025 static const uint8_t in3[]={
2026 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2027 };
2028
2029 /* expected test results */
2030 static const int32_t results1[]={
2031 /* number of bytes read, code point */
2032 4, 0x4e00,
2033 2, 0xfeff
2034 };
2035 static const int32_t results2[]={
2036 /* number of bytes read, code point */
2037 4, 0x004e,
2038 2, 0xfffe
2039 };
2040 static const int32_t results3[]={
2041 /* number of bytes read, code point */
2042 2, 0xfefe,
2043 2, 0x4e00,
2044 2, 0xfeff,
2045 4, 0x20001
2046 };
2047
2048 const char *source, *limit;
2049
2050 UErrorCode errorCode=U_ZERO_ERROR;
2051 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2052 if(U_FAILURE(errorCode)) {
2053 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2054 return;
2055 }
2056
2057 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2058 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2059
2060 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2061 ucnv_resetToUnicode(cnv);
2062 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2063
2064 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2065 ucnv_resetToUnicode(cnv);
2066 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2067
2068 /* Test the condition when source >= sourceLimit */
2069 ucnv_resetToUnicode(cnv);
2070 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2071
2072 ucnv_close(cnv);
2073}
2074
2075static void TestUTF16BE() {
2076 /* test input */
2077 static const uint8_t in[]={
2078 0x00, 0x61,
2079 0x00, 0xc0,
2080 0x00, 0x31,
2081 0x00, 0xf4,
2082 0xce, 0xfe,
2083 0xd8, 0x01, 0xdc, 0x01
2084 };
2085
2086 /* expected test results */
2087 static const int32_t results[]={
2088 /* number of bytes read, code point */
2089 2, 0x61,
2090 2, 0xc0,
2091 2, 0x31,
2092 2, 0xf4,
2093 2, 0xcefe,
2094 4, 0x10401
2095 };
2096
2097 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2098 UErrorCode errorCode=U_ZERO_ERROR;
2099 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2100 if(U_FAILURE(errorCode)) {
2101 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2102 return;
2103 }
2104 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2105 /* Test the condition when source >= sourceLimit */
2106 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2107 /*Test for the condition where there is an invalid character*/
2108 {
2109 static const uint8_t source2[]={0x61};
2110 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2111 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2112 }
2113#if 0
2114 /*
2115 * Test disabled because currently the UTF-16BE/LE converters are supposed
2116 * to not set errors for unpaired surrogates.
2117 * This may change with
2118 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2119 */
2120
2121 /*Test for the condition where there is a surrogate pair*/
2122 {
2123 const uint8_t source2[]={0xd8, 0x01};
2124 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2125 }
2126#endif
2127 ucnv_close(cnv);
2128}
2129
2130static void
2131TestUTF16LE() {
2132 /* test input */
2133 static const uint8_t in[]={
2134 0x61, 0x00,
2135 0x31, 0x00,
2136 0x4e, 0x2e,
2137 0x4e, 0x00,
2138 0x01, 0xd8, 0x01, 0xdc
2139 };
2140
2141 /* expected test results */
2142 static const int32_t results[]={
2143 /* number of bytes read, code point */
2144 2, 0x61,
2145 2, 0x31,
2146 2, 0x2e4e,
2147 2, 0x4e,
2148 4, 0x10401
2149 };
2150
2151 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2152 UErrorCode errorCode=U_ZERO_ERROR;
2153 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2154 if(U_FAILURE(errorCode)) {
2155 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2156 return;
2157 }
2158 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2159 /* Test the condition when source >= sourceLimit */
2160 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2161 /*Test for the condition where there is an invalid character*/
2162 {
2163 static const uint8_t source2[]={0x61};
2164 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2165 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2166 }
2167#if 0
2168 /*
2169 * Test disabled because currently the UTF-16BE/LE converters are supposed
2170 * to not set errors for unpaired surrogates.
2171 * This may change with
2172 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2173 */
2174
2175 /*Test for the condition where there is a surrogate character*/
2176 {
2177 static const uint8_t source2[]={0x01, 0xd8};
2178 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2179 }
2180#endif
2181
2182 ucnv_close(cnv);
2183}
2184
2185static void TestUTF32() {
2186 /* test input */
2187 static const uint8_t in1[]={
2188 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2189 };
2190 static const uint8_t in2[]={
2191 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2192 };
2193 static const uint8_t in3[]={
2194 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2195 };
2196
2197 /* expected test results */
2198 static const int32_t results1[]={
2199 /* number of bytes read, code point */
2200 8, 0x100f00,
2201 4, 0xfeff
2202 };
2203 static const int32_t results2[]={
2204 /* number of bytes read, code point */
2205 8, 0x0f1000,
2206 4, 0xfffe
2207 };
2208 static const int32_t results3[]={
2209 /* number of bytes read, code point */
2210 4, 0xfefe,
2211 4, 0x100f00,
2212 4, 0xfffd, /* unmatched surrogate */
2213 4, 0xfffd /* unmatched surrogate */
2214 };
2215
2216 const char *source, *limit;
2217
2218 UErrorCode errorCode=U_ZERO_ERROR;
2219 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2220 if(U_FAILURE(errorCode)) {
2221 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2222 return;
2223 }
2224
2225 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2226 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2227
2228 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2229 ucnv_resetToUnicode(cnv);
2230 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2231
2232 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2233 ucnv_resetToUnicode(cnv);
2234 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2235
2236 /* Test the condition when source >= sourceLimit */
2237 ucnv_resetToUnicode(cnv);
2238 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2239
2240 ucnv_close(cnv);
2241}
2242
2243static void
2244TestUTF32BE() {
2245 /* test input */
2246 static const uint8_t in[]={
2247 0x00, 0x00, 0x00, 0x61,
2248 0x00, 0x00, 0x30, 0x61,
2249 0x00, 0x00, 0xdc, 0x00,
2250 0x00, 0x00, 0xd8, 0x00,
2251 0x00, 0x00, 0xdf, 0xff,
2252 0x00, 0x00, 0xff, 0xfe,
2253 0x00, 0x10, 0xab, 0xcd,
2254 0x00, 0x10, 0xff, 0xff
2255 };
2256
2257 /* expected test results */
2258 static const int32_t results[]={
2259 /* number of bytes read, code point */
2260 4, 0x61,
2261 4, 0x3061,
2262 4, 0xfffd,
2263 4, 0xfffd,
2264 4, 0xfffd,
2265 4, 0xfffe,
2266 4, 0x10abcd,
2267 4, 0x10ffff
2268 };
2269
2270 /* error test input */
2271 static const uint8_t in2[]={
2272 0x00, 0x00, 0x00, 0x61,
2273 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2274 0x00, 0x00, 0x00, 0x62,
2275 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2276 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2277 0x00, 0x00, 0x01, 0x62,
2278 0x00, 0x00, 0x02, 0x62
2279 };
2280
2281 /* expected error test results */
2282 static const int32_t results2[]={
2283 /* number of bytes read, code point */
2284 4, 0x61,
2285 8, 0x62,
2286 12, 0x162,
2287 4, 0x262
2288 };
2289
2290 UConverterToUCallback cb;
2291 const void *p;
2292
2293 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2294 UErrorCode errorCode=U_ZERO_ERROR;
2295 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2296 if(U_FAILURE(errorCode)) {
2297 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2298 return;
2299 }
2300 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2301
2302 /* Test the condition when source >= sourceLimit */
2303 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2304
2305 /* test error behavior with a skip callback */
2306 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2307 source=(const char *)in2;
2308 limit=(const char *)(in2+sizeof(in2));
2309 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2310
2311 ucnv_close(cnv);
2312}
2313
2314static void
2315TestUTF32LE() {
2316 /* test input */
2317 static const uint8_t in[]={
2318 0x61, 0x00, 0x00, 0x00,
2319 0x61, 0x30, 0x00, 0x00,
2320 0x00, 0xdc, 0x00, 0x00,
2321 0x00, 0xd8, 0x00, 0x00,
2322 0xff, 0xdf, 0x00, 0x00,
2323 0xfe, 0xff, 0x00, 0x00,
2324 0xcd, 0xab, 0x10, 0x00,
2325 0xff, 0xff, 0x10, 0x00
2326 };
2327
2328 /* expected test results */
2329 static const int32_t results[]={
2330 /* number of bytes read, code point */
2331 4, 0x61,
2332 4, 0x3061,
2333 4, 0xfffd,
2334 4, 0xfffd,
2335 4, 0xfffd,
2336 4, 0xfffe,
2337 4, 0x10abcd,
2338 4, 0x10ffff
2339 };
2340
2341 /* error test input */
2342 static const uint8_t in2[]={
2343 0x61, 0x00, 0x00, 0x00,
2344 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2345 0x62, 0x00, 0x00, 0x00,
2346 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2347 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2348 0x62, 0x01, 0x00, 0x00,
2349 0x62, 0x02, 0x00, 0x00,
2350 };
2351
2352 /* expected error test results */
2353 static const int32_t results2[]={
2354 /* number of bytes read, code point */
2355 4, 0x61,
2356 8, 0x62,
2357 12, 0x162,
2358 4, 0x262,
2359 };
2360
2361 UConverterToUCallback cb;
2362 const void *p;
2363
2364 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2365 UErrorCode errorCode=U_ZERO_ERROR;
2366 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2367 if(U_FAILURE(errorCode)) {
2368 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2369 return;
2370 }
2371 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2372
2373 /* Test the condition when source >= sourceLimit */
2374 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2375
2376 /* test error behavior with a skip callback */
2377 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2378 source=(const char *)in2;
2379 limit=(const char *)(in2+sizeof(in2));
2380 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2381
2382 ucnv_close(cnv);
2383}
2384
2385static void
2386TestLATIN1() {
2387 /* test input */
2388 static const uint8_t in[]={
2389 0x61,
2390 0x31,
2391 0x32,
2392 0xc0,
2393 0xf0,
2394 0xf4,
2395 };
2396
2397 /* expected test results */
2398 static const int32_t results[]={
2399 /* number of bytes read, code point */
2400 1, 0x61,
2401 1, 0x31,
2402 1, 0x32,
2403 1, 0xc0,
2404 1, 0xf0,
2405 1, 0xf4,
2406 };
2407 static const uint16_t in1[] = {
2408 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2409 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2410 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2411 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2412 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2413 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2414 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2415 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2416 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2417 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2418 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2419 0xcb, 0x82
2420 };
2421 static const uint8_t out1[] = {
2422 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2423 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2424 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2425 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2426 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2427 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2428 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2429 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2430 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2431 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2432 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2433 0xcb, 0x82
2434 };
2435 static const uint16_t in2[]={
2436 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2437 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2438 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2439 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2440 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2441 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2442 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2443 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2444 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2445 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2446 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2447 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2448 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2449 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2450 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2451 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2452 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2453 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2454 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2455 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2456 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2457 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2458 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2459 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2460 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2461 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2462 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2463 0x37, 0x20, 0x2A, 0x2F,
2464 };
2465 static const unsigned char out2[]={
2466 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2467 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2468 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2469 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2470 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2471 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2472 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2473 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2474 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2475 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2476 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2477 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2478 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2479 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2480 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2481 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2482 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2483 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2484 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2485 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2486 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2487 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2488 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2489 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2490 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2491 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2492 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2493 0x37, 0x20, 0x2A, 0x2F,
2494 };
2495 const char *source=(const char *)in;
2496 const char *limit=(const char *)in+sizeof(in);
2497
2498 UErrorCode errorCode=U_ZERO_ERROR;
2499 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2500 if(U_FAILURE(errorCode)) {
2501 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2502 return;
2503 }
2504 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2505 /* Test the condition when source >= sourceLimit */
2506 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2507 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2508 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2509
2510 ucnv_close(cnv);
2511}
2512
2513static void
2514TestSBCS() {
2515 /* test input */
2516 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2517 /* expected test results */
2518 static const int32_t results[]={
2519 /* number of bytes read, code point */
2520 1, 0x61,
2521 1, 0xbf,
2522 1, 0xc4,
2523 1, 0x2021,
2524 1, 0xf8ff,
2525 1, 0x00d9
2526 };
2527
2528 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2529 UErrorCode errorCode=U_ZERO_ERROR;
2530 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2531 if(U_FAILURE(errorCode)) {
2532 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2533 return;
2534 }
2535 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2536 /* Test the condition when source >= sourceLimit */
2537 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2538 /*Test for Illegal character */ /*
2539 {
2540 static const uint8_t input1[]={ 0xA1 };
2541 const char* illegalsource=(const char*)input1;
2542 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2543 }
2544 */
2545 ucnv_close(cnv);
2546}
2547
2548static void
2549TestDBCS() {
2550 /* test input */
2551 static const uint8_t in[]={
2552 0x44, 0x6a,
2553 0xc4, 0x9c,
2554 0x7a, 0x74,
2555 0x46, 0xab,
2556 0x42, 0x5b,
2557
2558 };
2559
2560 /* expected test results */
2561 static const int32_t results[]={
2562 /* number of bytes read, code point */
2563 2, 0x00a7,
2564 2, 0xe1d2,
2565 2, 0x6962,
2566 2, 0xf842,
2567 2, 0xffe5,
2568 };
2569
2570 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2571 UErrorCode errorCode=U_ZERO_ERROR;
2572
2573 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2574 if(U_FAILURE(errorCode)) {
2575 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2576 return;
2577 }
2578 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2579 /* Test the condition when source >= sourceLimit */
2580 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2581 /*Test for the condition where there is an invalid character*/
2582 {
2583 static const uint8_t source2[]={0x1a, 0x1b};
2584 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2585 }
2586 /*Test for the condition where we have a truncated char*/
2587 {
2588 static const uint8_t source1[]={0xc4};
2589 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2590 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2591 }
2592 ucnv_close(cnv);
2593}
2594
2595static void
2596TestMBCS() {
2597 /* test input */
2598 static const uint8_t in[]={
2599 0x01,
2600 0xa6, 0xa3,
2601 0x00,
2602 0xa6, 0xa1,
2603 0x08,
2604 0xc2, 0x76,
2605 0xc2, 0x78,
2606
2607 };
2608
2609 /* expected test results */
2610 static const int32_t results[]={
2611 /* number of bytes read, code point */
2612 1, 0x0001,
2613 2, 0x250c,
2614 1, 0x0000,
2615 2, 0x2500,
2616 1, 0x0008,
2617 2, 0xd60c,
2618 2, 0xd60e,
2619 };
2620
2621 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2622 UErrorCode errorCode=U_ZERO_ERROR;
2623
2624 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2625 if(U_FAILURE(errorCode)) {
2626 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2627 return;
2628 }
2629 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2630 /* Test the condition when source >= sourceLimit */
2631 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2632 /*Test for the condition where there is an invalid character*/
2633 {
2634 static const uint8_t source2[]={0xa1, 0x80};
2635 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2636 }
2637 /*Test for the condition where we have a truncated char*/
2638 {
2639 static const uint8_t source1[]={0xc4};
2640 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2641 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2642 }
2643 ucnv_close(cnv);
2644
2645}
2646
2647#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2648static void
2649TestICCRunout() {
2650/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2651
2652 const char *cnvName = "ibm-1363";
2653 UErrorCode status = U_ZERO_ERROR;
2654 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2655 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2656 const char *source = sourceData;
2657 const char *sourceLim = sourceData+sizeof(sourceData);
2658 UChar c1, c2, c3;
2659 UConverter *cnv=ucnv_open(cnvName, &status);
2660 if(U_FAILURE(status)) {
2661 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2662 return;
2663 }
2664
2665#if 0
2666 {
2667 UChar targetBuf[256];
2668 UChar *target = targetBuf;
2669 UChar *targetLim = target+256;
2670 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2671
2672 log_info("After convert: target@%d, source@%d, status%s\n",
2673 target-targetBuf, source-sourceData, u_errorName(status));
2674
2675 if(U_FAILURE(status)) {
2676 log_err("Failed to convert: %s\n", u_errorName(status));
2677 } else {
2678
2679 }
2680 }
2681#endif
2682
2683 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2684 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2685
2686 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2687 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2688
2689 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2690 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2691
2692 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2693 log_verbose("OK\n");
2694 } else {
2695 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2696 }
2697
2698 ucnv_close(cnv);
2699
2700}
2701#endif
2702
2703#ifdef U_ENABLE_GENERIC_ISO_2022
2704
2705static void
2706TestISO_2022() {
2707 /* test input */
2708 static const uint8_t in[]={
2709 0x1b, 0x25, 0x42,
2710 0x31,
2711 0x32,
2712 0x61,
2713 0xc2, 0x80,
2714 0xe0, 0xa0, 0x80,
2715 0xf0, 0x90, 0x80, 0x80
2716 };
2717
2718
2719
2720 /* expected test results */
2721 static const int32_t results[]={
2722 /* number of bytes read, code point */
2723 4, 0x0031, /* 4 bytes including the escape sequence */
2724 1, 0x0032,
2725 1, 0x61,
2726 2, 0x80,
2727 3, 0x800,
2728 4, 0x10000
2729 };
2730
2731 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2732 UErrorCode errorCode=U_ZERO_ERROR;
2733 UConverter *cnv;
2734
2735 cnv=ucnv_open("ISO_2022", &errorCode);
2736 if(U_FAILURE(errorCode)) {
2737 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2738 return;
2739 }
2740 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2741
2742 /* Test the condition when source >= sourceLimit */
2743 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2744 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2745 /*Test for the condition where we have a truncated char*/
2746 {
2747 static const uint8_t source1[]={0xc4};
2748 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2749 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2750 }
2751 /*Test for the condition where there is an invalid character*/
2752 {
2753 static const uint8_t source2[]={0xa1, 0x01};
2754 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2755 }
2756 ucnv_close(cnv);
2757}
2758
2759#endif
2760
2761static void
2762TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2763 const UChar* uSource;
2764 const UChar* uSourceLimit;
2765 const char* cSource;
2766 const char* cSourceLimit;
2767 UChar *uTargetLimit =NULL;
2768 UChar *uTarget;
2769 char *cTarget;
2770 const char *cTargetLimit;
2771 char *cBuf;
2772 UChar *uBuf; /*,*test;*/
2773 int32_t uBufSize = 120;
2774 int len=0;
2775 int i=2;
2776 UErrorCode errorCode=U_ZERO_ERROR;
2777 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2778 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2779 ucnv_reset(cnv);
2780 for(;--i>0; ){
2781 uSource = (UChar*) source;
2782 uSourceLimit=(const UChar*)sourceLimit;
2783 cTarget = cBuf;
2784 uTarget = uBuf;
2785 cSource = cBuf;
2786 cTargetLimit = cBuf;
2787 uTargetLimit = uBuf;
2788
2789 do{
2790
2791 cTargetLimit = cTargetLimit+ i;
2792 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2793 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2794 errorCode=U_ZERO_ERROR;
2795 continue;
2796 }
2797
2798 if(U_FAILURE(errorCode)){
2799 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2800 return;
2801 }
2802
2803 }while (uSource<uSourceLimit);
2804
2805 cSourceLimit =cTarget;
2806 do{
2807 uTargetLimit=uTargetLimit+i;
2808 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2809 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2810 errorCode=U_ZERO_ERROR;
2811 continue;
2812 }
2813 if(U_FAILURE(errorCode)){
2814 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2815 return;
2816 }
2817 }while(cSource<cSourceLimit);
2818
2819 uSource = source;
2820 /*test =uBuf;*/
2821 for(len=0;len<(int)(source - sourceLimit);len++){
2822 if(uBuf[len]!=uSource[len]){
2823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2824 }
2825 }
2826 }
2827 free(uBuf);
2828 free(cBuf);
2829}
2830/* Test for Jitterbug 778 */
2831static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2832 const UChar* uSource;
2833 const UChar* uSourceLimit;
2834 const char* cSource;
2835 UChar *uTargetLimit =NULL;
2836 UChar *uTarget;
2837 char *cTarget;
2838 const char *cTargetLimit;
2839 char *cBuf;
2840 UChar *uBuf,*test;
2841 int32_t uBufSize = 120;
2842 int numCharsInTarget=0;
2843 UErrorCode errorCode=U_ZERO_ERROR;
2844 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2845 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2846 uSource = source;
2847 uSourceLimit=sourceLimit;
2848 cTarget = cBuf;
2849 cTargetLimit = cBuf +uBufSize*5;
2850 uTarget = uBuf;
2851 uTargetLimit = uBuf+ uBufSize*5;
2852 ucnv_reset(cnv);
2853 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2854 if(U_FAILURE(errorCode)){
2855 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2856 return;
2857 }
2858 cSource = cBuf;
2859 test =uBuf;
2860 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2861 if(U_FAILURE(errorCode)){
2862 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2863 return;
2864 }
2865 uSource = source;
2866 while(uSource<uSourceLimit){
2867 if(*test!=*uSource){
2868
2869 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2870 }
2871 uSource++;
2872 test++;
2873 }
2874 free(uBuf);
2875 free(cBuf);
2876}
2877
2878static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2879 const UChar* uSource;
2880 const UChar* uSourceLimit;
2881 const char* cSource;
2882 const char* cSourceLimit;
2883 UChar *uTargetLimit =NULL;
2884 UChar *uTarget;
2885 char *cTarget;
2886 const char *cTargetLimit;
2887 char *cBuf;
2888 UChar *uBuf; /*,*test;*/
2889 int32_t uBufSize = 120;
2890 int len=0;
2891 int i=2;
2892 const UChar *temp = sourceLimit;
2893 UErrorCode errorCode=U_ZERO_ERROR;
2894 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2895 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2896
2897 ucnv_reset(cnv);
2898 for(;--i>0;){
2899 uSource = (UChar*) source;
2900 cTarget = cBuf;
2901 uTarget = uBuf;
2902 cSource = cBuf;
2903 cTargetLimit = cBuf;
2904 uTargetLimit = uBuf+uBufSize*5;
2905 cTargetLimit = cTargetLimit+uBufSize*10;
2906 uSourceLimit=uSource;
2907 do{
2908
2909 if (uSourceLimit < sourceLimit) {
2910 uSourceLimit = uSourceLimit+1;
2911 }
2912 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2913 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2914 errorCode=U_ZERO_ERROR;
2915 continue;
2916 }
2917
2918 if(U_FAILURE(errorCode)){
2919 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2920 return;
2921 }
2922
2923 }while (uSource<temp);
2924
2925 cSourceLimit =cBuf;
2926 do{
2927 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2928 cSourceLimit = cSourceLimit+1;
2929 }
2930 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2931 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2932 errorCode=U_ZERO_ERROR;
2933 continue;
2934 }
2935 if(U_FAILURE(errorCode)){
2936 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2937 return;
2938 }
2939 }while(cSource<cTarget);
2940
2941 uSource = source;
2942 /*test =uBuf;*/
2943 for(;len<(int)(source - sourceLimit);len++){
2944 if(uBuf[len]!=uSource[len]){
2945 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2946 }
2947 }
2948 }
2949 free(uBuf);
2950 free(cBuf);
2951}
2952static void
2953TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2954 const uint16_t results[], const char* message){
2955/* const char* s0; */
2956 const char* s=(char*)source;
2957 const uint16_t *r=results;
2958 UErrorCode errorCode=U_ZERO_ERROR;
2959 uint32_t c,exC;
2960 ucnv_reset(cnv);
2961 while(s<limit) {
2962 /* s0=s; */
2963 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2964 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2965 break; /* no more significant input */
2966 } else if(U_FAILURE(errorCode)) {
2967 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2968 break;
2969 } else {
2970 if(UTF_IS_FIRST_SURROGATE(*r)){
2971 int i =0, len = 2;
2972 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2973 r++;
2974 }else{
2975 exC = *r;
2976 }
2977 if(c!=(uint32_t)(exC))
2978 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2979 }
2980 r++;
2981 }
2982}
2983
2984static int TestJitterbug930(const char* enc){
2985 UErrorCode err = U_ZERO_ERROR;
2986 UConverter*converter;
2987 char out[80];
2988 char*target = out;
2989 UChar in[4];
2990 const UChar*source = in;
2991 int32_t off[80];
2992 int32_t* offsets = off;
2993 int numOffWritten=0;
2994 UBool flush = 0;
2995 converter = my_ucnv_open(enc, &err);
2996
2997 in[0] = 0x41; /* 0x4E00;*/
2998 in[1] = 0x4E01;
2999 in[2] = 0x4E02;
3000 in[3] = 0x4E03;
3001
3002 memset(off, '*', sizeof(off));
3003
3004 ucnv_fromUnicode (converter,
3005 &target,
3006 target+2,
3007 &source,
3008 source+3,
3009 offsets,
3010 flush,
3011 &err);
3012
3013 /* writes three bytes into the output buffer: 41 1B 24
3014 * but offsets contains 0 1 1
3015 */
3016 while(*offsets< off[10]){
3017 numOffWritten++;
3018 offsets++;
3019 }
3020 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3021 if(numOffWritten!= (int)(target-out)){
3022 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3023 }
3024
3025 err = U_ZERO_ERROR;
3026
3027 memset(off,'*' , sizeof(off));
3028
3029 flush = 1;
3030 offsets=off;
3031 ucnv_fromUnicode (converter,
3032 &target,
3033 target+4,
3034 &source,
3035 source,
3036 offsets,
3037 flush,
3038 &err);
3039 numOffWritten=0;
3040 while(*offsets< off[10]){
3041 numOffWritten++;
3042 if(*offsets!= -1){
3043 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3044 }
3045 offsets++;
3046 }
3047
3048 /* writes 42 43 7A into output buffer,
3049 * offsets contains -1 -1 -1
3050 */
3051 ucnv_close(converter);
3052 return 0;
3053}
3054
3055static void
3056TestHZ() {
3057 /* test input */
3058 static const uint16_t in[]={
3059 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3060 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3061 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3062 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3063 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3064 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3065 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3066 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3067 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3068 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3069 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3070 0x005A, 0x005B, 0x005C, 0x000A
3071 };
3072 const UChar* uSource;
3073 const UChar* uSourceLimit;
3074 const char* cSource;
3075 const char* cSourceLimit;
3076 UChar *uTargetLimit =NULL;
3077 UChar *uTarget;
3078 char *cTarget;
3079 const char *cTargetLimit;
3080 char *cBuf;
3081 UChar *uBuf,*test;
3082 int32_t uBufSize = 120;
3083 UErrorCode errorCode=U_ZERO_ERROR;
3084 UConverter *cnv;
3085 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3086 int32_t* myOff= offsets;
3087 cnv=ucnv_open("HZ", &errorCode);
3088 if(U_FAILURE(errorCode)) {
3089 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3090 return;
3091 }
3092
3093 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3094 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3095 uSource = (const UChar*)in;
3096 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3097 cTarget = cBuf;
3098 cTargetLimit = cBuf +uBufSize*5;
3099 uTarget = uBuf;
3100 uTargetLimit = uBuf+ uBufSize*5;
3101 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3102 if(U_FAILURE(errorCode)){
3103 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3104 return;
3105 }
3106 cSource = cBuf;
3107 cSourceLimit =cTarget;
3108 test =uBuf;
3109 myOff=offsets;
3110 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3111 if(U_FAILURE(errorCode)){
3112 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3113 return;
3114 }
3115 uSource = (const UChar*)in;
3116 while(uSource<uSourceLimit){
3117 if(*test!=*uSource){
3118
3119 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3120 }
3121 uSource++;
3122 test++;
3123 }
3124 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3125 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3126 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3127 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3128 TestJitterbug930("csISO2022JP");
3129 ucnv_close(cnv);
3130 free(offsets);
3131 free(uBuf);
3132 free(cBuf);
3133}
3134
3135static void
3136TestISCII(){
3137 /* test input */
3138 static const uint16_t in[]={
3139 /* test full range of Devanagari */
3140 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3141 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3142 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3143 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3144 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3145 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3146 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3147 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3148 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3149 0x096D,0x096E,0x096F,
3150 /* test Soft halant*/
3151 0x0915,0x094d, 0x200D,
3152 /* test explicit halant */
3153 0x0915,0x094d, 0x200c,
3154 /* test double danda */
3155 0x965,
3156 /* test ASCII */
3157 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3158 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3159 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3160 /* tests from Lotus */
3161 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3162 0x0930,0x094D,0x200D,
3163 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3164 0x0915,0x0921,0x002B,0x095F,
3165 /* tamil range */
3166 0x0B86, 0xB87, 0xB88,
3167 /* telugu range */
3168 0x0C05, 0x0C02, 0x0C03,0x0c31,
3169 /* kannada range */
3170 0x0C85, 0xC82, 0x0C83,
3171 /* test Abbr sign and Anudatta */
3172 0x0970, 0x952,
3173 /* 0x0958,
3174 0x0959,
3175 0x095A,
3176 0x095B,
3177 0x095C,
3178 0x095D,
3179 0x095E,
3180 0x095F,*/
3181 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3182 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3183 0x090C ,
3184 0x0962,
3185 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3186 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3187 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3188 0x093D /* Avagraha 0xEA, 0xE9*/,
3189 0x0958,
3190 0x0959,
3191 0x095A,
3192 0x095B,
3193 0x095C,
3194 0x095D,
3195 0x095E,
3196 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3197 };
3198 static const unsigned char byteArr[]={
3199
3200 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3201 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3202 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3203 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3204 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3205 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3206 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3207 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3208 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3209 0xf8,0xf9,0xfa,
3210 /* test soft halant */
3211 0xb3, 0xE8, 0xE9,
3212 /* test explicit halant */
3213 0xb3, 0xE8, 0xE8,
3214 /* test double danda */
3215 0xea, 0xea,
3216 /* test ASCII */
3217 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3218 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3219 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3220 /* test ATR code */
3221
3222 /* tests from Lotus */
3223 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3224 0xEF,0x42,0xCF,0xE8,0xD9,
3225 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3226 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3227 /* tamil range */
3228 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3229 /* telugu range */
3230 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3231 /* kannada range */
3232 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3233 /* anudatta and abbreviation sign */
3234 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3235
3236
3237 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3238
3239 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3240
3241 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3242
3243 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3244
3245 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3246
3247 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3248
3249 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3250
3251 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3252
3253 0xB3, 0xE9, /* Ka + NUKTA */
3254
3255 0xB4, 0xE9, /* Kha + NUKTA */
3256
3257 0xB5, 0xE9, /* Ga + NUKTA */
3258
3259 0xBA, 0xE9,
3260
3261 0xBF, 0xE9,
3262
3263 0xC0, 0xE9,
3264
3265 0xC9, 0xE9,
3266 /* INV halant RA */
3267 0xD9, 0xE8, 0xCF,
3268 0x00, 0x00A0,
3269 /* just consume unhandled codepoints */
3270 0xEF, 0x30,
3271
3272 };
3273 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3274 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3275
3276}
3277
3278static void
3279TestISO_2022_JP() {
3280 /* test input */
3281 static const uint16_t in[]={
3282 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3283 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3284 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3285 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3286 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3287 0x201D, 0x3014, 0x000D, 0x000A,
3288 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3289 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3290 };
3291 const UChar* uSource;
3292 const UChar* uSourceLimit;
3293 const char* cSource;
3294 const char* cSourceLimit;
3295 UChar *uTargetLimit =NULL;
3296 UChar *uTarget;
3297 char *cTarget;
3298 const char *cTargetLimit;
3299 char *cBuf;
3300 UChar *uBuf,*test;
3301 int32_t uBufSize = 120;
3302 UErrorCode errorCode=U_ZERO_ERROR;
3303 UConverter *cnv;
3304 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3305 int32_t* myOff= offsets;
3306 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3307 if(U_FAILURE(errorCode)) {
3308 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3309 return;
3310 }
3311
3312 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3313 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3314 uSource = (const UChar*)in;
3315 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3316 cTarget = cBuf;
3317 cTargetLimit = cBuf +uBufSize*5;
3318 uTarget = uBuf;
3319 uTargetLimit = uBuf+ uBufSize*5;
3320 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3321 if(U_FAILURE(errorCode)){
3322 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3323 return;
3324 }
3325 cSource = cBuf;
3326 cSourceLimit =cTarget;
3327 test =uBuf;
3328 myOff=offsets;
3329 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3330 if(U_FAILURE(errorCode)){
3331 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3332 return;
3333 }
3334
3335 uSource = (const UChar*)in;
3336 while(uSource<uSourceLimit){
3337 if(*test!=*uSource){
3338
3339 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3340 }
3341 uSource++;
3342 test++;
3343 }
3344
3345 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3346 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3347 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3348 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3349 TestJitterbug930("csISO2022JP");
3350 ucnv_close(cnv);
3351 free(uBuf);
3352 free(cBuf);
3353 free(offsets);
3354}
3355
3356static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3357 const UChar* uSource;
3358 const UChar* uSourceLimit;
3359 const char* cSource;
3360 const char* cSourceLimit;
3361 UChar *uTargetLimit =NULL;
3362 UChar *uTarget;
3363 char *cTarget;
3364 const char *cTargetLimit;
3365 char *cBuf;
3366 UChar *uBuf,*test;
3367 int32_t uBufSize = 120*10;
3368 UErrorCode errorCode=U_ZERO_ERROR;
3369 UConverter *cnv;
3370 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3371 int32_t* myOff= offsets;
3372 cnv=my_ucnv_open(conv, &errorCode);
3373 if(U_FAILURE(errorCode)) {
3374 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3375 return;
3376 }
3377
3378 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3379 cBuf =(char*)malloc(uBufSize * sizeof(char));
3380 uSource = (const UChar*)in;
3381 uSourceLimit=uSource+len;
3382 cTarget = cBuf;
3383 cTargetLimit = cBuf +uBufSize;
3384 uTarget = uBuf;
3385 uTargetLimit = uBuf+ uBufSize;
3386 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3387 if(U_FAILURE(errorCode)){
3388 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3389 return;
3390 }
3391 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3392 cSource = cBuf;
3393 cSourceLimit =cTarget;
3394 test =uBuf;
3395 myOff=offsets;
3396 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3397 if(U_FAILURE(errorCode)){
3398 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3399 return;
3400 }
3401
3402 uSource = (const UChar*)in;
3403 while(uSource<uSourceLimit){
3404 if(*test!=*uSource){
3405 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3406 }
3407 uSource++;
3408 test++;
3409 }
3410 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3411 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3412 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3413 if(byteArr && byteArrLen!=0){
3414 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3415 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3416 {
3417 cSource = byteArr;
3418 cSourceLimit = cSource+byteArrLen;
3419 test=uBuf;
3420 myOff = offsets;
3421 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3422 if(U_FAILURE(errorCode)){
3423 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3424 return;
3425 }
3426
3427 uSource = (const UChar*)in;
3428 while(uSource<uSourceLimit){
3429 if(*test!=*uSource){
3430 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3431 }
3432 uSource++;
3433 test++;
3434 }
3435 }
3436 }
3437
3438 ucnv_close(cnv);
3439 free(uBuf);
3440 free(cBuf);
3441 free(offsets);
3442}
3443static UChar U_CALLCONV
3444_charAt(int32_t offset, void *context) {
3445 return ((char*)context)[offset];
3446}
3447
3448static int32_t
3449unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3450 int32_t srcIndex=0;
3451 int32_t dstIndex=0;
3452 if(U_FAILURE(*status)){
3453 return 0;
3454 }
3455 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3456 *status = U_ILLEGAL_ARGUMENT_ERROR;
3457 return 0;
3458 }
3459 if(srcLen==-1){
3460 srcLen = (int32_t)uprv_strlen(src);
3461 }
3462
3463 for (; srcIndex<srcLen; ) {
3464 UChar32 c = src[srcIndex++];
3465 if (c == 0x005C /*'\\'*/) {
3466 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3467 if (c == (UChar32)0xFFFFFFFF) {
3468 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3469 break; /* invalid escape sequence */
3470 }
3471 }
3472 if(dstIndex < dstLen){
3473 if(c>0xFFFF){
3474 dst[dstIndex++] = UTF16_LEAD(c);
3475 if(dstIndex<dstLen){
3476 dst[dstIndex]=UTF16_TRAIL(c);
3477 }else{
3478 *status=U_BUFFER_OVERFLOW_ERROR;
3479 }
3480 }else{
3481 dst[dstIndex]=(UChar)c;
3482 }
3483
3484 }else{
3485 *status = U_BUFFER_OVERFLOW_ERROR;
3486 }
3487 dstIndex++; /* for preflighting */
3488 }
3489 return dstIndex;
3490}
3491
3492static void
3493TestFullRoundtrip(const char* cp){
3494 UChar usource[10] ={0};
3495 UChar nsrc[10] = {0};
3496 uint32_t i=1;
3497 int len=0, ulen;
3498 nsrc[0]=0x0061;
3499 /* Test codepoint 0 */
3500 TestConv(usource,1,cp,"",NULL,0);
3501 TestConv(usource,2,cp,"",NULL,0);
3502 nsrc[2]=0x5555;
3503 TestConv(nsrc,3,cp,"",NULL,0);
3504
3505 for(;i<=0x10FFFF;i++){
3506 if(i==0xD800){
3507 i=0xDFFF;
3508 continue;
3509 }
3510 if(i<=0xFFFF){
3511 usource[0] =(UChar) i;
3512 len=1;
3513 }else{
3514 usource[0]=UTF16_LEAD(i);
3515 usource[1]=UTF16_TRAIL(i);
3516 len=2;
3517 }
3518 ulen=len;
3519 if(i==0x80) {
3520 usource[2]=0;
3521 }
3522 /* Test only single code points */
3523 TestConv(usource,ulen,cp,"",NULL,0);
3524 /* Test codepoint repeated twice */
3525 usource[ulen]=usource[0];
3526 usource[ulen+1]=usource[1];
3527 ulen+=len;
3528 TestConv(usource,ulen,cp,"",NULL,0);
3529 /* Test codepoint repeated 3 times */
3530 usource[ulen]=usource[0];
3531 usource[ulen+1]=usource[1];
3532 ulen+=len;
3533 TestConv(usource,ulen,cp,"",NULL,0);
3534 /* Test codepoint in between 2 codepoints */
3535 nsrc[1]=usource[0];
3536 nsrc[2]=usource[1];
3537 nsrc[len+1]=0x5555;
3538 TestConv(nsrc,len+2,cp,"",NULL,0);
3539 uprv_memset(usource,0,sizeof(UChar)*10);
3540 }
3541}
3542
3543static void
3544TestRoundTrippingAllUTF(void){
3545 if(!getTestOption(QUICK_OPTION)){
3546 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3547 TestFullRoundtrip("BOCU-1");
3548 log_verbose("Running exhaustive round trip test for SCSU\n");
3549 TestFullRoundtrip("SCSU");
3550 log_verbose("Running exhaustive round trip test for UTF-8\n");
3551 TestFullRoundtrip("UTF-8");
3552 log_verbose("Running exhaustive round trip test for CESU-8\n");
3553 TestFullRoundtrip("CESU-8");
3554 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3555 TestFullRoundtrip("UTF-16BE");
3556 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3557 TestFullRoundtrip("UTF-16LE");
3558 log_verbose("Running exhaustive round trip test for UTF-16\n");
3559 TestFullRoundtrip("UTF-16");
3560 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3561 TestFullRoundtrip("UTF-32BE");
3562 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3563 TestFullRoundtrip("UTF-32LE");
3564 log_verbose("Running exhaustive round trip test for UTF-32\n");
3565 TestFullRoundtrip("UTF-32");
3566 log_verbose("Running exhaustive round trip test for UTF-7\n");
3567 TestFullRoundtrip("UTF-7");
3568 log_verbose("Running exhaustive round trip test for UTF-7\n");
3569 TestFullRoundtrip("UTF-7,version=1");
3570 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3571 TestFullRoundtrip("IMAP-mailbox-name");
3572 log_verbose("Running exhaustive round trip test for GB18030\n");
3573 TestFullRoundtrip("GB18030");
3574 }
3575}
3576
3577static void
3578TestSCSU() {
3579
3580 static const uint16_t germanUTF16[]={
3581 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3582 };
3583
3584 static const uint8_t germanSCSU[]={
3585 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3586 };
3587
3588 static const uint16_t russianUTF16[]={
3589 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3590 };
3591
3592 static const uint8_t russianSCSU[]={
3593 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3594 };
3595
3596 static const uint16_t japaneseUTF16[]={
3597 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3598 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3599 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3600 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3601 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3602 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3603 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3604 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3605 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3606 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3607 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3608 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3609 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3610 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3611 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3612 };
3613
3614 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3615 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3616 static const uint8_t japaneseSCSU[]={
3617 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3618 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3619 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3620 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3621 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3622 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3623 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3624 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3625 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3626 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3627 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3628 0xcb, 0x82
3629 };
3630
3631 static const uint16_t allFeaturesUTF16[]={
3632 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3633 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3634 0x01df, 0xf000, 0xdbff, 0xdfff
3635 };
3636
3637 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3638 * result here (34B vs. 35B)
3639 */
3640 static const uint8_t allFeaturesSCSU[]={
3641 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3642 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3643 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3644 0xdf, 0x14, 0x80, 0x15, 0xff
3645 };
3646 static const uint16_t monkeyIn[]={
3647 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3648 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3649 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3650 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3651 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3652 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3653 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3654 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3655 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3656 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3657 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3658 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3659 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3660 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3661 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3662 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3663 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3664 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3665 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3666 /* test non-BMP code points */
3667 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3668 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3669 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3670 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3671 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3672 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3673 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3674 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3675 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3676 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3677 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3678
3679
3680 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3681 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3682 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3683 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3684 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3685 };
3686 static const char *fTestCases [] = {
3687 "\\ud800\\udc00", /* smallest surrogate*/
3688 "\\ud8ff\\udcff",
3689 "\\udBff\\udFff", /* largest surrogate pair*/
3690 "\\ud834\\udc00",
3691 "\\U0010FFFF",
3692 "Hello \\u9292 \\u9192 World!",
3693 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3694 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3695
3696 "\\u0648\\u06c8", /* catch missing reset*/
3697 "\\u0648\\u06c8",
3698
3699 "\\u4444\\uE001", /* lowest quotable*/
3700 "\\u4444\\uf2FF", /* highest quotable*/
3701 "\\u4444\\uf188\\u4444",
3702 "\\u4444\\uf188\\uf288",
3703 "\\u4444\\uf188abc\\u0429\\uf288",
3704 "\\u9292\\u2222",
3705 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3706 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3707 "Hello World!123456",
3708 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3709
3710 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3711 "abc\\u4411d", /* uses SQU*/
3712 "abc\\u4411\\u4412d",/* uses SCU*/
3713 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3714 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3715 "\\u9292\\u2222",
3716 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3717 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3718 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3719
3720 "", /* empty input*/
3721 "\\u0000", /* smallest BMP character*/
3722 "\\uFFFF", /* largest BMP character*/
3723
3724 /* regression tests*/
3725 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3726 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3727 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3728 "\\u0041\\u00df\\u0401\\u015f",
3729 "\\u9066\\u2123abc",
3730 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3731 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3732 };
3733 int i=0;
3734 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3735 const char* cSrc = fTestCases[i];
3736 UErrorCode status = U_ZERO_ERROR;
3737 int32_t cSrcLen,srcLen;
3738 UChar* src;
3739 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3740 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3741 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3742 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3743 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3744 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3745 free(src);
3746 }
3747 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3748 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3749 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3750 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3751 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3752 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3753 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3754}
3755
3756#if !UCONFIG_NO_LEGACY_CONVERSION
3757static void TestJitterbug2346(){
3758 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3759 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3760 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3761
3762 UChar uTarget[500]={'\0'};
3763 UChar* utarget=uTarget;
3764 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3765
3766 char cTarget[500]={'\0'};
3767 char* ctarget=cTarget;
3768 char* ctargetLimit=cTarget+sizeof(cTarget);
3769 const char* csource=source;
3770 UChar* temp = expected;
3771 UErrorCode err=U_ZERO_ERROR;
3772
3773 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3774 if(U_FAILURE(err)) {
3775 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3776 return;
3777 }
3778 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3779 if(U_FAILURE(err)) {
3780 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3781 return;
3782 }
3783 utargetLimit=utarget;
3784 utarget = uTarget;
3785 while(utarget<utargetLimit){
3786 if(*temp!=*utarget){
3787
3788 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3789 }
3790 utarget++;
3791 temp++;
3792 }
3793 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3794 if(U_FAILURE(err)) {
3795 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3796 return;
3797 }
3798 ctargetLimit=ctarget;
3799 ctarget =cTarget;
3800 ucnv_close(conv);
3801
3802
3803}
3804
3805static void
3806TestISO_2022_JP_1() {
3807 /* test input */
3808 static const uint16_t in[]={
3809 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3810 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3811 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3812 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3813 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3814 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3815 0x201D, 0x000D, 0x000A,
3816 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3817 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3818 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3819 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3820 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3821 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3822 };
3823 const UChar* uSource;
3824 const UChar* uSourceLimit;
3825 const char* cSource;
3826 const char* cSourceLimit;
3827 UChar *uTargetLimit =NULL;
3828 UChar *uTarget;
3829 char *cTarget;
3830 const char *cTargetLimit;
3831 char *cBuf;
3832 UChar *uBuf,*test;
3833 int32_t uBufSize = 120;
3834 UErrorCode errorCode=U_ZERO_ERROR;
3835 UConverter *cnv;
3836
3837 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3838 if(U_FAILURE(errorCode)) {
3839 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3840 return;
3841 }
3842
3843 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3844 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3845 uSource = (const UChar*)in;
3846 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3847 cTarget = cBuf;
3848 cTargetLimit = cBuf +uBufSize*5;
3849 uTarget = uBuf;
3850 uTargetLimit = uBuf+ uBufSize*5;
3851 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3852 if(U_FAILURE(errorCode)){
3853 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3854 return;
3855 }
3856 cSource = cBuf;
3857 cSourceLimit =cTarget;
3858 test =uBuf;
3859 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3860 if(U_FAILURE(errorCode)){
3861 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3862 return;
3863 }
3864 uSource = (const UChar*)in;
3865 while(uSource<uSourceLimit){
3866 if(*test!=*uSource){
3867
3868 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3869 }
3870 uSource++;
3871 test++;
3872 }
3873 /*ucnv_close(cnv);
3874 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3875 /*Test for the condition where there is an invalid character*/
3876 ucnv_reset(cnv);
3877 {
3878 static const uint8_t source2[]={0x0e,0x24,0x053};
3879 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3880 }
3881 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3882 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3883 ucnv_close(cnv);
3884 free(uBuf);
3885 free(cBuf);
3886}
3887
3888static void
3889TestISO_2022_JP_2() {
3890 /* test input */
3891 static const uint16_t in[]={
3892 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3893 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3894 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3895 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3896 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3897 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3898 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3899 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3900 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3901 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3902 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3903 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3904 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3905 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3906 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3907 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3908 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3909 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3910 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3911 };
3912 const UChar* uSource;
3913 const UChar* uSourceLimit;
3914 const char* cSource;
3915 const char* cSourceLimit;
3916 UChar *uTargetLimit =NULL;
3917 UChar *uTarget;
3918 char *cTarget;
3919 const char *cTargetLimit;
3920 char *cBuf;
3921 UChar *uBuf,*test;
3922 int32_t uBufSize = 120;
3923 UErrorCode errorCode=U_ZERO_ERROR;
3924 UConverter *cnv;
3925 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3926 int32_t* myOff= offsets;
3927 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3928 if(U_FAILURE(errorCode)) {
3929 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3930 return;
3931 }
3932
3933 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3934 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3935 uSource = (const UChar*)in;
3936 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3937 cTarget = cBuf;
3938 cTargetLimit = cBuf +uBufSize*5;
3939 uTarget = uBuf;
3940 uTargetLimit = uBuf+ uBufSize*5;
3941 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3942 if(U_FAILURE(errorCode)){
3943 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3944 return;
3945 }
3946 cSource = cBuf;
3947 cSourceLimit =cTarget;
3948 test =uBuf;
3949 myOff=offsets;
3950 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3951 if(U_FAILURE(errorCode)){
3952 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3953 return;
3954 }
3955 uSource = (const UChar*)in;
3956 while(uSource<uSourceLimit){
3957 if(*test!=*uSource){
3958
3959 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3960 }
3961 uSource++;
3962 test++;
3963 }
3964 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3965 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3966 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3967 /*Test for the condition where there is an invalid character*/
3968 ucnv_reset(cnv);
3969 {
3970 static const uint8_t source2[]={0x0e,0x24,0x053};
3971 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3972 }
3973 ucnv_close(cnv);
3974 free(uBuf);
3975 free(cBuf);
3976 free(offsets);
3977}
3978
3979static void
3980TestISO_2022_KR() {
3981 /* test input */
3982 static const uint16_t in[]={
3983 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3984 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3985 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3986 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3987 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3988 ,0x53E3,0x53E4,0x000A,0x000D};
3989 const UChar* uSource;
3990 const UChar* uSourceLimit;
3991 const char* cSource;
3992 const char* cSourceLimit;
3993 UChar *uTargetLimit =NULL;
3994 UChar *uTarget;
3995 char *cTarget;
3996 const char *cTargetLimit;
3997 char *cBuf;
3998 UChar *uBuf,*test;
3999 int32_t uBufSize = 120;
4000 UErrorCode errorCode=U_ZERO_ERROR;
4001 UConverter *cnv;
4002 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4003 int32_t* myOff= offsets;
4004 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4005 if(U_FAILURE(errorCode)) {
4006 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4007 return;
4008 }
4009
4010 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4011 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4012 uSource = (const UChar*)in;
4013 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4014 cTarget = cBuf;
4015 cTargetLimit = cBuf +uBufSize*5;
4016 uTarget = uBuf;
4017 uTargetLimit = uBuf+ uBufSize*5;
4018 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4019 if(U_FAILURE(errorCode)){
4020 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4021 return;
4022 }
4023 cSource = cBuf;
4024 cSourceLimit =cTarget;
4025 test =uBuf;
4026 myOff=offsets;
4027 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4028 if(U_FAILURE(errorCode)){
4029 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4030 return;
4031 }
4032 uSource = (const UChar*)in;
4033 while(uSource<uSourceLimit){
4034 if(*test!=*uSource){
4035 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4036 }
4037 uSource++;
4038 test++;
4039 }
4040 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4041 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4042 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4043 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4044 TestJitterbug930("csISO2022KR");
4045 /*Test for the condition where there is an invalid character*/
4046 ucnv_reset(cnv);
4047 {
4048 static const uint8_t source2[]={0x1b,0x24,0x053};
4049 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4050 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4051 }
4052 ucnv_close(cnv);
4053 free(uBuf);
4054 free(cBuf);
4055 free(offsets);
4056}
4057
4058static void
4059TestISO_2022_KR_1() {
4060 /* test input */
4061 static const uint16_t in[]={
4062 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4063 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4064 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4065 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4066 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4067 ,0x53E3,0x53E4,0x000A,0x000D};
4068 const UChar* uSource;
4069 const UChar* uSourceLimit;
4070 const char* cSource;
4071 const char* cSourceLimit;
4072 UChar *uTargetLimit =NULL;
4073 UChar *uTarget;
4074 char *cTarget;
4075 const char *cTargetLimit;
4076 char *cBuf;
4077 UChar *uBuf,*test;
4078 int32_t uBufSize = 120;
4079 UErrorCode errorCode=U_ZERO_ERROR;
4080 UConverter *cnv;
4081 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4082 int32_t* myOff= offsets;
4083 cnv=ucnv_open("ibm-25546", &errorCode);
4084 if(U_FAILURE(errorCode)) {
4085 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4086 return;
4087 }
4088
4089 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4090 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4091 uSource = (const UChar*)in;
4092 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4093 cTarget = cBuf;
4094 cTargetLimit = cBuf +uBufSize*5;
4095 uTarget = uBuf;
4096 uTargetLimit = uBuf+ uBufSize*5;
4097 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4098 if(U_FAILURE(errorCode)){
4099 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4100 return;
4101 }
4102 cSource = cBuf;
4103 cSourceLimit =cTarget;
4104 test =uBuf;
4105 myOff=offsets;
4106 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4107 if(U_FAILURE(errorCode)){
4108 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4109 return;
4110 }
4111 uSource = (const UChar*)in;
4112 while(uSource<uSourceLimit){
4113 if(*test!=*uSource){
4114 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4115 }
4116 uSource++;
4117 test++;
4118 }
4119 ucnv_reset(cnv);
4120 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4121 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4122 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4123 ucnv_reset(cnv);
4124 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4125 /*Test for the condition where there is an invalid character*/
4126 ucnv_reset(cnv);
4127 {
4128 static const uint8_t source2[]={0x1b,0x24,0x053};
4129 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4131 }
4132 ucnv_close(cnv);
4133 free(uBuf);
4134 free(cBuf);
4135 free(offsets);
4136}
4137
4138static void TestJitterbug2411(){
4139 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4140 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4141 UConverter* kr=NULL, *kr1=NULL;
4142 UErrorCode errorCode = U_ZERO_ERROR;
4143 UChar tgt[100]={'\0'};
4144 UChar* target = tgt;
4145 UChar* targetLimit = target+100;
4146 kr=ucnv_open("iso-2022-kr", &errorCode);
4147 if(U_FAILURE(errorCode)) {
4148 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4149 return;
4150 }
4151 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4152 if(U_FAILURE(errorCode)) {
4153 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4154 return;
4155 }
4156 kr1 = ucnv_open("ibm-25546", &errorCode);
4157 if(U_FAILURE(errorCode)) {
4158 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4159 return;
4160 }
4161 target = tgt;
4162 targetLimit = target+100;
4163 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4164
4165 if(U_FAILURE(errorCode)) {
4166 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4167 return;
4168 }
4169
4170 ucnv_close(kr);
4171 ucnv_close(kr1);
4172
4173}
4174
4175static void
4176TestJIS(){
4177 /* From Unicode moved to testdata/conversion.txt */
4178 /*To Unicode*/
4179 {
4180 static const uint8_t sampleTextJIS[] = {
4181 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4182 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4183 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4184 };
4185 static const uint16_t expectedISO2022JIS[] = {
4186 0x0041, 0x0042,
4187 0xFF81, 0xFF82,
4188 0x3000
4189 };
4190 static const int32_t toISO2022JISOffs[]={
4191 3,4,
4192 8,9,
4193 16
4194 };
4195
4196 static const uint8_t sampleTextJIS7[] = {
4197 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4198 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4199 0x1b,0x24,0x42,0x21,0x21,
4200 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4201 0x21,0x22,
4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4203 };
4204 static const uint16_t expectedISO2022JIS7[] = {
4205 0x0041, 0x0042,
4206 0xFF81, 0xFF82,
4207 0x3000,
4208 0xFF81, 0xFF82,
4209 0x3001,
4210 0x3000
4211 };
4212 static const int32_t toISO2022JIS7Offs[]={
4213 3,4,
4214 8,9,
4215 13,16,
4216 17,
4217 19,27
4218 };
4219 static const uint8_t sampleTextJIS8[] = {
4220 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4221 0xa1,0xc8,0xd9,/*Katakana Set*/
4222 0x1b,0x28,0x42,
4223 0x41,0x42,
4224 0xb1,0xc3, /*Katakana Set*/
4225 0x1b,0x24,0x42,0x21,0x21
4226 };
4227 static const uint16_t expectedISO2022JIS8[] = {
4228 0x0041, 0x0042,
4229 0xff61, 0xff88, 0xff99,
4230 0x0041, 0x0042,
4231 0xff71, 0xff83,
4232 0x3000
4233 };
4234 static const int32_t toISO2022JIS8Offs[]={
4235 3, 4, 5, 6,
4236 7, 11, 12, 13,
4237 14, 18,
4238 };
4239
4240 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4241 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4242 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4243 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4244 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4245 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4246 }
4247
4248}
4249
4250
4251#if 0
4252 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4253
4254static void TestJitterbug915(){
4255/* tests for roundtripping of the below sequence
4256\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4257\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4258\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4259\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4260\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4261\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4262\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4263*/
4264 static const char cSource[]={
4265 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4266 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4267 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4268 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4269 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4270 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4271 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4272 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4273 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4274 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4275 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4276 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4277 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4278 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4279 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4280 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4281 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4282 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4283 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4284 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4285 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4286 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4287 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4288 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4289 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4290 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4291 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4292 0x37, 0x20, 0x2A, 0x2F
4293 };
4294 UChar uTarget[500]={'\0'};
4295 UChar* utarget=uTarget;
4296 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4297
4298 char cTarget[500]={'\0'};
4299 char* ctarget=cTarget;
4300 char* ctargetLimit=cTarget+sizeof(cTarget);
4301 const char* csource=cSource;
4302 const char* tempSrc = cSource;
4303 UErrorCode err=U_ZERO_ERROR;
4304
4305 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4306 if(U_FAILURE(err)) {
4307 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4308 return;
4309 }
4310 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4311 if(U_FAILURE(err)) {
4312 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4313 return;
4314 }
4315 utargetLimit=utarget;
4316 utarget = uTarget;
4317 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4318 if(U_FAILURE(err)) {
4319 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4320 return;
4321 }
4322 ctargetLimit=ctarget;
4323 ctarget =cTarget;
4324 while(ctarget<ctargetLimit){
4325 if(*ctarget != *tempSrc){
4326 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4327 }
4328 ++ctarget;
4329 ++tempSrc;
4330 }
4331
4332 ucnv_close(conv);
4333}
4334
4335static void
4336TestISO_2022_CN_EXT() {
4337 /* test input */
4338 static const uint16_t in[]={
4339 /* test Non-BMP code points */
4340 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4341 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4342 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4343 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4344 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4345 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4346 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4347 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4348 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4349 0xD869, 0xDED5,
4350
4351 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4352 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4353 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4354 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4355 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4356 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4357 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4358 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4359 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4360 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4361 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4362 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4363 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4364 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4365 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4366 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4367 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4368 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4369
4370 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4371
4372 };
4373
4374 const UChar* uSource;
4375 const UChar* uSourceLimit;
4376 const char* cSource;
4377 const char* cSourceLimit;
4378 UChar *uTargetLimit =NULL;
4379 UChar *uTarget;
4380 char *cTarget;
4381 const char *cTargetLimit;
4382 char *cBuf;
4383 UChar *uBuf,*test;
4384 int32_t uBufSize = 180;
4385 UErrorCode errorCode=U_ZERO_ERROR;
4386 UConverter *cnv;
4387 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4388 int32_t* myOff= offsets;
4389 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4390 if(U_FAILURE(errorCode)) {
4391 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4392 return;
4393 }
4394
4395 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4396 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4397 uSource = (const UChar*)in;
4398 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4399 cTarget = cBuf;
4400 cTargetLimit = cBuf +uBufSize*5;
4401 uTarget = uBuf;
4402 uTargetLimit = uBuf+ uBufSize*5;
4403 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4404 if(U_FAILURE(errorCode)){
4405 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4406 return;
4407 }
4408 cSource = cBuf;
4409 cSourceLimit =cTarget;
4410 test =uBuf;
4411 myOff=offsets;
4412 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4413 if(U_FAILURE(errorCode)){
4414 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4415 return;
4416 }
4417 uSource = (const UChar*)in;
4418 while(uSource<uSourceLimit){
4419 if(*test!=*uSource){
4420 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4421 }
4422 else{
4423 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4424 }
4425 uSource++;
4426 test++;
4427 }
4428 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4429 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4430 /*Test for the condition where there is an invalid character*/
4431 ucnv_reset(cnv);
4432 {
4433 static const uint8_t source2[]={0x0e,0x24,0x053};
4434 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4435 }
4436 ucnv_close(cnv);
4437 free(uBuf);
4438 free(cBuf);
4439 free(offsets);
4440}
4441#endif
4442
4443static void
4444TestISO_2022_CN() {
4445 /* test input */
4446 static const uint16_t in[]={
4447 /* jitterbug 951 */
4448 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4449 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4450 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4451 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4452 0x0020, 0x0045, 0x004e, 0x0044,
4453 /**/
4454 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4455 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4456 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4457 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4458 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4459 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4460 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4461 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4462 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4463 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4464 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4465 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4466 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4467 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4468 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4469 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4470 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4471
4472 };
4473 const UChar* uSource;
4474 const UChar* uSourceLimit;
4475 const char* cSource;
4476 const char* cSourceLimit;
4477 UChar *uTargetLimit =NULL;
4478 UChar *uTarget;
4479 char *cTarget;
4480 const char *cTargetLimit;
4481 char *cBuf;
4482 UChar *uBuf,*test;
4483 int32_t uBufSize = 180;
4484 UErrorCode errorCode=U_ZERO_ERROR;
4485 UConverter *cnv;
4486 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4487 int32_t* myOff= offsets;
4488 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4489 if(U_FAILURE(errorCode)) {
4490 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4491 return;
4492 }
4493
4494 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4495 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4496 uSource = (const UChar*)in;
4497 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4498 cTarget = cBuf;
4499 cTargetLimit = cBuf +uBufSize*5;
4500 uTarget = uBuf;
4501 uTargetLimit = uBuf+ uBufSize*5;
4502 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4503 if(U_FAILURE(errorCode)){
4504 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4505 return;
4506 }
4507 cSource = cBuf;
4508 cSourceLimit =cTarget;
4509 test =uBuf;
4510 myOff=offsets;
4511 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4512 if(U_FAILURE(errorCode)){
4513 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4514 return;
4515 }
4516 uSource = (const UChar*)in;
4517 while(uSource<uSourceLimit){
4518 if(*test!=*uSource){
4519 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4520 }
4521 else{
4522 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4523 }
4524 uSource++;
4525 test++;
4526 }
4527 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4528 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4529 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4530 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4531 TestJitterbug930("csISO2022CN");
4532 /*Test for the condition where there is an invalid character*/
4533 ucnv_reset(cnv);
4534 {
4535 static const uint8_t source2[]={0x0e,0x24,0x053};
4536 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4537 }
4538
4539 ucnv_close(cnv);
4540 free(uBuf);
4541 free(cBuf);
4542 free(offsets);
4543}
4544
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN and HZ: the toUnicode
 * callback must be invoked with UConverterCallbackReason UCNV_IRREGULAR.
 * Each EmptySegmentTest describes one converter / input pair.
 */
typedef struct {
    const char *converterName;   /* converter name passed to ucnv_open() */
    const char *inputText;       /* raw input bytes to convert */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4551
4552/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4553static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4554 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4555 if (reason > UCNV_IRREGULAR) {
4556 return;
4557 }
4558 if (reason != UCNV_IRREGULAR) {
4559 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4560 }
4561 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4562 *err = U_ZERO_ERROR;
4563 ucnv_cbToUWriteSub(toArgs,0,err);
4564}
4565
4566enum { kEmptySegmentToUCharsMax = 64 };
4567static void TestJitterbug6175(void) {
4568 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4569 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4570 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4571 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4572 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4573 static const EmptySegmentTest emptySegmentTests[] = {
4574 /* converterName inputText inputTextLength */
4575 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4576 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4577 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4578 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4579 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4580 /* terminator: */
4581 { NULL, NULL, 0, }
4582 };
4583 const EmptySegmentTest * testPtr;
4584 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4585 UErrorCode err = U_ZERO_ERROR;
4586 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4587 if (U_FAILURE(err)) {
4588 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4589 return;
4590 }
4591 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4592 if (U_FAILURE(err)) {
4593 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4594 ucnv_close(cnv);
4595 return;
4596 }
4597 {
4598 UChar toUChars[kEmptySegmentToUCharsMax];
4599 UChar * toUCharsPtr = toUChars;
4600 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4601 const char * inCharsPtr = testPtr->inputText;
4602 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4603 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4604 }
4605 ucnv_close(cnv);
4606 }
4607}
4608
4609static void
4610TestEBCDIC_STATEFUL() {
4611 /* test input */
4612 static const uint8_t in[]={
4613 0x61,
4614 0x1a,
4615 0x0f, 0x4b,
4616 0x42,
4617 0x40,
4618 0x36,
4619 };
4620
4621 /* expected test results */
4622 static const int32_t results[]={
4623 /* number of bytes read, code point */
4624 1, 0x002f,
4625 1, 0x0092,
4626 2, 0x002e,
4627 1, 0xff62,
4628 1, 0x0020,
4629 1, 0x0096,
4630
4631 };
4632 static const uint8_t in2[]={
4633 0x0f,
4634 0xa1,
4635 0x01
4636 };
4637
4638 /* expected test results */
4639 static const int32_t results2[]={
4640 /* number of bytes read, code point */
4641 2, 0x203E,
4642 1, 0x0001,
4643 };
4644
4645 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4646 UErrorCode errorCode=U_ZERO_ERROR;
4647 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4648 if(U_FAILURE(errorCode)) {
4649 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4650 return;
4651 }
4652 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4653 ucnv_reset(cnv);
4654 /* Test the condition when source >= sourceLimit */
4655 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4656 ucnv_reset(cnv);
4657 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4658 {
4659 static const uint8_t source1[]={0x0f};
4660 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4661 }
4662 /*Test for the condition where there is an invalid character*/
4663 ucnv_reset(cnv);
4664 {
4665 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4666 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4667 }
4668 ucnv_reset(cnv);
4669 source=(const char*)in2;
4670 limit=(const char*)in2+sizeof(in2);
4671 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4672 ucnv_close(cnv);
4673
4674}
4675
4676static void
4677TestGB18030() {
4678 /* test input */
4679 static const uint8_t in[]={
4680 0x24,
4681 0x7f,
4682 0x81, 0x30, 0x81, 0x30,
4683 0xa8, 0xbf,
4684 0xa2, 0xe3,
4685 0xd2, 0xbb,
4686 0x82, 0x35, 0x8f, 0x33,
4687 0x84, 0x31, 0xa4, 0x39,
4688 0x90, 0x30, 0x81, 0x30,
4689 0xe3, 0x32, 0x9a, 0x35
4690#if 0
4691 /*
4692 * Feature removed markus 2000-oct-26
4693 * Only some codepages must match surrogate pairs into supplementary code points -
4694 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4695 * GB 18030 provides direct encodings for supplementary code points, therefore
4696 * it must not combine two single-encoded surrogates into one code point.
4697 */
4698 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4699#endif
4700 };
4701
4702 /* expected test results */
4703 static const int32_t results[]={
4704 /* number of bytes read, code point */
4705 1, 0x24,
4706 1, 0x7f,
4707 4, 0x80,
4708 2, 0x1f9,
4709 2, 0x20ac,
4710 2, 0x4e00,
4711 4, 0x9fa6,
4712 4, 0xffff,
4713 4, 0x10000,
4714 4, 0x10ffff
4715#if 0
4716 /* Feature removed. See comment above. */
4717 8, 0x10000
4718#endif
4719 };
4720
4721/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4722 UErrorCode errorCode=U_ZERO_ERROR;
4723 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4724 if(U_FAILURE(errorCode)) {
4725 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4726 return;
4727 }
4728 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4729 ucnv_close(cnv);
4730}
4731
4732static void
4733TestLMBCS() {
4734 /* LMBCS-1 string */
4735 static const uint8_t pszLMBCS[]={
4736 0x61,
4737 0x01, 0x29,
4738 0x81,
4739 0xA0,
4740 0x0F, 0x27,
4741 0x0F, 0x91,
4742 0x14, 0x0a, 0x74,
4743 0x14, 0xF6, 0x02,
4744 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4745 0x10, 0x88, 0xA0,
4746 };
4747
4748 /* Unicode UChar32 equivalents */
4749 static const UChar32 pszUnicode32[]={
4750 /* code point */
4751 0x00000061,
4752 0x00002013,
4753 0x000000FC,
4754 0x000000E1,
4755 0x00000007,
4756 0x00000091,
4757 0x00000a74,
4758 0x00000200,
4759 0x00023456, /* code point for surrogate pair */
4760 0x00005516
4761 };
4762
4763/* Unicode UChar equivalents */
4764 static const UChar pszUnicode[]={
4765 /* code point */
4766 0x0061,
4767 0x2013,
4768 0x00FC,
4769 0x00E1,
4770 0x0007,
4771 0x0091,
4772 0x0a74,
4773 0x0200,
4774 0xD84D, /* low surrogate */
4775 0xDC56, /* high surrogate */
4776 0x5516
4777 };
4778
4779/* expected test results */
4780 static const int offsets32[]={
4781 /* number of bytes read, code point */
4782 0,
4783 1,
4784 3,
4785 4,
4786 5,
4787 7,
4788 9,
4789 12,
4790 15,
4791 21,
4792 24
4793 };
4794
4795/* expected test results */
4796 static const int offsets[]={
4797 /* number of bytes read, code point */
4798 0,
4799 1,
4800 3,
4801 4,
4802 5,
4803 7,
4804 9,
4805 12,
4806 15,
4807 18,
4808 21,
4809 24
4810 };
4811
4812
4813 UConverter *cnv;
4814
4815#define NAME_LMBCS_1 "LMBCS-1"
4816#define NAME_LMBCS_2 "LMBCS-2"
4817
4818
4819 /* Some basic open/close/property tests on some LMBCS converters */
4820 {
4821
4822 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4823 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4824 char get_subchars [1];
4825 const char * get_name;
4826 UConverter *cnv1;
4827 UConverter *cnv2;
4828
4829 int8_t len = sizeof(get_subchars);
4830
4831 UErrorCode errorCode=U_ZERO_ERROR;
4832
4833 /* Open */
4834 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4835 if(U_FAILURE(errorCode)) {
4836 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4837 return;
4838 }
4839 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4840 if(U_FAILURE(errorCode)) {
4841 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4842 return;
4843 }
4844
4845 /* Name */
4846 get_name = ucnv_getName (cnv1, &errorCode);
4847 if (strcmp(NAME_LMBCS_1,get_name)){
4848 log_err("Unexpected converter name: %s\n", get_name);
4849 }
4850 get_name = ucnv_getName (cnv2, &errorCode);
4851 if (strcmp(NAME_LMBCS_2,get_name)){
4852 log_err("Unexpected converter name: %s\n", get_name);
4853 }
4854
4855 /* substitution chars */
4856 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4857 if(U_FAILURE(errorCode)) {
4858 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4859 }
4860 if (len!=1){
4861 log_err("Unexpected length of sub chars\n");
4862 }
4863 if (get_subchars[0] != expected_subchars[0]){
4864 log_err("Unexpected value of sub chars\n");
4865 }
4866 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4867 if(U_FAILURE(errorCode)) {
4868 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4869 }
4870 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4871 if(U_FAILURE(errorCode)) {
4872 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4873 }
4874 if (len!=1){
4875 log_err("Unexpected length of sub chars\n");
4876 }
4877 if (get_subchars[0] != new_subchars[0]){
4878 log_err("Unexpected value of sub chars\n");
4879 }
4880 ucnv_close(cnv1);
4881 ucnv_close(cnv2);
4882
4883 }
4884
4885 /* LMBCS to Unicode - offsets */
4886 {
4887 UErrorCode errorCode=U_ZERO_ERROR;
4888
4889 const char * pSource = (const char *)pszLMBCS;
4890 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4891
4892 UChar Out [sizeof(pszUnicode) + 1];
4893 UChar * pOut = Out;
4894 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4895
4896 int32_t off [sizeof(offsets)];
4897
4898 /* last 'offset' in expected results is just the final size.
4899 (Makes other tests easier). Compensate here: */
4900
4901 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4902
4903
4904
4905 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4906 if(U_FAILURE(errorCode)) {
4907 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4908 return;
4909 }
4910
4911
4912
4913 ucnv_toUnicode (cnv,
4914 &pOut,
4915 OutLimit,
4916 &pSource,
4917 sourceLimit,
4918 off,
4919 TRUE,
4920 &errorCode);
4921
4922
4923 if (memcmp(off,offsets,sizeof(offsets)))
4924 {
4925 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4926 }
4927 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4928 {
4929 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4930 }
4931 ucnv_close(cnv);
4932 }
4933 {
4934 /* LMBCS to Unicode - getNextUChar */
4935 const char * sourceStart;
4936 const char *source=(const char *)pszLMBCS;
4937 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4938 const UChar32 *results= pszUnicode32;
4939 const int *off = offsets32;
4940
4941 UErrorCode errorCode=U_ZERO_ERROR;
4942 UChar32 uniChar;
4943
4944 cnv=ucnv_open("LMBCS-1", &errorCode);
4945 if(U_FAILURE(errorCode)) {
4946 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4947 return;
4948 }
4949 else
4950 {
4951
4952 while(source<limit) {
4953 sourceStart=source;
4954 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4955 if(U_FAILURE(errorCode)) {
4956 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4957 break;
4958 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4959 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4960 uniChar, (source-sourceStart), *results, *off);
4961 break;
4962 }
4963 results++;
4964 off++;
4965 }
4966 }
4967 ucnv_close(cnv);
4968 }
4969 { /* test locale & optimization group operations: Unicode to LMBCS */
4970
4971 UErrorCode errorCode=U_ZERO_ERROR;
4972 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4973 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4974 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4975 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4976 const UChar * pUniOut = uniString;
4977 UChar * pUniIn = uniString;
4978 uint8_t lmbcsString [4];
4979 const char * pLMBCSOut = (const char *)lmbcsString;
4980 char * pLMBCSIn = (char *)lmbcsString;
4981
4982 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4983 ucnv_fromUnicode (cnv16he,
4984 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4985 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4986 NULL, 1, &errorCode);
4987
4988 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4989 {
4990 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4991 }
4992
4993 pLMBCSIn= (char *)lmbcsString;
4994 pUniOut = uniString;
4995 ucnv_fromUnicode (cnv01us,
4996 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4997 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4998 NULL, 1, &errorCode);
4999
5000 if (lmbcsString[0] != 0x9F)
5001 {
5002 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5003 }
5004
5005 /* single byte char from mbcs char set */
5006 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5007 pLMBCSOut = (const char *)lmbcsString;
5008 pUniIn = uniString;
5009 ucnv_toUnicode (cnv16jp,
5010 &pUniIn, pUniIn + 1,
5011 &pLMBCSOut, (pLMBCSOut + 1),
5012 NULL, 1, &errorCode);
5013 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5014 {
5015 log_err("Unexpected results from LMBCS-16 single byte char\n");
5016 }
5017 /* convert to group 1: should be 3 bytes */
5018 pLMBCSIn = (char *)lmbcsString;
5019 pUniOut = uniString;
5020 ucnv_fromUnicode (cnv01us,
5021 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5022 &pUniOut, pUniOut + 1,
5023 NULL, 1, &errorCode);
5024 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5025 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5026 {
5027 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5028 }
5029 pLMBCSOut = (const char *)lmbcsString;
5030 pUniIn = uniString;
5031 ucnv_toUnicode (cnv01us,
5032 &pUniIn, pUniIn + 1,
5033 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5034 NULL, 1, &errorCode);
5035 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5036 {
5037 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5038 }
5039 pLMBCSIn = (char *)lmbcsString;
5040 pUniOut = uniString;
5041 ucnv_fromUnicode (cnv16jp,
5042 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5043 &pUniOut, pUniOut + 1,
5044 NULL, 1, &errorCode);
5045 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5046 {
5047 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5048 }
5049 ucnv_close(cnv16he);
5050 ucnv_close(cnv16jp);
5051 ucnv_close(cnv01us);
5052 }
5053 {
5054 /* Small source buffer testing, LMBCS -> Unicode */
5055
5056 UErrorCode errorCode=U_ZERO_ERROR;
5057
5058 const char * pSource = (const char *)pszLMBCS;
5059 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5060 int codepointCount = 0;
5061
5062 UChar Out [sizeof(pszUnicode) + 1];
5063 UChar * pOut = Out;
5064 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5065
5066
5067 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5068 if(U_FAILURE(errorCode)) {
5069 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5070 return;
5071 }
5072
5073
5074 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5075 {
5076 ucnv_toUnicode (cnv,
5077 &pOut,
5078 OutLimit,
5079 &pSource,
5080 (pSource+1), /* claim that this is a 1- byte buffer */
5081 NULL,
5082 FALSE, /* FALSE means there might be more chars in the next buffer */
5083 &errorCode);
5084
5085 if (U_SUCCESS (errorCode))
5086 {
5087 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5088 {
5089 /* we are on to the next code point: check value */
5090
5091 if (Out[0] != pszUnicode[codepointCount]){
5092 log_err("LMBCS->Uni result %lx should have been %lx \n",
5093 Out[0], pszUnicode[codepointCount]);
5094 }
5095
5096 pOut = Out; /* reset for accumulating next code point */
5097 codepointCount++;
5098 }
5099 }
5100 else
5101 {
5102 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5103 }
5104 }
5105 {
5106 /* limits & surrogate error testing */
5107 char LIn [sizeof(pszLMBCS)];
5108 const char * pLIn = LIn;
5109
5110 char LOut [sizeof(pszLMBCS)];
5111 char * pLOut = LOut;
5112
5113 UChar UOut [sizeof(pszUnicode)];
5114 UChar * pUOut = UOut;
5115
5116 UChar UIn [sizeof(pszUnicode)];
5117 const UChar * pUIn = UIn;
5118
5119 int32_t off [sizeof(offsets)];
5120 UChar32 uniChar;
5121
5122 errorCode=U_ZERO_ERROR;
5123
5124 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5125 pUIn++;
5126 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5127 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5128 {
5129 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5130 }
5131 pUIn--;
5132
5133 errorCode=U_ZERO_ERROR;
5134 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5135 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5136 {
5137 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5138 }
5139 errorCode=U_ZERO_ERROR;
5140
5141 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5142 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5143 {
5144 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5145 }
5146 errorCode=U_ZERO_ERROR;
5147
5148 /* 0 byte source request - no error, no pointer movement */
5149 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5150 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5151 if(U_FAILURE(errorCode)) {
5152 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5153 }
5154 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5155 {
5156 log_err("Unexpected pointer move in 0 byte source request \n");
5157 }
5158 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5159 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5160 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5161 {
5162 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5163 }
5164 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5165 {
5166 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5167 }
5168 errorCode = U_ZERO_ERROR;
5169
5170 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5171
5172 pUIn = pszUnicode;
5173 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5174 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5175 {
5176 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5177 }
5178
5179 errorCode = U_ZERO_ERROR;
5180
5181 pLIn = (const char *)pszLMBCS;
5182 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5183 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5184 {
5185 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5186 }
5187
5188 /* unpaired or chopped LMBCS surrogates */
5189
5190 /* OK high surrogate, Low surrogate is chopped */
5191 LIn [0] = (char)0x14;
5192 LIn [1] = (char)0xD8;
5193 LIn [2] = (char)0x01;
5194 LIn [3] = (char)0x14;
5195 LIn [4] = (char)0xDC;
5196 pLIn = LIn;
5197 errorCode = U_ZERO_ERROR;
5198 pUOut = UOut;
5199
5200 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5201 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5202 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5203 {
5204 log_err("Unexpected results on chopped low surrogate\n");
5205 }
5206
5207 /* chopped at surrogate boundary */
5208 LIn [0] = (char)0x14;
5209 LIn [1] = (char)0xD8;
5210 LIn [2] = (char)0x01;
5211 pLIn = LIn;
5212 errorCode = U_ZERO_ERROR;
5213 pUOut = UOut;
5214
5215 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5216 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5217 {
5218 log_err("Unexpected results on chopped at surrogate boundary \n");
5219 }
5220
5221 /* unpaired surrogate plus valid Unichar */
5222 LIn [0] = (char)0x14;
5223 LIn [1] = (char)0xD8;
5224 LIn [2] = (char)0x01;
5225 LIn [3] = (char)0x14;
5226 LIn [4] = (char)0xC9;
5227 LIn [5] = (char)0xD0;
5228 pLIn = LIn;
5229 errorCode = U_ZERO_ERROR;
5230 pUOut = UOut;
5231
5232 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5233 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5234 {
5235 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5236 }
5237
5238 /* unpaired surrogate plus chopped Unichar */
5239 LIn [0] = (char)0x14;
5240 LIn [1] = (char)0xD8;
5241 LIn [2] = (char)0x01;
5242 LIn [3] = (char)0x14;
5243 LIn [4] = (char)0xC9;
5244
5245 pLIn = LIn;
5246 errorCode = U_ZERO_ERROR;
5247 pUOut = UOut;
5248
5249 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5250 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5251 {
5252 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5253 }
5254
5255 /* unpaired surrogate plus valid non-Unichar */
5256 LIn [0] = (char)0x14;
5257 LIn [1] = (char)0xD8;
5258 LIn [2] = (char)0x01;
5259 LIn [3] = (char)0x0F;
5260 LIn [4] = (char)0x3B;
5261
5262 pLIn = LIn;
5263 errorCode = U_ZERO_ERROR;
5264 pUOut = UOut;
5265
5266 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5267 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5268 {
5269 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5270 }
5271
5272 /* unpaired surrogate plus chopped non-Unichar */
5273 LIn [0] = (char)0x14;
5274 LIn [1] = (char)0xD8;
5275 LIn [2] = (char)0x01;
5276 LIn [3] = (char)0x0F;
5277
5278 pLIn = LIn;
5279 errorCode = U_ZERO_ERROR;
5280 pUOut = UOut;
5281
5282 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5283
5284 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5285 {
5286 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5287 }
5288 }
5289 }
5290 ucnv_close(cnv); /* final cleanup */
5291}
5292
5293
5294static void TestJitterbug255()
5295{
5296 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5297 const char *testBuffer = (const char *)testBytes;
5298 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5299 UErrorCode status = U_ZERO_ERROR;
5300 /*UChar32 result;*/
5301 UConverter *cnv = 0;
5302
5303 cnv = ucnv_open("shift-jis", &status);
5304 if (U_FAILURE(status) || cnv == 0) {
5305 log_data_err("Failed to open the converter for SJIS.\n");
5306 return;
5307 }
5308 while (testBuffer != testEnd)
5309 {
5310 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5311 if (U_FAILURE(status))
5312 {
5313 log_err("Failed to convert the next UChar for SJIS.\n");
5314 break;
5315 }
5316 }
5317 ucnv_close(cnv);
5318}
5319
5320static void TestEBCDICUS4XML()
5321{
5322 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5323 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5324 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5325 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5326 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5327 UChar *unicodes = unicodes_x;
5328 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5329 char *target = target_x;
5330 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5331 UErrorCode status = U_ZERO_ERROR;
5332 UConverter *cnv = 0;
5333
5334 cnv = ucnv_open("ebcdic-xml-us", &status);
5335 if (U_FAILURE(status) || cnv == 0) {
5336 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5337 return;
5338 }
5339 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5340 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5341 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5342 u_errorName(status));
5343 printUSeqErr(unicodes_x, 3);
5344 printUSeqErr(toUnicodeMaps, 3);
5345 }
5346 status = U_ZERO_ERROR;
5347 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5348 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5349 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5350 u_errorName(status));
5351 printSeqErr((const unsigned char*)target_x, 3);
5352 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5353 }
5354 ucnv_close(cnv);
5355}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5357
5358#if !UCONFIG_NO_COLLATION
5359
5360static void TestJitterbug981(){
5361 const UChar* rules;
5362 int32_t rules_length, target_cap, bytes_needed, buff_size;
5363 UErrorCode status = U_ZERO_ERROR;
5364 UConverter *utf8cnv;
5365 UCollator* myCollator;
5366 char *buff;
5367 int numNeeded=0;
5368 utf8cnv = ucnv_open ("utf8", &status);
5369 if(U_FAILURE(status)){
5370 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5371 return;
5372 }
5373 myCollator = ucol_open("zh", &status);
5374 if(U_FAILURE(status)){
5375 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5376 ucnv_close(utf8cnv);
5377 return;
5378 }
5379
5380 rules = ucol_getRules(myCollator, &rules_length);
5381 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5382 buff = malloc(buff_size);
5383
5384 target_cap = 0;
5385 do {
5386 ucnv_reset(utf8cnv);
5387 status = U_ZERO_ERROR;
5388 if(target_cap >= buff_size) {
5389 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5390 break;
5391 }
5392 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5393 rules, rules_length, &status);
5394 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5395 if(numNeeded!=0 && numNeeded!= bytes_needed){
5396 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5397 break;
5398 }
5399 numNeeded = bytes_needed;
5400 } while (status == U_BUFFER_OVERFLOW_ERROR);
5401 ucol_close(myCollator);
5402 ucnv_close(utf8cnv);
5403 free(buff);
5404}
5405
5406#endif
5407
5408static void TestJitterbug1293(){
5409 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5410 char target[256];
5411 UErrorCode status = U_ZERO_ERROR;
5412 UConverter* conv=NULL;
5413 int32_t target_cap, bytes_needed, numNeeded = 0;
5414 conv = ucnv_open("shift-jis",&status);
5415 if(U_FAILURE(status)){
5416 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5417 return;
5418 }
5419
5420 do{
5421 target_cap =0;
5422 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5423 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5424 if(numNeeded!=0 && numNeeded!= bytes_needed){
5425 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5426 }
5427 numNeeded = bytes_needed;
5428 } while (status == U_BUFFER_OVERFLOW_ERROR);
5429 if(U_FAILURE(status)){
5430 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5431 return;
5432 }
5433 ucnv_close(conv);
5434}
5435static void TestJB5275_1(){
5436
5437 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5438 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5439 /* Switch script: */
5440 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5441 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5442 "\xEF\x40\x3B\xB3\x0A";
5443 static const UChar expected[] ={
5444 0x003b, 0x0a15, 0x000a, /* Easy characters */
5445 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5446 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5447 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5448 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5449 };
5450
5451 UErrorCode status = U_ZERO_ERROR;
5452 UConverter* conv = ucnv_open("iscii-gur", &status);
5453 UChar dest[100] = {'\0'};
5454 UChar* target = dest;
5455 UChar* targetLimit = dest+100;
5456 const char* source = data;
5457 const char* sourceLimit = data+strlen(data);
5458 const UChar* exp = expected;
5459
5460 if (U_FAILURE(status)) {
5461 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5462 return;
5463 }
5464
5465 log_verbose("Testing switching back to default script when new line is encountered.\n");
5466 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5467 if(U_FAILURE(status)){
5468 log_err("conversion failed: %s \n", u_errorName(status));
5469 }
5470 targetLimit = target;
5471 target = dest;
5472 printUSeq(target, targetLimit-target);
5473 while(target<targetLimit){
5474 if(*exp!=*target){
5475 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5476 }
5477 target++;
5478 exp++;
5479 }
5480 ucnv_close(conv);
5481}
5482
5483static void TestJB5275(){
5484 static const char* data =
5485 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5486 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5487 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5488 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5489 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5490 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5491 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5492 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5493 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5494 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5495 static const UChar expected[] ={
5496 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5497 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5498 0x0038, 0x0C95, 0x000A, /* Kannada test */
5499 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5500 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5501 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5502 };
5503
5504 UErrorCode status = U_ZERO_ERROR;
5505 UConverter* conv = ucnv_open("iscii", &status);
5506 UChar dest[100] = {'\0'};
5507 UChar* target = dest;
5508 UChar* targetLimit = dest+100;
5509 const char* source = data;
5510 const char* sourceLimit = data+strlen(data);
5511 const UChar* exp = expected;
5512 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5513 if(U_FAILURE(status)){
5514 log_err("conversion failed: %s \n", u_errorName(status));
5515 }
5516 targetLimit = target;
5517 target = dest;
5518
5519 printUSeq(target, targetLimit-target);
5520
5521 while(target<targetLimit){
5522 if(*exp!=*target){
5523 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5524 }
5525 target++;
5526 exp++;
5527 }
5528 ucnv_close(conv);
5529}