]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/cintltst/nucnvtst.c
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File nucnvtst.c
11*
12* Modification History:
13* Name Description
14* Steven R. Loomis 7/8/1999 Adding input buffer test
15********************************************************************************
16*/
17#include <stdio.h>
18#include "cstring.h"
19#include "unicode/uloc.h"
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
22#include "unicode/ucnv_cb.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "unicode/ucol.h"
27#include "unicode/utf16.h"
28#include "cmemory.h"
29#include "nucnvtst.h"
30
31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33#if !UCONFIG_NO_COLLATION
34static void TestJitterbug981(void);
35#endif
36#if !UCONFIG_NO_LEGACY_CONVERSION
37static void TestJitterbug1293(void);
38#endif
39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40static void TestConverterTypesAndStarters(void);
41static void TestAmbiguous(void);
42static void TestSignatureDetection(void);
43static void TestUTF7(void);
44static void TestIMAP(void);
45static void TestUTF8(void);
46static void TestCESU8(void);
47static void TestUTF16(void);
48static void TestUTF16BE(void);
49static void TestUTF16LE(void);
50static void TestUTF32(void);
51static void TestUTF32BE(void);
52static void TestUTF32LE(void);
53static void TestLATIN1(void);
54
55#if !UCONFIG_NO_LEGACY_CONVERSION
56static void TestSBCS(void);
57static void TestDBCS(void);
58static void TestMBCS(void);
59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60static void TestICCRunout(void);
61#endif
62
63#ifdef U_ENABLE_GENERIC_ISO_2022
64static void TestISO_2022(void);
65#endif
66
67static void TestISO_2022_JP(void);
68static void TestISO_2022_JP_1(void);
69static void TestISO_2022_JP_2(void);
70static void TestISO_2022_KR(void);
71static void TestISO_2022_KR_1(void);
72static void TestISO_2022_CN(void);
73#if 0
74 /*
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76 */
77static void TestISO_2022_CN_EXT(void);
78#endif
79static void TestJIS(void);
80static void TestHZ(void);
81#endif
82
83static void TestSCSU(void);
84
85#if !UCONFIG_NO_LEGACY_CONVERSION
86static void TestEBCDIC_STATEFUL(void);
87static void TestGB18030(void);
88static void TestLMBCS(void);
89static void TestJitterbug255(void);
90static void TestEBCDICUS4XML(void);
91#if 0
92 /*
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94 */
95static void TestJitterbug915(void);
96#endif
97static void TestISCII(void);
98
99static void TestCoverageMBCS(void);
100static void TestJitterbug2346(void);
101static void TestJitterbug2411(void);
102static void TestJB5275(void);
103static void TestJB5275_1(void);
104static void TestJitterbug6175(void);
105
106static void TestIsFixedWidth(void);
107#endif
108
109static void TestInBufSizes(void);
110
111static void TestRoundTrippingAllUTF(void);
112static void TestConv(const uint16_t in[],
113 int len,
114 const char* conv,
115 const char* lang,
116 char byteArr[],
117 int byteArrLen);
118
119/* open a converter, using test data if it begins with '@' */
120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Capacity of the scratch output buffers used by the conversion helpers;
   also the "unlimited" chunk size for the buffer-size globals below. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes (input / output) handed to the converter per call. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently under test, for log messages. */
static char     gNuConvTestName[1024];

/* Smaller of x and y. Every argument use is parenthesized so that operands
   with low-precedence operators (e.g. a|b) are compared as a whole.
   Arguments are evaluated more than once: do not pass side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133 if(cnv && cnv[0] == '@') {
134 return ucnv_openPackage(loadTestData(err), cnv+1, err);
135 } else {
136 return ucnv_open(cnv, err);
137 }
138}
139
/* Writes len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151 int i=0;
152 log_verbose("{U+");
153 while (i<len) log_verbose("0x%04x ", a[i++]);
154 log_verbose("}\n");
155}
156
/* Same output as printSeq(), but written directly to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168 int i=0;
169 fprintf(stderr, "{U+");
170 while (i<len)
171 fprintf(stderr, "0x%04x ", a[i++]);
172 fprintf(stderr,"}\n");
173}
174
175static void
176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177{
178 const char* s0;
179 const char* s=(char*)source;
180 const int32_t *r=results;
181 UErrorCode errorCode=U_ZERO_ERROR;
182 UChar32 c;
183
184 while(s<limit) {
185 s0=s;
186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191 break;
192 } else if(
193 /* test the expected number of input bytes only if >=0 */
194 (*r>=0 && (int32_t)(s-s0)!=*r) ||
195 c!=*(r+1)
196 ) {
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message, c, (s-s0), *(r+1), *r);
199 break;
200 }
201 r+=2;
202 }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208 const char* s=(char*)source;
209 UErrorCode errorCode=U_ZERO_ERROR;
210 uint32_t c;
211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212 if(errorCode != expected){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214 }
215 if(c != 0xFFFD && c != 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217 }
218
219}
220
221static void TestInBufSizes(void)
222{
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
252#if !UCONFIG_NO_FILE_IO
253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255#endif
256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274#endif
275
276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280#if !UCONFIG_NO_FILE_IO
281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286#ifdef U_ENABLE_GENERIC_ISO_2022
287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288#endif
289
290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
297 /*
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
301 */
302 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
303#endif
304
305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
306
307#if !UCONFIG_NO_LEGACY_CONVERSION
308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
315#if !UCONFIG_NO_COLLATION
316 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317#endif
318
319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
320#endif
321
322
323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
325#endif
326
327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
328
329#if !UCONFIG_NO_LEGACY_CONVERSION
330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
332 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
333
334 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
335#endif
336}
337
338
339/* Note that this test already makes use of statics, so it's not really
340 multithread safe.
341 This convenience function lets us make the error messages actually useful.
342*/
343
344static void setNuConvTestName(const char *codepage, const char *direction)
345{
346 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347 codepage,
348 direction,
349 (int)gInBufferSize,
350 (int)gOutBufferSize);
351}
352
/* Outcome reported by the testConvertFromU() / testConvertToU() helpers. */
typedef enum
{
    /* test was OK */
    TC_OK       = 0,
    /* Match failed - err was printed */
    TC_MISMATCH = 1,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL     = 2
} ETestConvertResult;
359
360/* Note: This function uses global variables and it will not do offset
361checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
363 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364{
365 UErrorCode status = U_ZERO_ERROR;
366 UConverter *conv = 0;
367 char junkout[NEW_MAX_BUFFER]; /* FIX */
368 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
369 char *p;
370 const UChar *src;
371 char *end;
372 char *targ;
373 int32_t *offs;
374 int i;
375 int32_t realBufferSize;
376 char *realBufferEnd;
377 const UChar *realSourceEnd;
378 const UChar *sourceLimit;
379 UBool checkOffsets = TRUE;
380 UBool doFlush;
381
382 for(i=0;i<NEW_MAX_BUFFER;i++)
383 junkout[i] = (char)0xF0;
384 for(i=0;i<NEW_MAX_BUFFER;i++)
385 junokout[i] = 0xFF;
386
387 setNuConvTestName(codepage, "FROM");
388
389 log_verbose("\n========= %s\n", gNuConvTestName);
390
391 conv = my_ucnv_open(codepage, &status);
392
393 if(U_FAILURE(status))
394 {
395 log_data_err("Couldn't open converter %s\n",codepage);
396 return TC_FAIL;
397 }
398 if(useFallback){
399 ucnv_setFallback(conv,useFallback);
400 }
401
402 log_verbose("Converter opened..\n");
403
404 src = source;
405 targ = junkout;
406 offs = junokout;
407
408 realBufferSize = UPRV_LENGTHOF(junkout);
409 realBufferEnd = junkout + realBufferSize;
410 realSourceEnd = source + sourceLen;
411
412 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413 checkOffsets = FALSE;
414
415 do
416 {
417 end = nct_min(targ + gOutBufferSize, realBufferEnd);
418 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419
420 doFlush = (UBool)(sourceLimit == realSourceEnd);
421
422 if(targ == realBufferEnd) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424 return TC_FAIL;
425 }
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
427
428
429 status = U_ZERO_ERROR;
430
431 ucnv_fromUnicode (conv,
432 &targ,
433 end,
434 &src,
435 sourceLimit,
436 checkOffsets ? offs : NULL,
437 doFlush, /* flush if we're at the end of the input data */
438 &status);
439 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440
441 if(U_FAILURE(status)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443 return TC_FAIL;
444 }
445
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen, targ-junkout);
448
449 if(getTestOption(VERBOSITY_OPTION))
450 {
451 char junk[9999];
452 char offset_str[9999];
453 char *ptr;
454
455 junk[0] = 0;
456 offset_str[0] = 0;
457 for(ptr = junkout;ptr<targ;ptr++) {
458 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460 }
461
462 log_verbose(junk);
463 printSeq((const uint8_t *)expect, expectLen);
464 if ( checkOffsets ) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str);
467 }
468 log_verbose("\n");
469 }
470 ucnv_close(conv);
471
472 if(expectLen != targ-junkout) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475 fprintf(stderr, "Got:\n");
476 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477 fprintf(stderr, "Expected:\n");
478 printSeqErr((const unsigned char*)expect, expectLen);
479 return TC_MISMATCH;
480 }
481
482 if (checkOffsets && (expectOffsets != 0) ) {
483 log_verbose("comparing %d offsets..\n", targ-junkout);
484 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487 log_err("\n");
488 log_err("Got : ");
489 for(p=junkout;p<targ;p++) {
490 log_err("%d,", junokout[p-junkout]);
491 }
492 log_err("\n");
493 log_err("Expected: ");
494 for(i=0; i<(targ-junkout); i++) {
495 log_err("%d,", expectOffsets[i]);
496 }
497 log_err("\n");
498 }
499 }
500
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout, expect, expectLen)) {
503 log_verbose("Matches!\n");
504 return TC_OK;
505 } else {
506 log_err("String does not match u->%s\n", gNuConvTestName);
507 printUSeqErr(source, sourceLen);
508 fprintf(stderr, "Got:\n");
509 printSeqErr((const unsigned char *)junkout, expectLen);
510 fprintf(stderr, "Expected:\n");
511 printSeqErr((const unsigned char *)expect, expectLen);
512
513 return TC_MISMATCH;
514 }
515}
516
517/* Note: This function uses global variables and it will not do offset
518checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521{
522 UErrorCode status = U_ZERO_ERROR;
523 UConverter *conv = 0;
524 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
525 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
526 const char *src;
527 const char *realSourceEnd;
528 const char *srcLimit;
529 UChar *p;
530 UChar *targ;
531 UChar *end;
532 int32_t *offs;
533 int i;
534 UBool checkOffsets = TRUE;
535
536 int32_t realBufferSize;
537 UChar *realBufferEnd;
538
539
540 for(i=0;i<NEW_MAX_BUFFER;i++)
541 junkout[i] = 0xFFFE;
542
543 for(i=0;i<NEW_MAX_BUFFER;i++)
544 junokout[i] = -1;
545
546 setNuConvTestName(codepage, "TO");
547
548 log_verbose("\n========= %s\n", gNuConvTestName);
549
550 conv = my_ucnv_open(codepage, &status);
551
552 if(U_FAILURE(status))
553 {
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555 return TC_FAIL;
556 }
557 if(useFallback){
558 ucnv_setFallback(conv,useFallback);
559 }
560 log_verbose("Converter opened..\n");
561
562 src = (const char *)source;
563 targ = junkout;
564 offs = junokout;
565
566 realBufferSize = UPRV_LENGTHOF(junkout);
567 realBufferEnd = junkout + realBufferSize;
568 realSourceEnd = src + sourcelen;
569
570 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
571 checkOffsets = FALSE;
572
573 do
574 {
575 end = nct_min( targ + gOutBufferSize, realBufferEnd);
576 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577
578 if(targ == realBufferEnd)
579 {
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581 return TC_FAIL;
582 }
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584
585 /* oldTarg = targ; */
586
587 status = U_ZERO_ERROR;
588
589 ucnv_toUnicode (conv,
590 &targ,
591 end,
592 &src,
593 srcLimit,
594 checkOffsets ? offs : NULL,
595 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
596 &status);
597
598 /* offs += (targ-oldTarg); */
599
600 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601
602 if(U_FAILURE(status))
603 {
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605 return TC_FAIL;
606 }
607
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen, targ-junkout);
610 if(getTestOption(VERBOSITY_OPTION))
611 {
612 char junk[9999];
613 char offset_str[9999];
614 UChar *ptr;
615
616 junk[0] = 0;
617 offset_str[0] = 0;
618
619 for(ptr = junkout;ptr<targ;ptr++)
620 {
621 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623 }
624
625 log_verbose(junk);
626 printUSeq(expect, expectlen);
627 if ( checkOffsets )
628 {
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str);
631 }
632 log_verbose("\n");
633 }
634 ucnv_close(conv);
635
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637
638 if (checkOffsets && (expectOffsets != 0))
639 {
640 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642 log_err("Got: ");
643 for(p=junkout;p<targ;p++) {
644 log_err("%d,", junokout[p-junkout]);
645 }
646 log_err("\n");
647 log_err("Expected: ");
648 for(i=0; i<(targ-junkout); i++) {
649 log_err("%d,", expectOffsets[i]);
650 }
651 log_err("\n");
652 log_err("output: ");
653 for(i=0; i<(targ-junkout); i++) {
654 log_err("%X,", junkout[i]);
655 }
656 log_err("\n");
657 log_err("input: ");
658 for(i=0; i<(src-(const char *)source); i++) {
659 log_err("%X,", (unsigned char)source[i]);
660 }
661 log_err("\n");
662 }
663 }
664
665 if(!memcmp(junkout, expect, expectlen*2))
666 {
667 log_verbose("Matches!\n");
668 return TC_OK;
669 }
670 else
671 {
672 log_err("String does not match. %s\n", gNuConvTestName);
673 log_verbose("String does not match. %s\n", gNuConvTestName);
674 printf("\nGot:");
675 printUSeqErr(junkout, expectlen);
676 printf("\nExpected:");
677 printUSeqErr(expect, expectlen);
678 return TC_MISMATCH;
679 }
680}
681
682
683static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684{
685/** test chars #1 */
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691
692
693 static const uint8_t expectedUTF8[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699
700#ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceeding */
702 static const const uint8_t expectedISO2022[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709#endif
710
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs [] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs [] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs [] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750
751
752 /* etc */
753 static const uint8_t expectedUTF16BE[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
759
760 static const uint8_t expectedUTF16LE[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766
767 static const uint8_t expectedUTF32BE[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
790
791 static const uint8_t expectedUTF32LE[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814
815
816
817
818/** Test chars #2 **/
819
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823
824 /* LMBCS */
825 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
830
831 gInBufferSize = insize;
832 gOutBufferSize = outsize;
833
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835
836
837 /*UTF-8*/
838 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
839 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
840
841 log_verbose("Test surrogate behaviour for UTF8\n");
842 {
843 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
846 0xef, 0xbf, 0xbd
847 };
848 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
850 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
851
852
853 }
854
855#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856 /*ISO-2022*/
857 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
858 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
859#endif
860
861 /*UTF16 LE*/
862 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
863 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
864 /*UTF16 BE*/
865 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
866 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
867 /*UTF32 LE*/
868 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
869 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
870 /*UTF32 BE*/
871 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
872 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
873
874 /*LATIN_1*/
875 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
876 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
877
878#if !UCONFIG_NO_LEGACY_CONVERSION
879 /*EBCDIC_STATEFUL*/
880 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
881 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
882
883 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
884 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
885
886 /*MBCS*/
887
888 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
889 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
890 /*DBCS*/
891 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
892 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
893 /*SBCS*/
894 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
895 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
896 /*SBCS*/
897 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
898 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
899#endif
900
901
902/****/
903
904 /*UTF-8*/
905 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
907#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908 /*ISO-2022*/
909 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
911#endif
912
913 /*UTF16 LE*/
914 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
916 /*UTF16 BE*/
917 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
919 /*UTF32 LE*/
920 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
922 /*UTF32 BE*/
923 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
925
926#if !UCONFIG_NO_LEGACY_CONVERSION
927 /*EBCDIC_STATEFUL*/
928 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
930 /*MBCS*/
931 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
933#endif
934
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
938
939#if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
942
943 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
944 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
945
946 /*LMBCS*/
947 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
948 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
949 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
951#endif
952
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954 {
955 /* encode directly set D and set O */
956 static const uint8_t utf7[] = {
957 /*
958 Hi Mom -+Jjo--!
959 A+ImIDkQ.
960 +-
961 +ZeVnLIqe-
962 */
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965 0x2b, 0x2d,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967 };
968 static const UChar unicode[] = {
969 /*
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
972 +
973 [Japanese word "nihongo"]
974 */
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
977 0x2b,
978 0x65e5, 0x672c, 0x8a9e
979 };
980 static const int32_t toUnicodeOffsets[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982 15, 17, 19, 23,
983 24,
984 27, 29, 32
985 };
986 static const int32_t fromUnicodeOffsets[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
989 15, 15,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991 };
992
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted[] = {
995 /*
996 Hi Mom -+Jjo--+ACE-
997 A+ImIDkQ.
998 +-
999 +ZeVnLIqe-
1000 */
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003 0x2b, 0x2d,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005 };
1006 static const int32_t toUnicodeOffsetsR[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008 19, 21, 23, 27,
1009 28,
1010 31, 33, 36
1011 };
1012 static const int32_t fromUnicodeOffsetsR[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1015 15, 15,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017 };
1018
1019 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1020
1021 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1022
1023 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1024
1025 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1026 }
1027
1028 /*
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1032 */
1033 {
1034 static const uint8_t imap[] = {
1035 /* Hi Mom -&Jjo--!
1036 A&ImIDkQ-.
1037 &-
1038 &ZeVnLIqe-
1039 \
1040 ~peter
1041 /mail
1042 /&ZeVnLIqe-
1043 /&U,BTFw-
1044 */
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047 0x26, 0x2d,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049 0x5c,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054 };
1055 static const UChar unicode[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1058 &
1059 [Japanese word "nihongo"]
1060 \
1061 ~peter
1062 /mail
1063 /<65e5, 672c, 8a9e>
1064 /<53f0, 5317>
1065 */
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1068 0x26,
1069 0x65e5, 0x672c, 0x8a9e,
1070 0x5c,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1075 };
1076 static const int32_t toUnicodeOffsets[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078 15, 17, 19, 24,
1079 25,
1080 28, 30, 33,
1081 37,
1082 38, 39, 40, 41, 42, 43,
1083 44, 45, 46, 47, 48,
1084 49, 51, 53, 56,
1085 60, 62, 64
1086 };
1087 static const int32_t fromUnicodeOffsets[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090 15, 15,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092 19,
1093 20, 21, 22, 23, 24, 25,
1094 26, 27, 28, 29, 30,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1097 };
1098
1099 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1100
1101 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1102 }
1103
1104 /* Test UTF-8 bad data handling*/
1105 {
1106 static const uint8_t utf8[]={
1107 0x61,
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1109 0x00,
1110 0x62,
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1117 0x02
1118 };
1119
1120 static const uint16_t utf8Expected[]={
1121 0x0061,
1122 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1123 0x0000,
1124 0x0062,
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127 0xdbff, 0xdfff,
1128 0x07ff,
1129 0xfffd,
1130 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1131 0x0002
1132 };
1133
1134 static const int32_t utf8Offsets[]={
1135 0,
1136 1, 2, 3, 4,
1137 5,
1138 6,
1139 7, 8, 9, 10, 11,
1140 12, 13, 14, 15, 16,
1141 17, 17,
1142 21,
1143 23,
1144 24, 25, 26, 27,
1145 28
1146 };
1147 testConvertToU(utf8, sizeof(utf8),
1148 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1149
1150 }
1151
1152 /* Test UTF-32BE bad data handling*/
1153 {
1154 static const uint8_t utf32[]={
1155 0x00, 0x00, 0x00, 0x61,
1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1158 0x00, 0x00, 0x00, 0x62,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1161 0x00, 0x00, 0x01, 0x62,
1162 0x00, 0x00, 0x02, 0x62
1163 };
1164 static const uint16_t utf32Expected[]={
1165 0x0061,
1166 0xfffd, /* 0x110000 out of range */
1167 0xDBFF, /* 0x10FFFF in range */
1168 0xDFFF,
1169 0x0062,
1170 0xfffd, /* 0xffffffff out of range */
1171 0xfffd, /* 0x7fffffff out of range */
1172 0x0162,
1173 0x0262
1174 };
1175 static const int32_t utf32Offsets[]={
1176 0, 4, 8, 8, 12, 16, 20, 24, 28
1177 };
1178 static const uint8_t utf32ExpectedBack[]={
1179 0x00, 0x00, 0x00, 0x61,
1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1182 0x00, 0x00, 0x00, 0x62,
1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1185 0x00, 0x00, 0x01, 0x62,
1186 0x00, 0x00, 0x02, 0x62
1187 };
1188 static const int32_t utf32OffsetsBack[]={
1189 0,0,0,0,
1190 1,1,1,1,
1191 2,2,2,2,
1192 4,4,4,4,
1193 5,5,5,5,
1194 6,6,6,6,
1195 7,7,7,7,
1196 8,8,8,8
1197 };
1198
1199 testConvertToU(utf32, sizeof(utf32),
1200 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1201 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1202 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1203 }
1204
1205 /* Test UTF-32LE bad data handling*/
1206 {
1207 static const uint8_t utf32[]={
1208 0x61, 0x00, 0x00, 0x00,
1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1211 0x62, 0x00, 0x00, 0x00,
1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1214 0x62, 0x01, 0x00, 0x00,
1215 0x62, 0x02, 0x00, 0x00,
1216 };
1217
1218 static const uint16_t utf32Expected[]={
1219 0x0061,
1220 0xfffd, /* 0x110000 out of range */
1221 0xDBFF, /* 0x10FFFF in range */
1222 0xDFFF,
1223 0x0062,
1224 0xfffd, /* 0xffffffff out of range */
1225 0xfffd, /* 0x7fffffff out of range */
1226 0x0162,
1227 0x0262
1228 };
1229 static const int32_t utf32Offsets[]={
1230 0, 4, 8, 8, 12, 16, 20, 24, 28
1231 };
1232 static const uint8_t utf32ExpectedBack[]={
1233 0x61, 0x00, 0x00, 0x00,
1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1236 0x62, 0x00, 0x00, 0x00,
1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1239 0x62, 0x01, 0x00, 0x00,
1240 0x62, 0x02, 0x00, 0x00
1241 };
1242 static const int32_t utf32OffsetsBack[]={
1243 0,0,0,0,
1244 1,1,1,1,
1245 2,2,2,2,
1246 4,4,4,4,
1247 5,5,5,5,
1248 6,6,6,6,
1249 7,7,7,7,
1250 8,8,8,8
1251 };
1252 testConvertToU(utf32, sizeof(utf32),
1253 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1254 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1255 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1256 }
1257}
1258
1259static void TestCoverageMBCS(){
1260#if 0
1261 UErrorCode status = U_ZERO_ERROR;
1262 const char *directory = loadTestData(&status);
1263 char* tdpath = NULL;
1264 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265 int len = strlen(directory);
1266 char* index=NULL;
1267
1268 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269 uprv_strcpy(saveDirectory,u_getDataDirectory());
1270 log_verbose("Retrieved data directory %s \n",saveDirectory);
1271 uprv_strcpy(tdpath,directory);
1272 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273
1274 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275 *(index+1)=0;
1276 }
1277 u_setDataDirectory(tdpath);
1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279#endif
1280
1281 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1282 which is test file for MBCS conversion with single-byte codepage data.*/
1283 {
1284
1285 /* MBCS with single byte codepage data test1.ucm*/
1286 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1289
1290 /*from Unicode*/
1291 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1292 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1293 }
1294
1295 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1296 which is test file for MBCS conversion with three-byte codepage data.*/
1297 {
1298
1299 /* MBCS with three byte codepage data test3.ucm*/
1300 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1302 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303
1304 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307
1308 /*from Unicode*/
1309 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1310 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1311
1312 /*to Unicode*/
1313 testConvertToU(test3input, sizeof(test3input),
1314 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1315
1316 }
1317
1318 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1319 which is test file for MBCS conversion with four-byte codepage data.*/
1320 {
1321
1322 /* MBCS with three byte codepage data test4.ucm*/
1323 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1325 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326
1327 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330
1331 /*from Unicode*/
1332 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1333 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1334
1335 /*to Unicode*/
1336 testConvertToU(test4input, sizeof(test4input),
1337 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1338
1339 }
1340#if 0
1341 free(tdpath);
1342 /* restore the original data directory */
1343 log_verbose("Setting the data directory to %s \n", saveDirectory);
1344 u_setDataDirectory(saveDirectory);
1345 free(saveDirectory);
1346#endif
1347
1348}
1349
1350static void TestConverterType(const char *convName, UConverterType convType) {
1351 UConverter* myConverter;
1352 UErrorCode err = U_ZERO_ERROR;
1353
1354 myConverter = my_ucnv_open(convName, &err);
1355
1356 if (U_FAILURE(err)) {
1357 log_data_err("Failed to create an %s converter\n", convName);
1358 return;
1359 }
1360 else
1361 {
1362 if (ucnv_getType(myConverter)!=convType) {
1363 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1364 convName, convType);
1365 }
1366 else {
1367 log_verbose("ucnv_getType %s ok\n", convName);
1368 }
1369 }
1370 ucnv_close(myConverter);
1371}
1372
1373static void TestConverterTypesAndStarters()
1374{
1375#if !UCONFIG_NO_LEGACY_CONVERSION
1376 UConverter* myConverter;
1377 UErrorCode err = U_ZERO_ERROR;
1378 UBool mystarters[256];
1379
1380/* const UBool expectedKSCstarters[256] = {
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1407
1408
1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1410
1411 myConverter = ucnv_open("ksc", &err);
1412 if (U_FAILURE(err)) {
1413 log_data_err("Failed to create an ibm-ksc converter\n");
1414 return;
1415 }
1416 else
1417 {
1418 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419 log_err("ucnv_getType Failed for ibm-949\n");
1420 else
1421 log_verbose("ucnv_getType ibm-949 ok\n");
1422
1423 if(myConverter!=NULL)
1424 ucnv_getStarters(myConverter, mystarters, &err);
1425
1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427 log_err("Failed ucnv_getStarters for ksc\n");
1428 else
1429 log_verbose("ucnv_getStarters ok\n");*/
1430
1431 }
1432 ucnv_close(myConverter);
1433
1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435 TestConverterType("ibm-878", UCNV_SBCS);
1436#endif
1437
1438 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1439
1440 TestConverterType("ibm-1208", UCNV_UTF8);
1441
1442 TestConverterType("utf-8", UCNV_UTF8);
1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1447
1448#if !UCONFIG_NO_LEGACY_CONVERSION
1449
1450#if defined(U_ENABLE_GENERIC_ISO_2022)
1451 TestConverterType("iso-2022", UCNV_ISO_2022);
1452#endif
1453
1454 TestConverterType("hz", UCNV_HZ);
1455#endif
1456
1457 TestConverterType("scsu", UCNV_SCSU);
1458
1459#if !UCONFIG_NO_LEGACY_CONVERSION
1460 TestConverterType("x-iscii-de", UCNV_ISCII);
1461#endif
1462
1463 TestConverterType("ascii", UCNV_US_ASCII);
1464 TestConverterType("utf-7", UCNV_UTF7);
1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466 TestConverterType("bocu-1", UCNV_BOCU1);
1467}
1468
1469static void
1470TestAmbiguousConverter(UConverter *cnv) {
1471 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1472 UChar outUnicode[20]={ 0, 0, 0, 0 };
1473
1474 const char *s;
1475 UChar *u;
1476 UErrorCode errorCode;
1477 UBool isAmbiguous;
1478
1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480 errorCode=U_ZERO_ERROR;
1481 s=inBytes;
1482 u=outUnicode;
1483 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1484 if(U_FAILURE(errorCode)) {
1485 /* we do not care about general failures in this test; the input may just not be mappable */
1486 return;
1487 }
1488
1489 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491 /* There are some encodings that are partially ASCII based,
1492 like the ISO-7 and GSM series of codepages, which we ignore. */
1493 return;
1494 }
1495
1496 isAmbiguous=ucnv_isAmbiguous(cnv);
1497
1498 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1502 return;
1503 }
1504
1505 if(outUnicode[2]!=0x5c) {
1506 /* needs fixup, fix it */
1507 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1508 if(outUnicode[2]!=0x5c) {
1509 /* the fix failed */
1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511 return;
1512 }
1513 }
1514}
1515
1516static void TestAmbiguous()
1517{
1518 UErrorCode status = U_ZERO_ERROR;
1519 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1520 static const char target[] = {
1521 /* "\\usr\\local\\share\\data\\icutest.txt" */
1522 0x5c, 0x75, 0x73, 0x72,
1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525 0x5c, 0x64, 0x61, 0x74, 0x61,
1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527 0
1528 };
1529 UChar asciiResult[200], sjisResult[200];
1530 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1531 const char *name;
1532
1533 /* enumerate all converters */
1534 status=U_ZERO_ERROR;
1535 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536 cnv=ucnv_open(name, &status);
1537 if(U_SUCCESS(status)) {
1538 TestAmbiguousConverter(cnv);
1539 ucnv_close(cnv);
1540 } else {
1541 log_err("error: unable to open available converter \"%s\"\n", name);
1542 status=U_ZERO_ERROR;
1543 }
1544 }
1545
1546#if !UCONFIG_NO_LEGACY_CONVERSION
1547 sjis_cnv = ucnv_open("ibm-943", &status);
1548 if (U_FAILURE(status))
1549 {
1550 log_data_err("Failed to create a SJIS converter\n");
1551 return;
1552 }
1553 ascii_cnv = ucnv_open("LATIN-1", &status);
1554 if (U_FAILURE(status))
1555 {
1556 log_data_err("Failed to create a LATIN-1 converter\n");
1557 ucnv_close(sjis_cnv);
1558 return;
1559 }
1560 /* convert target from SJIS to Unicode */
1561 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1562 if (U_FAILURE(status))
1563 {
1564 log_err("Failed to convert the SJIS string.\n");
1565 ucnv_close(sjis_cnv);
1566 ucnv_close(ascii_cnv);
1567 return;
1568 }
1569 /* convert target from Latin-1 to Unicode */
1570 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1571 if (U_FAILURE(status))
1572 {
1573 log_err("Failed to convert the Latin-1 string.\n");
1574 ucnv_close(sjis_cnv);
1575 ucnv_close(ascii_cnv);
1576 return;
1577 }
1578 if (!ucnv_isAmbiguous(sjis_cnv))
1579 {
1580 log_err("SJIS converter should contain ambiguous character mappings.\n");
1581 ucnv_close(sjis_cnv);
1582 ucnv_close(ascii_cnv);
1583 return;
1584 }
1585 if (u_strcmp(sjisResult, asciiResult) == 0)
1586 {
1587 log_err("File separators for SJIS don't need to be fixed.\n");
1588 }
1589 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1590 if (u_strcmp(sjisResult, asciiResult) != 0)
1591 {
1592 log_err("Fixing file separator for SJIS failed.\n");
1593 }
1594 ucnv_close(sjis_cnv);
1595 ucnv_close(ascii_cnv);
1596#endif
1597}
1598
1599static void
1600TestSignatureDetection(){
1601 /* with null terminated strings */
1602 {
1603 static const char* data[] = {
1604 "\xFE\xFF\x00\x00", /* UTF-16BE */
1605 "\xFF\xFE\x00\x00", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x00", /* UTF-8 */
1607 "\x0E\xFE\xFF\x00", /* SCSU */
1608
1609 "\xFE\xFF", /* UTF-16BE */
1610 "\xFF\xFE", /* UTF-16LE */
1611 "\xEF\xBB\xBF", /* UTF-8 */
1612 "\x0E\xFE\xFF", /* SCSU */
1613
1614 "\xFE\xFF\x41\x42", /* UTF-16BE */
1615 "\xFF\xFE\x41\x41", /* UTF-16LE */
1616 "\xEF\xBB\xBF\x41", /* UTF-8 */
1617 "\x0E\xFE\xFF\x41", /* SCSU */
1618
1619 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1620 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1621 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1622 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1623 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1624
1625 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1626 };
1627 static const char* expected[] = {
1628 "UTF-16BE",
1629 "UTF-16LE",
1630 "UTF-8",
1631 "SCSU",
1632
1633 "UTF-16BE",
1634 "UTF-16LE",
1635 "UTF-8",
1636 "SCSU",
1637
1638 "UTF-16BE",
1639 "UTF-16LE",
1640 "UTF-8",
1641 "SCSU",
1642
1643 "UTF-7",
1644 "UTF-7",
1645 "UTF-7",
1646 "UTF-7",
1647 "UTF-7",
1648 "UTF-EBCDIC"
1649 };
1650 static const int32_t expectedLength[] ={
1651 2,
1652 2,
1653 3,
1654 3,
1655
1656 2,
1657 2,
1658 3,
1659 3,
1660
1661 2,
1662 2,
1663 3,
1664 3,
1665
1666 5,
1667 4,
1668 4,
1669 4,
1670 4,
1671 4
1672 };
1673 int i=0;
1674 UErrorCode err;
1675 int32_t signatureLength = -1;
1676 const char* source = NULL;
1677 const char* enc = NULL;
1678 for( ; i<UPRV_LENGTHOF(data); i++){
1679 err = U_ZERO_ERROR;
1680 source = data[i];
1681 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1682 if(U_FAILURE(err)){
1683 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1684 continue;
1685 }
1686 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1687 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1688 continue;
1689 }
1690 if(signatureLength != expectedLength[i]){
1691 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1692 }
1693 }
1694 }
1695 {
1696 static const char* data[] = {
1697 "\xFE\xFF\x00", /* UTF-16BE */
1698 "\xFF\xFE\x00", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x00", /* UTF-8 */
1700 "\x0E\xFE\xFF\x00", /* SCSU */
1701 "\x00\x00\xFE\xFF", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00", /* UTF-32LE */
1703 "\xFE\xFF", /* UTF-16BE */
1704 "\xFF\xFE", /* UTF-16LE */
1705 "\xEF\xBB\xBF", /* UTF-8 */
1706 "\x0E\xFE\xFF", /* SCSU */
1707 "\x00\x00\xFE\xFF", /* UTF-32BE */
1708 "\xFF\xFE\x00\x00", /* UTF-32LE */
1709 "\xFE\xFF\x41\x42", /* UTF-16BE */
1710 "\xFF\xFE\x41\x41", /* UTF-16LE */
1711 "\xEF\xBB\xBF\x41", /* UTF-8 */
1712 "\x0E\xFE\xFF\x41", /* SCSU */
1713 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1714 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1715 "\xFB\xEE\x28", /* BOCU-1 */
1716 "\xFF\x41\x42" /* NULL */
1717 };
1718 static const int len[] = {
1719 3,
1720 3,
1721 4,
1722 4,
1723 4,
1724 4,
1725 2,
1726 2,
1727 3,
1728 3,
1729 4,
1730 4,
1731 4,
1732 4,
1733 4,
1734 4,
1735 5,
1736 5,
1737 3,
1738 3
1739 };
1740
1741 static const char* expected[] = {
1742 "UTF-16BE",
1743 "UTF-16LE",
1744 "UTF-8",
1745 "SCSU",
1746 "UTF-32BE",
1747 "UTF-32LE",
1748 "UTF-16BE",
1749 "UTF-16LE",
1750 "UTF-8",
1751 "SCSU",
1752 "UTF-32BE",
1753 "UTF-32LE",
1754 "UTF-16BE",
1755 "UTF-16LE",
1756 "UTF-8",
1757 "SCSU",
1758 "UTF-32BE",
1759 "UTF-32LE",
1760 "BOCU-1",
1761 NULL
1762 };
1763 static const int32_t expectedLength[] ={
1764 2,
1765 2,
1766 3,
1767 3,
1768 4,
1769 4,
1770 2,
1771 2,
1772 3,
1773 3,
1774 4,
1775 4,
1776 2,
1777 2,
1778 3,
1779 3,
1780 4,
1781 4,
1782 3,
1783 0
1784 };
1785 int i=0;
1786 UErrorCode err;
1787 int32_t signatureLength = -1;
1788 int32_t sourceLength=-1;
1789 const char* source = NULL;
1790 const char* enc = NULL;
1791 for( ; i<UPRV_LENGTHOF(data); i++){
1792 err = U_ZERO_ERROR;
1793 source = data[i];
1794 sourceLength = len[i];
1795 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1796 if(U_FAILURE(err)){
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1798 continue;
1799 }
1800 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1801 if(expected[i] !=NULL){
1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1803 continue;
1804 }
1805 }
1806 if(signatureLength != expectedLength[i]){
1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1808 }
1809 }
1810 }
1811}
1812
1813static void TestUTF7() {
1814 /* test input */
1815 static const uint8_t in[]={
1816 /* H - +Jjo- - ! +- +2AHcAQ */
1817 0x48,
1818 0x2d,
1819 0x2b, 0x4a, 0x6a, 0x6f,
1820 0x2d, 0x2d,
1821 0x21,
1822 0x2b, 0x2d,
1823 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1824 };
1825
1826 /* expected test results */
1827 static const int32_t results[]={
1828 /* number of bytes read, code point */
1829 1, 0x48,
1830 1, 0x2d,
1831 4, 0x263a, /* <WHITE SMILING FACE> */
1832 2, 0x2d,
1833 1, 0x21,
1834 2, 0x2b,
1835 7, 0x10401
1836 };
1837
1838 const char *cnvName;
1839 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1840 UErrorCode errorCode=U_ZERO_ERROR;
1841 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1842 if(U_FAILURE(errorCode)) {
1843 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1844 return;
1845 }
1846 TestNextUChar(cnv, source, limit, results, "UTF-7");
1847 /* Test the condition when source >= sourceLimit */
1848 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1849 cnvName = ucnv_getName(cnv, &errorCode);
1850 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1851 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1852 }
1853 ucnv_close(cnv);
1854}
1855
1856static void TestIMAP() {
1857 /* test input */
1858 static const uint8_t in[]={
1859 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1860 0x48,
1861 0x2d,
1862 0x26, 0x4a, 0x6a, 0x6f,
1863 0x2d, 0x2d,
1864 0x21,
1865 0x26, 0x2d,
1866 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1867 };
1868
1869 /* expected test results */
1870 static const int32_t results[]={
1871 /* number of bytes read, code point */
1872 1, 0x48,
1873 1, 0x2d,
1874 4, 0x263a, /* <WHITE SMILING FACE> */
1875 2, 0x2d,
1876 1, 0x21,
1877 2, 0x26,
1878 7, 0x10401
1879 };
1880
1881 const char *cnvName;
1882 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1883 UErrorCode errorCode=U_ZERO_ERROR;
1884 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1885 if(U_FAILURE(errorCode)) {
1886 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1887 return;
1888 }
1889 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1890 /* Test the condition when source >= sourceLimit */
1891 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1892 cnvName = ucnv_getName(cnv, &errorCode);
1893 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1894 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1895 }
1896 ucnv_close(cnv);
1897}
1898
1899static void TestUTF8() {
1900 /* test input */
1901 static const uint8_t in[]={
1902 0x61,
1903 0xc2, 0x80,
1904 0xe0, 0xa0, 0x80,
1905 0xf0, 0x90, 0x80, 0x80,
1906 0xf4, 0x84, 0x8c, 0xa1,
1907 0xf0, 0x90, 0x90, 0x81
1908 };
1909
1910 /* expected test results */
1911 static const int32_t results[]={
1912 /* number of bytes read, code point */
1913 1, 0x61,
1914 2, 0x80,
1915 3, 0x800,
1916 4, 0x10000,
1917 4, 0x104321,
1918 4, 0x10401
1919 };
1920
1921 /* error test input */
1922 static const uint8_t in2[]={
1923 0x61,
1924 0xc0, 0x80, /* illegal non-shortest form */
1925 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1926 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1927 0xc0, 0xc0, /* illegal trail byte */
1928 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1929 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1930 0xfe, /* illegal byte altogether */
1931 0x62
1932 };
1933
1934 /* expected error test results */
1935 static const int32_t results2[]={
1936 /* number of bytes read, code point */
1937 1, 0x61,
1938 22, 0x62
1939 };
1940
1941 UConverterToUCallback cb;
1942 const void *p;
1943
1944 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1945 UErrorCode errorCode=U_ZERO_ERROR;
1946 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1947 if(U_FAILURE(errorCode)) {
1948 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1949 return;
1950 }
1951 TestNextUChar(cnv, source, limit, results, "UTF-8");
1952 /* Test the condition when source >= sourceLimit */
1953 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1954
1955 /* test error behavior with a skip callback */
1956 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1957 source=(const char *)in2;
1958 limit=(const char *)(in2+sizeof(in2));
1959 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1960
1961 ucnv_close(cnv);
1962}
1963
1964static void TestCESU8() {
1965 /* test input */
1966 static const uint8_t in[]={
1967 0x61,
1968 0xc2, 0x80,
1969 0xe0, 0xa0, 0x80,
1970 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1971 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1972 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1973 0xef, 0xbf, 0xbc
1974 };
1975
1976 /* expected test results */
1977 static const int32_t results[]={
1978 /* number of bytes read, code point */
1979 1, 0x61,
1980 2, 0x80,
1981 3, 0x800,
1982 6, 0x10000,
1983 3, 0xdc01,
1984 -1,0xd802, /* may read 3 or 6 bytes */
1985 -1,0x10ffff,/* may read 0 or 3 bytes */
1986 3, 0xfffc
1987 };
1988
1989 /* error test input */
1990 static const uint8_t in2[]={
1991 0x61,
1992 0xc0, 0x80, /* illegal non-shortest form */
1993 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1994 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1995 0xc0, 0xc0, /* illegal trail byte */
1996 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1997 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1998 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1999 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2000 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2001 0xfe, /* illegal byte altogether */
2002 0x62
2003 };
2004
2005 /* expected error test results */
2006 static const int32_t results2[]={
2007 /* number of bytes read, code point */
2008 1, 0x61,
2009 34, 0x62
2010 };
2011
2012 UConverterToUCallback cb;
2013 const void *p;
2014
2015 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2016 UErrorCode errorCode=U_ZERO_ERROR;
2017 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2018 if(U_FAILURE(errorCode)) {
2019 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2020 return;
2021 }
2022 TestNextUChar(cnv, source, limit, results, "CESU-8");
2023 /* Test the condition when source >= sourceLimit */
2024 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2025
2026 /* test error behavior with a skip callback */
2027 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2028 source=(const char *)in2;
2029 limit=(const char *)(in2+sizeof(in2));
2030 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2031
2032 ucnv_close(cnv);
2033}
2034
2035static void TestUTF16() {
2036 /* test input */
2037 static const uint8_t in1[]={
2038 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2039 };
2040 static const uint8_t in2[]={
2041 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2042 };
2043 static const uint8_t in3[]={
2044 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2045 };
2046
2047 /* expected test results */
2048 static const int32_t results1[]={
2049 /* number of bytes read, code point */
2050 4, 0x4e00,
2051 2, 0xfeff
2052 };
2053 static const int32_t results2[]={
2054 /* number of bytes read, code point */
2055 4, 0x004e,
2056 2, 0xfffe
2057 };
2058 static const int32_t results3[]={
2059 /* number of bytes read, code point */
2060 2, 0xfefe,
2061 2, 0x4e00,
2062 2, 0xfeff,
2063 4, 0x20001
2064 };
2065
2066 const char *source, *limit;
2067
2068 UErrorCode errorCode=U_ZERO_ERROR;
2069 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2070 if(U_FAILURE(errorCode)) {
2071 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2072 return;
2073 }
2074
2075 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2076 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2077
2078 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2079 ucnv_resetToUnicode(cnv);
2080 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2081
2082 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2083 ucnv_resetToUnicode(cnv);
2084 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2085
2086 /* Test the condition when source >= sourceLimit */
2087 ucnv_resetToUnicode(cnv);
2088 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2089
2090 ucnv_close(cnv);
2091}
2092
2093static void TestUTF16BE() {
2094 /* test input */
2095 static const uint8_t in[]={
2096 0x00, 0x61,
2097 0x00, 0xc0,
2098 0x00, 0x31,
2099 0x00, 0xf4,
2100 0xce, 0xfe,
2101 0xd8, 0x01, 0xdc, 0x01
2102 };
2103
2104 /* expected test results */
2105 static const int32_t results[]={
2106 /* number of bytes read, code point */
2107 2, 0x61,
2108 2, 0xc0,
2109 2, 0x31,
2110 2, 0xf4,
2111 2, 0xcefe,
2112 4, 0x10401
2113 };
2114
2115 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2116 UErrorCode errorCode=U_ZERO_ERROR;
2117 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2118 if(U_FAILURE(errorCode)) {
2119 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2120 return;
2121 }
2122 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2123 /* Test the condition when source >= sourceLimit */
2124 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2125 /*Test for the condition where there is an invalid character*/
2126 {
2127 static const uint8_t source2[]={0x61};
2128 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2129 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2130 }
2131#if 0
2132 /*
2133 * Test disabled because currently the UTF-16BE/LE converters are supposed
2134 * to not set errors for unpaired surrogates.
2135 * This may change with
2136 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2137 */
2138
2139 /*Test for the condition where there is a surrogate pair*/
2140 {
2141 const uint8_t source2[]={0xd8, 0x01};
2142 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2143 }
2144#endif
2145 ucnv_close(cnv);
2146}
2147
2148static void
2149TestUTF16LE() {
2150 /* test input */
2151 static const uint8_t in[]={
2152 0x61, 0x00,
2153 0x31, 0x00,
2154 0x4e, 0x2e,
2155 0x4e, 0x00,
2156 0x01, 0xd8, 0x01, 0xdc
2157 };
2158
2159 /* expected test results */
2160 static const int32_t results[]={
2161 /* number of bytes read, code point */
2162 2, 0x61,
2163 2, 0x31,
2164 2, 0x2e4e,
2165 2, 0x4e,
2166 4, 0x10401
2167 };
2168
2169 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2170 UErrorCode errorCode=U_ZERO_ERROR;
2171 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2172 if(U_FAILURE(errorCode)) {
2173 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2174 return;
2175 }
2176 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2177 /* Test the condition when source >= sourceLimit */
2178 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2179 /*Test for the condition where there is an invalid character*/
2180 {
2181 static const uint8_t source2[]={0x61};
2182 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2183 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2184 }
2185#if 0
2186 /*
2187 * Test disabled because currently the UTF-16BE/LE converters are supposed
2188 * to not set errors for unpaired surrogates.
2189 * This may change with
2190 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2191 */
2192
2193 /*Test for the condition where there is a surrogate character*/
2194 {
2195 static const uint8_t source2[]={0x01, 0xd8};
2196 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2197 }
2198#endif
2199
2200 ucnv_close(cnv);
2201}
2202
2203static void TestUTF32() {
2204 /* test input */
2205 static const uint8_t in1[]={
2206 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2207 };
2208 static const uint8_t in2[]={
2209 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2210 };
2211 static const uint8_t in3[]={
2212 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2213 };
2214
2215 /* expected test results */
2216 static const int32_t results1[]={
2217 /* number of bytes read, code point */
2218 8, 0x100f00,
2219 4, 0xfeff
2220 };
2221 static const int32_t results2[]={
2222 /* number of bytes read, code point */
2223 8, 0x0f1000,
2224 4, 0xfffe
2225 };
2226 static const int32_t results3[]={
2227 /* number of bytes read, code point */
2228 4, 0xfefe,
2229 4, 0x100f00,
2230 4, 0xfffd, /* unmatched surrogate */
2231 4, 0xfffd /* unmatched surrogate */
2232 };
2233
2234 const char *source, *limit;
2235
2236 UErrorCode errorCode=U_ZERO_ERROR;
2237 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2238 if(U_FAILURE(errorCode)) {
2239 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2240 return;
2241 }
2242
2243 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2244 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2245
2246 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2247 ucnv_resetToUnicode(cnv);
2248 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2249
2250 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2251 ucnv_resetToUnicode(cnv);
2252 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2253
2254 /* Test the condition when source >= sourceLimit */
2255 ucnv_resetToUnicode(cnv);
2256 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2257
2258 ucnv_close(cnv);
2259}
2260
2261static void
2262TestUTF32BE() {
2263 /* test input */
2264 static const uint8_t in[]={
2265 0x00, 0x00, 0x00, 0x61,
2266 0x00, 0x00, 0x30, 0x61,
2267 0x00, 0x00, 0xdc, 0x00,
2268 0x00, 0x00, 0xd8, 0x00,
2269 0x00, 0x00, 0xdf, 0xff,
2270 0x00, 0x00, 0xff, 0xfe,
2271 0x00, 0x10, 0xab, 0xcd,
2272 0x00, 0x10, 0xff, 0xff
2273 };
2274
2275 /* expected test results */
2276 static const int32_t results[]={
2277 /* number of bytes read, code point */
2278 4, 0x61,
2279 4, 0x3061,
2280 4, 0xfffd,
2281 4, 0xfffd,
2282 4, 0xfffd,
2283 4, 0xfffe,
2284 4, 0x10abcd,
2285 4, 0x10ffff
2286 };
2287
2288 /* error test input */
2289 static const uint8_t in2[]={
2290 0x00, 0x00, 0x00, 0x61,
2291 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2292 0x00, 0x00, 0x00, 0x62,
2293 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2294 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2295 0x00, 0x00, 0x01, 0x62,
2296 0x00, 0x00, 0x02, 0x62
2297 };
2298
2299 /* expected error test results */
2300 static const int32_t results2[]={
2301 /* number of bytes read, code point */
2302 4, 0x61,
2303 8, 0x62,
2304 12, 0x162,
2305 4, 0x262
2306 };
2307
2308 UConverterToUCallback cb;
2309 const void *p;
2310
2311 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2312 UErrorCode errorCode=U_ZERO_ERROR;
2313 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2314 if(U_FAILURE(errorCode)) {
2315 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2316 return;
2317 }
2318 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2319
2320 /* Test the condition when source >= sourceLimit */
2321 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2322
2323 /* test error behavior with a skip callback */
2324 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2325 source=(const char *)in2;
2326 limit=(const char *)(in2+sizeof(in2));
2327 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2328
2329 ucnv_close(cnv);
2330}
2331
2332static void
2333TestUTF32LE() {
2334 /* test input */
2335 static const uint8_t in[]={
2336 0x61, 0x00, 0x00, 0x00,
2337 0x61, 0x30, 0x00, 0x00,
2338 0x00, 0xdc, 0x00, 0x00,
2339 0x00, 0xd8, 0x00, 0x00,
2340 0xff, 0xdf, 0x00, 0x00,
2341 0xfe, 0xff, 0x00, 0x00,
2342 0xcd, 0xab, 0x10, 0x00,
2343 0xff, 0xff, 0x10, 0x00
2344 };
2345
2346 /* expected test results */
2347 static const int32_t results[]={
2348 /* number of bytes read, code point */
2349 4, 0x61,
2350 4, 0x3061,
2351 4, 0xfffd,
2352 4, 0xfffd,
2353 4, 0xfffd,
2354 4, 0xfffe,
2355 4, 0x10abcd,
2356 4, 0x10ffff
2357 };
2358
2359 /* error test input */
2360 static const uint8_t in2[]={
2361 0x61, 0x00, 0x00, 0x00,
2362 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2363 0x62, 0x00, 0x00, 0x00,
2364 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2365 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2366 0x62, 0x01, 0x00, 0x00,
2367 0x62, 0x02, 0x00, 0x00,
2368 };
2369
2370 /* expected error test results */
2371 static const int32_t results2[]={
2372 /* number of bytes read, code point */
2373 4, 0x61,
2374 8, 0x62,
2375 12, 0x162,
2376 4, 0x262,
2377 };
2378
2379 UConverterToUCallback cb;
2380 const void *p;
2381
2382 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2383 UErrorCode errorCode=U_ZERO_ERROR;
2384 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2385 if(U_FAILURE(errorCode)) {
2386 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2387 return;
2388 }
2389 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2390
2391 /* Test the condition when source >= sourceLimit */
2392 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2393
2394 /* test error behavior with a skip callback */
2395 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2396 source=(const char *)in2;
2397 limit=(const char *)(in2+sizeof(in2));
2398 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2399
2400 ucnv_close(cnv);
2401}
2402
2403static void
2404TestLATIN1() {
2405 /* test input */
2406 static const uint8_t in[]={
2407 0x61,
2408 0x31,
2409 0x32,
2410 0xc0,
2411 0xf0,
2412 0xf4,
2413 };
2414
2415 /* expected test results */
2416 static const int32_t results[]={
2417 /* number of bytes read, code point */
2418 1, 0x61,
2419 1, 0x31,
2420 1, 0x32,
2421 1, 0xc0,
2422 1, 0xf0,
2423 1, 0xf4,
2424 };
2425 static const uint16_t in1[] = {
2426 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2427 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2428 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2429 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2430 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2431 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2432 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2433 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2434 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2435 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2436 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2437 0xcb, 0x82
2438 };
2439 static const uint8_t out1[] = {
2440 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2441 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2442 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2443 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2444 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2445 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2446 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2447 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2448 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2449 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2450 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2451 0xcb, 0x82
2452 };
2453 static const uint16_t in2[]={
2454 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2455 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2456 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2457 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2458 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2459 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2460 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2461 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2462 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2463 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2464 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2465 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2466 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2467 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2468 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2471 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2472 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2473 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2474 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2475 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2476 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2477 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2478 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2479 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2480 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2481 0x37, 0x20, 0x2A, 0x2F,
2482 };
2483 static const unsigned char out2[]={
2484 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2485 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2486 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2487 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2488 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2489 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2490 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2491 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2492 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2493 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2494 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2495 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2496 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2497 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2498 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2499 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2500 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2501 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2502 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2503 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2504 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2505 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2506 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2507 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2508 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2509 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2510 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2511 0x37, 0x20, 0x2A, 0x2F,
2512 };
2513 const char *source=(const char *)in;
2514 const char *limit=(const char *)in+sizeof(in);
2515
2516 UErrorCode errorCode=U_ZERO_ERROR;
2517 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2518 if(U_FAILURE(errorCode)) {
2519 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2520 return;
2521 }
2522 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2523 /* Test the condition when source >= sourceLimit */
2524 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2525 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2526 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2527
2528 ucnv_close(cnv);
2529}
2530
2531static void
2532TestSBCS() {
2533 /* test input */
2534 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2535 /* expected test results */
2536 static const int32_t results[]={
2537 /* number of bytes read, code point */
2538 1, 0x61,
2539 1, 0xbf,
2540 1, 0xc4,
2541 1, 0x2021,
2542 1, 0xf8ff,
2543 1, 0x00d9
2544 };
2545
2546 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2547 UErrorCode errorCode=U_ZERO_ERROR;
2548 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2549 if(U_FAILURE(errorCode)) {
2550 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2551 return;
2552 }
2553 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2554 /* Test the condition when source >= sourceLimit */
2555 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2556 /*Test for Illegal character */ /*
2557 {
2558 static const uint8_t input1[]={ 0xA1 };
2559 const char* illegalsource=(const char*)input1;
2560 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2561 }
2562 */
2563 ucnv_close(cnv);
2564}
2565
2566static void
2567TestDBCS() {
2568 /* test input */
2569 static const uint8_t in[]={
2570 0x44, 0x6a,
2571 0xc4, 0x9c,
2572 0x7a, 0x74,
2573 0x46, 0xab,
2574 0x42, 0x5b,
2575
2576 };
2577
2578 /* expected test results */
2579 static const int32_t results[]={
2580 /* number of bytes read, code point */
2581 2, 0x00a7,
2582 2, 0xe1d2,
2583 2, 0x6962,
2584 2, 0xf842,
2585 2, 0xffe5,
2586 };
2587
2588 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2589 UErrorCode errorCode=U_ZERO_ERROR;
2590
2591 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2592 if(U_FAILURE(errorCode)) {
2593 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2594 return;
2595 }
2596 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2597 /* Test the condition when source >= sourceLimit */
2598 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2599 /*Test for the condition where there is an invalid character*/
2600 {
2601 static const uint8_t source2[]={0x1a, 0x1b};
2602 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2603 }
2604 /*Test for the condition where we have a truncated char*/
2605 {
2606 static const uint8_t source1[]={0xc4};
2607 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2608 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2609 }
2610 ucnv_close(cnv);
2611}
2612
2613static void
2614TestMBCS() {
2615 /* test input */
2616 static const uint8_t in[]={
2617 0x01,
2618 0xa6, 0xa3,
2619 0x00,
2620 0xa6, 0xa1,
2621 0x08,
2622 0xc2, 0x76,
2623 0xc2, 0x78,
2624
2625 };
2626
2627 /* expected test results */
2628 static const int32_t results[]={
2629 /* number of bytes read, code point */
2630 1, 0x0001,
2631 2, 0x250c,
2632 1, 0x0000,
2633 2, 0x2500,
2634 1, 0x0008,
2635 2, 0xd60c,
2636 2, 0xd60e,
2637 };
2638
2639 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2640 UErrorCode errorCode=U_ZERO_ERROR;
2641
2642 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2643 if(U_FAILURE(errorCode)) {
2644 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2645 return;
2646 }
2647 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2648 /* Test the condition when source >= sourceLimit */
2649 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2650 /*Test for the condition where there is an invalid character*/
2651 {
2652 static const uint8_t source2[]={0xa1, 0x80};
2653 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2654 }
2655 /*Test for the condition where we have a truncated char*/
2656 {
2657 static const uint8_t source1[]={0xc4};
2658 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2659 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2660 }
2661 ucnv_close(cnv);
2662
2663}
2664
2665#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2666static void
2667TestICCRunout() {
2668/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2669
2670 const char *cnvName = "ibm-1363";
2671 UErrorCode status = U_ZERO_ERROR;
2672 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2673 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2674 const char *source = sourceData;
2675 const char *sourceLim = sourceData+sizeof(sourceData);
2676 UChar c1, c2, c3;
2677 UConverter *cnv=ucnv_open(cnvName, &status);
2678 if(U_FAILURE(status)) {
2679 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2680 return;
2681 }
2682
2683#if 0
2684 {
2685 UChar targetBuf[256];
2686 UChar *target = targetBuf;
2687 UChar *targetLim = target+256;
2688 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2689
2690 log_info("After convert: target@%d, source@%d, status%s\n",
2691 target-targetBuf, source-sourceData, u_errorName(status));
2692
2693 if(U_FAILURE(status)) {
2694 log_err("Failed to convert: %s\n", u_errorName(status));
2695 } else {
2696
2697 }
2698 }
2699#endif
2700
2701 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2702 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2703
2704 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2705 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2706
2707 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2708 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2709
2710 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2711 log_verbose("OK\n");
2712 } else {
2713 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2714 }
2715
2716 ucnv_close(cnv);
2717
2718}
2719#endif
2720
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs the shared TestNextUChar()/TestNextUCharError() checks against the
 * generic "ISO_2022" converter: an escape sequence (1B 25 42) followed by
 * UTF-8 encoded text, reversed and empty source ranges, a truncated
 * character and an invalid character.
 * Only compiled when U_ENABLE_GENERIC_ISO_2022 is defined.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };



    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /* Test the condition when source >= sourceLimit */
    /*
     * A limit below the source is expressed as (source+1, source): computing
     * source-1 would form a pointer before the start of in[], which is
     * undefined behaviour in C.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2778
2779static void
2780TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2781 const UChar* uSource;
2782 const UChar* uSourceLimit;
2783 const char* cSource;
2784 const char* cSourceLimit;
2785 UChar *uTargetLimit =NULL;
2786 UChar *uTarget;
2787 char *cTarget;
2788 const char *cTargetLimit;
2789 char *cBuf;
2790 UChar *uBuf; /*,*test;*/
2791 int32_t uBufSize = 120;
2792 int len=0;
2793 int i=2;
2794 UErrorCode errorCode=U_ZERO_ERROR;
2795 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2796 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2797 ucnv_reset(cnv);
2798 for(;--i>0; ){
2799 uSource = (UChar*) source;
2800 uSourceLimit=(const UChar*)sourceLimit;
2801 cTarget = cBuf;
2802 uTarget = uBuf;
2803 cSource = cBuf;
2804 cTargetLimit = cBuf;
2805 uTargetLimit = uBuf;
2806
2807 do{
2808
2809 cTargetLimit = cTargetLimit+ i;
2810 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2811 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2812 errorCode=U_ZERO_ERROR;
2813 continue;
2814 }
2815
2816 if(U_FAILURE(errorCode)){
2817 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2818 return;
2819 }
2820
2821 }while (uSource<uSourceLimit);
2822
2823 cSourceLimit =cTarget;
2824 do{
2825 uTargetLimit=uTargetLimit+i;
2826 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2827 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2828 errorCode=U_ZERO_ERROR;
2829 continue;
2830 }
2831 if(U_FAILURE(errorCode)){
2832 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2833 return;
2834 }
2835 }while(cSource<cSourceLimit);
2836
2837 uSource = source;
2838 /*test =uBuf;*/
2839 for(len=0;len<(int)(source - sourceLimit);len++){
2840 if(uBuf[len]!=uSource[len]){
2841 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2842 }
2843 }
2844 }
2845 free(uBuf);
2846 free(cBuf);
2847}
2848/* Test for Jitterbug 778 */
2849static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2850 const UChar* uSource;
2851 const UChar* uSourceLimit;
2852 const char* cSource;
2853 UChar *uTargetLimit =NULL;
2854 UChar *uTarget;
2855 char *cTarget;
2856 const char *cTargetLimit;
2857 char *cBuf;
2858 UChar *uBuf,*test;
2859 int32_t uBufSize = 120;
2860 int numCharsInTarget=0;
2861 UErrorCode errorCode=U_ZERO_ERROR;
2862 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2863 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2864 uSource = source;
2865 uSourceLimit=sourceLimit;
2866 cTarget = cBuf;
2867 cTargetLimit = cBuf +uBufSize*5;
2868 uTarget = uBuf;
2869 uTargetLimit = uBuf+ uBufSize*5;
2870 ucnv_reset(cnv);
2871 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2872 if(U_FAILURE(errorCode)){
2873 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2874 return;
2875 }
2876 cSource = cBuf;
2877 test =uBuf;
2878 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2879 if(U_FAILURE(errorCode)){
2880 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2881 return;
2882 }
2883 uSource = source;
2884 while(uSource<uSourceLimit){
2885 if(*test!=*uSource){
2886
2887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2888 }
2889 uSource++;
2890 test++;
2891 }
2892 free(uBuf);
2893 free(cBuf);
2894}
2895
2896static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2897 const UChar* uSource;
2898 const UChar* uSourceLimit;
2899 const char* cSource;
2900 const char* cSourceLimit;
2901 UChar *uTargetLimit =NULL;
2902 UChar *uTarget;
2903 char *cTarget;
2904 const char *cTargetLimit;
2905 char *cBuf;
2906 UChar *uBuf; /*,*test;*/
2907 int32_t uBufSize = 120;
2908 int len=0;
2909 int i=2;
2910 const UChar *temp = sourceLimit;
2911 UErrorCode errorCode=U_ZERO_ERROR;
2912 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2913 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2914
2915 ucnv_reset(cnv);
2916 for(;--i>0;){
2917 uSource = (UChar*) source;
2918 cTarget = cBuf;
2919 uTarget = uBuf;
2920 cSource = cBuf;
2921 cTargetLimit = cBuf;
2922 uTargetLimit = uBuf+uBufSize*5;
2923 cTargetLimit = cTargetLimit+uBufSize*10;
2924 uSourceLimit=uSource;
2925 do{
2926
2927 if (uSourceLimit < sourceLimit) {
2928 uSourceLimit = uSourceLimit+1;
2929 }
2930 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2931 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2932 errorCode=U_ZERO_ERROR;
2933 continue;
2934 }
2935
2936 if(U_FAILURE(errorCode)){
2937 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2938 return;
2939 }
2940
2941 }while (uSource<temp);
2942
2943 cSourceLimit =cBuf;
2944 do{
2945 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2946 cSourceLimit = cSourceLimit+1;
2947 }
2948 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2949 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2950 errorCode=U_ZERO_ERROR;
2951 continue;
2952 }
2953 if(U_FAILURE(errorCode)){
2954 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2955 return;
2956 }
2957 }while(cSource<cTarget);
2958
2959 uSource = source;
2960 /*test =uBuf;*/
2961 for(;len<(int)(source - sourceLimit);len++){
2962 if(uBuf[len]!=uSource[len]){
2963 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2964 }
2965 }
2966 }
2967 free(uBuf);
2968 free(cBuf);
2969}
2970static void
2971TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2972 const uint16_t results[], const char* message){
2973/* const char* s0; */
2974 const char* s=(char*)source;
2975 const uint16_t *r=results;
2976 UErrorCode errorCode=U_ZERO_ERROR;
2977 uint32_t c,exC;
2978 ucnv_reset(cnv);
2979 while(s<limit) {
2980 /* s0=s; */
2981 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2982 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2983 break; /* no more significant input */
2984 } else if(U_FAILURE(errorCode)) {
2985 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2986 break;
2987 } else {
2988 if(U16_IS_LEAD(*r)){
2989 int i =0, len = 2;
2990 U16_NEXT(r, i, len, exC);
2991 r++;
2992 }else{
2993 exC = *r;
2994 }
2995 if(c!=(uint32_t)(exC))
2996 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2997 }
2998 r++;
2999 }
3000}
3001
3002static int TestJitterbug930(const char* enc){
3003 UErrorCode err = U_ZERO_ERROR;
3004 UConverter*converter;
3005 char out[80];
3006 char*target = out;
3007 UChar in[4];
3008 const UChar*source = in;
3009 int32_t off[80];
3010 int32_t* offsets = off;
3011 int numOffWritten=0;
3012 UBool flush = 0;
3013 converter = my_ucnv_open(enc, &err);
3014
3015 in[0] = 0x41; /* 0x4E00;*/
3016 in[1] = 0x4E01;
3017 in[2] = 0x4E02;
3018 in[3] = 0x4E03;
3019
3020 memset(off, '*', sizeof(off));
3021
3022 ucnv_fromUnicode (converter,
3023 &target,
3024 target+2,
3025 &source,
3026 source+3,
3027 offsets,
3028 flush,
3029 &err);
3030
3031 /* writes three bytes into the output buffer: 41 1B 24
3032 * but offsets contains 0 1 1
3033 */
3034 while(*offsets< off[10]){
3035 numOffWritten++;
3036 offsets++;
3037 }
3038 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3039 if(numOffWritten!= (int)(target-out)){
3040 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3041 }
3042
3043 err = U_ZERO_ERROR;
3044
3045 memset(off,'*' , sizeof(off));
3046
3047 flush = 1;
3048 offsets=off;
3049 ucnv_fromUnicode (converter,
3050 &target,
3051 target+4,
3052 &source,
3053 source,
3054 offsets,
3055 flush,
3056 &err);
3057 numOffWritten=0;
3058 while(*offsets< off[10]){
3059 numOffWritten++;
3060 if(*offsets!= -1){
3061 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3062 }
3063 offsets++;
3064 }
3065
3066 /* writes 42 43 7A into output buffer,
3067 * offsets contains -1 -1 -1
3068 */
3069 ucnv_close(converter);
3070 return 0;
3071}
3072
3073static void
3074TestHZ() {
3075 /* test input */
3076 static const uint16_t in[]={
3077 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3078 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3079 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3080 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3081 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3082 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3083 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3084 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3085 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3086 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3087 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3088 0x005A, 0x005B, 0x005C, 0x000A
3089 };
3090 const UChar* uSource;
3091 const UChar* uSourceLimit;
3092 const char* cSource;
3093 const char* cSourceLimit;
3094 UChar *uTargetLimit =NULL;
3095 UChar *uTarget;
3096 char *cTarget;
3097 const char *cTargetLimit;
3098 char *cBuf = NULL;
3099 UChar *uBuf = NULL;
3100 UChar *test;
3101 int32_t uBufSize = 120;
3102 UErrorCode errorCode=U_ZERO_ERROR;
3103 UConverter *cnv = NULL;
3104 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3105 int32_t* myOff= offsets;
3106 cnv=ucnv_open("HZ", &errorCode);
3107 if(U_FAILURE(errorCode)) {
3108 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3109 goto cleanup;
3110 }
3111
3112 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3113 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3114 uSource = (const UChar*)in;
3115 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3116 cTarget = cBuf;
3117 cTargetLimit = cBuf +uBufSize*5;
3118 uTarget = uBuf;
3119 uTargetLimit = uBuf+ uBufSize*5;
3120 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3121 if(U_FAILURE(errorCode)){
3122 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3123 goto cleanup;
3124 }
3125 cSource = cBuf;
3126 cSourceLimit =cTarget;
3127 test =uBuf;
3128 myOff=offsets;
3129 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3130 if(U_FAILURE(errorCode)){
3131 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3132 goto cleanup;
3133 }
3134 uSource = (const UChar*)in;
3135 while(uSource<uSourceLimit){
3136 if(*test!=*uSource){
3137
3138 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3139 }
3140 uSource++;
3141 test++;
3142 }
3143 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3144 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3145 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3146 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3147 TestJitterbug930("csISO2022JP");
3148
3149cleanup:
3150 ucnv_close(cnv);
3151 free(offsets);
3152 free(uBuf);
3153 free(cBuf);
3154}
3155
3156static void
3157TestISCII(){
3158 /* test input */
3159 static const uint16_t in[]={
3160 /* test full range of Devanagari */
3161 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3162 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3163 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3164 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3165 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3166 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3167 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3168 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3169 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3170 0x096D,0x096E,0x096F,
3171 /* test Soft halant*/
3172 0x0915,0x094d, 0x200D,
3173 /* test explicit halant */
3174 0x0915,0x094d, 0x200c,
3175 /* test double danda */
3176 0x965,
3177 /* test ASCII */
3178 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3179 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3180 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3181 /* tests from Lotus */
3182 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3183 0x0930,0x094D,0x200D,
3184 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3185 0x0915,0x0921,0x002B,0x095F,
3186 /* tamil range */
3187 0x0B86, 0xB87, 0xB88,
3188 /* telugu range */
3189 0x0C05, 0x0C02, 0x0C03,0x0c31,
3190 /* kannada range */
3191 0x0C85, 0xC82, 0x0C83,
3192 /* test Abbr sign and Anudatta */
3193 0x0970, 0x952,
3194 /* 0x0958,
3195 0x0959,
3196 0x095A,
3197 0x095B,
3198 0x095C,
3199 0x095D,
3200 0x095E,
3201 0x095F,*/
3202 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3203 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3204 0x090C ,
3205 0x0962,
3206 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3207 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3208 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3209 0x093D /* Avagraha 0xEA, 0xE9*/,
3210 0x0958,
3211 0x0959,
3212 0x095A,
3213 0x095B,
3214 0x095C,
3215 0x095D,
3216 0x095E,
3217 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3218 };
3219 static const unsigned char byteArr[]={
3220
3221 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3222 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3223 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3224 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3225 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3226 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3227 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3228 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3229 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3230 0xf8,0xf9,0xfa,
3231 /* test soft halant */
3232 0xb3, 0xE8, 0xE9,
3233 /* test explicit halant */
3234 0xb3, 0xE8, 0xE8,
3235 /* test double danda */
3236 0xea, 0xea,
3237 /* test ASCII */
3238 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3239 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3240 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3241 /* test ATR code */
3242
3243 /* tests from Lotus */
3244 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3245 0xEF,0x42,0xCF,0xE8,0xD9,
3246 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3247 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3248 /* tamil range */
3249 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3250 /* telugu range */
3251 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3252 /* kannada range */
3253 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3254 /* anudatta and abbreviation sign */
3255 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3256
3257
3258 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3259
3260 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3261
3262 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3263
3264 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3265
3266 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3267
3268 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3269
3270 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3271
3272 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3273
3274 0xB3, 0xE9, /* Ka + NUKTA */
3275
3276 0xB4, 0xE9, /* Kha + NUKTA */
3277
3278 0xB5, 0xE9, /* Ga + NUKTA */
3279
3280 0xBA, 0xE9,
3281
3282 0xBF, 0xE9,
3283
3284 0xC0, 0xE9,
3285
3286 0xC9, 0xE9,
3287 /* INV halant RA */
3288 0xD9, 0xE8, 0xCF,
3289 0x00, 0x00A0,
3290 /* just consume unhandled codepoints */
3291 0xEF, 0x30,
3292
3293 };
3294 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3295 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3296
3297}
3298
3299static void
3300TestISO_2022_JP() {
3301 /* test input */
3302 static const uint16_t in[]={
3303 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3304 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3305 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3306 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3307 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3308 0x201D, 0x3014, 0x000D, 0x000A,
3309 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3310 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3311 };
3312 const UChar* uSource;
3313 const UChar* uSourceLimit;
3314 const char* cSource;
3315 const char* cSourceLimit;
3316 UChar *uTargetLimit =NULL;
3317 UChar *uTarget;
3318 char *cTarget;
3319 const char *cTargetLimit;
3320 char *cBuf = NULL;
3321 UChar *uBuf = NULL;
3322 UChar *test;
3323 int32_t uBufSize = 120;
3324 UErrorCode errorCode=U_ZERO_ERROR;
3325 UConverter *cnv = NULL;
3326 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3327 int32_t* myOff= offsets;
3328 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3329 if(U_FAILURE(errorCode)) {
3330 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3331 goto cleanup;
3332 }
3333
3334 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3335 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3336 uSource = (const UChar*)in;
3337 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3338 cTarget = cBuf;
3339 cTargetLimit = cBuf +uBufSize*5;
3340 uTarget = uBuf;
3341 uTargetLimit = uBuf+ uBufSize*5;
3342 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3343 if(U_FAILURE(errorCode)){
3344 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3345 goto cleanup;
3346 }
3347 cSource = cBuf;
3348 cSourceLimit =cTarget;
3349 test =uBuf;
3350 myOff=offsets;
3351 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3352 if(U_FAILURE(errorCode)){
3353 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3354 goto cleanup;
3355 }
3356
3357 uSource = (const UChar*)in;
3358 while(uSource<uSourceLimit){
3359 if(*test!=*uSource){
3360
3361 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3362 }
3363 uSource++;
3364 test++;
3365 }
3366
3367 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3368 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3370 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3371 TestJitterbug930("csISO2022JP");
3372
3373cleanup:
3374 ucnv_close(cnv);
3375 free(uBuf);
3376 free(cBuf);
3377 free(offsets);
3378}
3379
3380static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3381 const UChar* uSource;
3382 const UChar* uSourceLimit;
3383 const char* cSource;
3384 const char* cSourceLimit;
3385 UChar *uTargetLimit =NULL;
3386 UChar *uTarget;
3387 char *cTarget;
3388 const char *cTargetLimit;
3389 char *cBuf;
3390 UChar *uBuf,*test;
3391 int32_t uBufSize = 120*10;
3392 UErrorCode errorCode=U_ZERO_ERROR;
3393 UConverter *cnv;
3394 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3395 int32_t* myOff= offsets;
3396 cnv=my_ucnv_open(conv, &errorCode);
3397 if(U_FAILURE(errorCode)) {
3398 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3399 return;
3400 }
3401
3402 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3403 cBuf =(char*)malloc(uBufSize * sizeof(char));
3404 uSource = (const UChar*)in;
3405 uSourceLimit=uSource+len;
3406 cTarget = cBuf;
3407 cTargetLimit = cBuf +uBufSize;
3408 uTarget = uBuf;
3409 uTargetLimit = uBuf+ uBufSize;
3410 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3411 if(U_FAILURE(errorCode)){
3412 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3413 return;
3414 }
3415 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3416 cSource = cBuf;
3417 cSourceLimit =cTarget;
3418 test =uBuf;
3419 myOff=offsets;
3420 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3421 if(U_FAILURE(errorCode)){
3422 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3423 return;
3424 }
3425
3426 uSource = (const UChar*)in;
3427 while(uSource<uSourceLimit){
3428 if(*test!=*uSource){
3429 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3430 }
3431 uSource++;
3432 test++;
3433 }
3434 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3435 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3436 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3437 if(byteArr && byteArrLen!=0){
3438 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3439 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3440 {
3441 cSource = byteArr;
3442 cSourceLimit = cSource+byteArrLen;
3443 test=uBuf;
3444 myOff = offsets;
3445 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3446 if(U_FAILURE(errorCode)){
3447 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3448 return;
3449 }
3450
3451 uSource = (const UChar*)in;
3452 while(uSource<uSourceLimit){
3453 if(*test!=*uSource){
3454 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3455 }
3456 uSource++;
3457 test++;
3458 }
3459 }
3460 }
3461
3462 ucnv_close(cnv);
3463 free(uBuf);
3464 free(cBuf);
3465 free(offsets);
3466}
3467static UChar U_CALLCONV
3468_charAt(int32_t offset, void *context) {
3469 return ((char*)context)[offset];
3470}
3471
3472static int32_t
3473unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3474 int32_t srcIndex=0;
3475 int32_t dstIndex=0;
3476 if(U_FAILURE(*status)){
3477 return 0;
3478 }
3479 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3480 *status = U_ILLEGAL_ARGUMENT_ERROR;
3481 return 0;
3482 }
3483 if(srcLen==-1){
3484 srcLen = (int32_t)uprv_strlen(src);
3485 }
3486
3487 for (; srcIndex<srcLen; ) {
3488 UChar32 c = src[srcIndex++];
3489 if (c == 0x005C /*'\\'*/) {
3490 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3491 if (c == (UChar32)0xFFFFFFFF) {
3492 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3493 break; /* invalid escape sequence */
3494 }
3495 }
3496 if(dstIndex < dstLen){
3497 if(c>0xFFFF){
3498 dst[dstIndex++] = U16_LEAD(c);
3499 if(dstIndex<dstLen){
3500 dst[dstIndex]=U16_TRAIL(c);
3501 }else{
3502 *status=U_BUFFER_OVERFLOW_ERROR;
3503 }
3504 }else{
3505 dst[dstIndex]=(UChar)c;
3506 }
3507
3508 }else{
3509 *status = U_BUFFER_OVERFLOW_ERROR;
3510 }
3511 dstIndex++; /* for preflighting */
3512 }
3513 return dstIndex;
3514}
3515
3516static void
3517TestFullRoundtrip(const char* cp){
3518 UChar usource[10] ={0};
3519 UChar nsrc[10] = {0};
3520 uint32_t i=1;
3521 int len=0, ulen;
3522 nsrc[0]=0x0061;
3523 /* Test codepoint 0 */
3524 TestConv(usource,1,cp,"",NULL,0);
3525 TestConv(usource,2,cp,"",NULL,0);
3526 nsrc[2]=0x5555;
3527 TestConv(nsrc,3,cp,"",NULL,0);
3528
3529 for(;i<=0x10FFFF;i++){
3530 if(i==0xD800){
3531 i=0xDFFF;
3532 continue;
3533 }
3534 if(i<=0xFFFF){
3535 usource[0] =(UChar) i;
3536 len=1;
3537 }else{
3538 usource[0]=U16_LEAD(i);
3539 usource[1]=U16_TRAIL(i);
3540 len=2;
3541 }
3542 ulen=len;
3543 if(i==0x80) {
3544 usource[2]=0;
3545 }
3546 /* Test only single code points */
3547 TestConv(usource,ulen,cp,"",NULL,0);
3548 /* Test codepoint repeated twice */
3549 usource[ulen]=usource[0];
3550 usource[ulen+1]=usource[1];
3551 ulen+=len;
3552 TestConv(usource,ulen,cp,"",NULL,0);
3553 /* Test codepoint repeated 3 times */
3554 usource[ulen]=usource[0];
3555 usource[ulen+1]=usource[1];
3556 ulen+=len;
3557 TestConv(usource,ulen,cp,"",NULL,0);
3558 /* Test codepoint in between 2 codepoints */
3559 nsrc[1]=usource[0];
3560 nsrc[2]=usource[1];
3561 nsrc[len+1]=0x5555;
3562 TestConv(nsrc,len+2,cp,"",NULL,0);
3563 uprv_memset(usource,0,sizeof(UChar)*10);
3564 }
3565}
3566
3567static void
3568TestRoundTrippingAllUTF(void){
3569 if(!getTestOption(QUICK_OPTION)){
3570 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3571 TestFullRoundtrip("BOCU-1");
3572 log_verbose("Running exhaustive round trip test for SCSU\n");
3573 TestFullRoundtrip("SCSU");
3574 log_verbose("Running exhaustive round trip test for UTF-8\n");
3575 TestFullRoundtrip("UTF-8");
3576 log_verbose("Running exhaustive round trip test for CESU-8\n");
3577 TestFullRoundtrip("CESU-8");
3578 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3579 TestFullRoundtrip("UTF-16BE");
3580 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3581 TestFullRoundtrip("UTF-16LE");
3582 log_verbose("Running exhaustive round trip test for UTF-16\n");
3583 TestFullRoundtrip("UTF-16");
3584 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3585 TestFullRoundtrip("UTF-32BE");
3586 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3587 TestFullRoundtrip("UTF-32LE");
3588 log_verbose("Running exhaustive round trip test for UTF-32\n");
3589 TestFullRoundtrip("UTF-32");
3590 log_verbose("Running exhaustive round trip test for UTF-7\n");
3591 TestFullRoundtrip("UTF-7");
3592 log_verbose("Running exhaustive round trip test for UTF-7\n");
3593 TestFullRoundtrip("UTF-7,version=1");
3594 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3595 TestFullRoundtrip("IMAP-mailbox-name");
3596 /*
3597 *
3598 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3599 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3600 * The old mappings remain as fallbacks.
3601 * This test may be reintroduced at a later time.
3602 *
3603 * 110118 - mow
3604 */
3605 /*
3606 log_verbose("Running exhaustive round trip test for GB18030\n");
3607 TestFullRoundtrip("GB18030");
3608 */
3609 }
3610}
3611
3612static void
3613TestSCSU() {
3614
3615 static const uint16_t germanUTF16[]={
3616 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3617 };
3618
3619 static const uint8_t germanSCSU[]={
3620 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3621 };
3622
3623 static const uint16_t russianUTF16[]={
3624 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3625 };
3626
3627 static const uint8_t russianSCSU[]={
3628 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3629 };
3630
3631 static const uint16_t japaneseUTF16[]={
3632 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3633 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3634 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3635 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3636 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3637 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3638 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3639 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3640 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3641 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3642 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3643 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3644 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3645 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3646 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3647 };
3648
3649 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3650 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3651 static const uint8_t japaneseSCSU[]={
3652 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3653 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3654 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3655 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3656 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3657 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3658 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3659 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3660 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3661 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3662 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3663 0xcb, 0x82
3664 };
3665
3666 static const uint16_t allFeaturesUTF16[]={
3667 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3668 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3669 0x01df, 0xf000, 0xdbff, 0xdfff
3670 };
3671
3672 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3673 * result here (34B vs. 35B)
3674 */
3675 static const uint8_t allFeaturesSCSU[]={
3676 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3677 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3678 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3679 0xdf, 0x14, 0x80, 0x15, 0xff
3680 };
3681 static const uint16_t monkeyIn[]={
3682 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3683 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3684 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3685 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3686 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3687 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3688 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3689 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3690 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3691 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3692 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3693 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3694 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3695 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3696 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3697 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3698 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3699 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3700 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3701 /* test non-BMP code points */
3702 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3703 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3704 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3705 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3706 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3707 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3708 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3709 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3710 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3711 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3712 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3713
3714
3715 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3716 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3717 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3718 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3719 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3720 };
3721 static const char *fTestCases [] = {
3722 "\\ud800\\udc00", /* smallest surrogate*/
3723 "\\ud8ff\\udcff",
3724 "\\udBff\\udFff", /* largest surrogate pair*/
3725 "\\ud834\\udc00",
3726 "\\U0010FFFF",
3727 "Hello \\u9292 \\u9192 World!",
3728 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3729 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3730
3731 "\\u0648\\u06c8", /* catch missing reset*/
3732 "\\u0648\\u06c8",
3733
3734 "\\u4444\\uE001", /* lowest quotable*/
3735 "\\u4444\\uf2FF", /* highest quotable*/
3736 "\\u4444\\uf188\\u4444",
3737 "\\u4444\\uf188\\uf288",
3738 "\\u4444\\uf188abc\\u0429\\uf288",
3739 "\\u9292\\u2222",
3740 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3741 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3742 "Hello World!123456",
3743 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3744
3745 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3746 "abc\\u4411d", /* uses SQU*/
3747 "abc\\u4411\\u4412d",/* uses SCU*/
3748 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3749 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3750 "\\u9292\\u2222",
3751 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3752 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3753 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3754
3755 "", /* empty input*/
3756 "\\u0000", /* smallest BMP character*/
3757 "\\uFFFF", /* largest BMP character*/
3758
3759 /* regression tests*/
3760 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3761 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3762 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3763 "\\u0041\\u00df\\u0401\\u015f",
3764 "\\u9066\\u2123abc",
3765 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3766 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3767 };
3768 int i=0;
3769 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3770 const char* cSrc = fTestCases[i];
3771 UErrorCode status = U_ZERO_ERROR;
3772 int32_t cSrcLen,srcLen;
3773 UChar* src;
3774 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3775 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3776 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3777 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3778 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3779 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3780 free(src);
3781 }
3782 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3783 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3784 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3785 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3786 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3787 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3788 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3789}
3790
3791#if !UCONFIG_NO_LEGACY_CONVERSION
3792static void TestJitterbug2346(){
3793 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3794 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3795 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3796
3797 UChar uTarget[500]={'\0'};
3798 UChar* utarget=uTarget;
3799 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3800
3801 char cTarget[500]={'\0'};
3802 char* ctarget=cTarget;
3803 char* ctargetLimit=cTarget+sizeof(cTarget);
3804 const char* csource=source;
3805 UChar* temp = expected;
3806 UErrorCode err=U_ZERO_ERROR;
3807
3808 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3809 if(U_FAILURE(err)) {
3810 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3811 return;
3812 }
3813 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3814 if(U_FAILURE(err)) {
3815 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3816 return;
3817 }
3818 utargetLimit=utarget;
3819 utarget = uTarget;
3820 while(utarget<utargetLimit){
3821 if(*temp!=*utarget){
3822
3823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3824 }
3825 utarget++;
3826 temp++;
3827 }
3828 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3829 if(U_FAILURE(err)) {
3830 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3831 return;
3832 }
3833 ctargetLimit=ctarget;
3834 ctarget =cTarget;
3835 ucnv_close(conv);
3836
3837
3838}
3839
3840static void
3841TestISO_2022_JP_1() {
3842 /* test input */
3843 static const uint16_t in[]={
3844 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3845 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3846 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3847 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3848 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3849 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3850 0x201D, 0x000D, 0x000A,
3851 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3852 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3853 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3854 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3855 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3856 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3857 };
3858 const UChar* uSource;
3859 const UChar* uSourceLimit;
3860 const char* cSource;
3861 const char* cSourceLimit;
3862 UChar *uTargetLimit =NULL;
3863 UChar *uTarget;
3864 char *cTarget;
3865 const char *cTargetLimit;
3866 char *cBuf;
3867 UChar *uBuf,*test;
3868 int32_t uBufSize = 120;
3869 UErrorCode errorCode=U_ZERO_ERROR;
3870 UConverter *cnv;
3871
3872 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3873 if(U_FAILURE(errorCode)) {
3874 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3875 return;
3876 }
3877
3878 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3879 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3880 uSource = (const UChar*)in;
3881 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3882 cTarget = cBuf;
3883 cTargetLimit = cBuf +uBufSize*5;
3884 uTarget = uBuf;
3885 uTargetLimit = uBuf+ uBufSize*5;
3886 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3887 if(U_FAILURE(errorCode)){
3888 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3889 return;
3890 }
3891 cSource = cBuf;
3892 cSourceLimit =cTarget;
3893 test =uBuf;
3894 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3895 if(U_FAILURE(errorCode)){
3896 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3897 return;
3898 }
3899 uSource = (const UChar*)in;
3900 while(uSource<uSourceLimit){
3901 if(*test!=*uSource){
3902
3903 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3904 }
3905 uSource++;
3906 test++;
3907 }
3908 /*ucnv_close(cnv);
3909 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3910 /*Test for the condition where there is an invalid character*/
3911 ucnv_reset(cnv);
3912 {
3913 static const uint8_t source2[]={0x0e,0x24,0x053};
3914 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3915 }
3916 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3917 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3918 ucnv_close(cnv);
3919 free(uBuf);
3920 free(cBuf);
3921}
3922
3923static void
3924TestISO_2022_JP_2() {
3925 /* test input */
3926 static const uint16_t in[]={
3927 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3928 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3929 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3930 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3931 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3932 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3933 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3934 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3935 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3936 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3937 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3938 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3939 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3940 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3941 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3942 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3943 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3944 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3945 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3946 };
3947 const UChar* uSource;
3948 const UChar* uSourceLimit;
3949 const char* cSource;
3950 const char* cSourceLimit;
3951 UChar *uTargetLimit =NULL;
3952 UChar *uTarget;
3953 char *cTarget;
3954 const char *cTargetLimit;
3955 char *cBuf = NULL;
3956 UChar *uBuf = NULL;
3957 UChar *test;
3958 int32_t uBufSize = 120;
3959 UErrorCode errorCode=U_ZERO_ERROR;
3960 UConverter *cnv = NULL;
3961 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3962 int32_t* myOff= offsets;
3963 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3964 if(U_FAILURE(errorCode)) {
3965 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3966 goto cleanup;
3967 }
3968
3969 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3970 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3971 uSource = (const UChar*)in;
3972 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3973 cTarget = cBuf;
3974 cTargetLimit = cBuf +uBufSize*5;
3975 uTarget = uBuf;
3976 uTargetLimit = uBuf+ uBufSize*5;
3977 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3978 if(U_FAILURE(errorCode)){
3979 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3980 goto cleanup;
3981 }
3982 cSource = cBuf;
3983 cSourceLimit =cTarget;
3984 test =uBuf;
3985 myOff=offsets;
3986 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3987 if(U_FAILURE(errorCode)){
3988 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3989 goto cleanup;
3990 }
3991 uSource = (const UChar*)in;
3992 while(uSource<uSourceLimit){
3993 if(*test!=*uSource){
3994
3995 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3996 }
3997 uSource++;
3998 test++;
3999 }
4000 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4001 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4002 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4003 /*Test for the condition where there is an invalid character*/
4004 ucnv_reset(cnv);
4005 {
4006 static const uint8_t source2[]={0x0e,0x24,0x053};
4007 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4008 }
4009
4010cleanup:
4011 ucnv_close(cnv);
4012 free(uBuf);
4013 free(cBuf);
4014 free(offsets);
4015}
4016
4017static void
4018TestISO_2022_KR() {
4019 /* test input */
4020 static const uint16_t in[]={
4021 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4022 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4023 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4024 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4025 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4026 ,0x53E3,0x53E4,0x000A,0x000D};
4027 const UChar* uSource;
4028 const UChar* uSourceLimit;
4029 const char* cSource;
4030 const char* cSourceLimit;
4031 UChar *uTargetLimit =NULL;
4032 UChar *uTarget;
4033 char *cTarget;
4034 const char *cTargetLimit;
4035 char *cBuf = NULL;
4036 UChar *uBuf = NULL;
4037 UChar *test;
4038 int32_t uBufSize = 120;
4039 UErrorCode errorCode=U_ZERO_ERROR;
4040 UConverter *cnv = NULL;
4041 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4042 int32_t* myOff= offsets;
4043 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4044 if(U_FAILURE(errorCode)) {
4045 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4046 goto cleanup;
4047 }
4048
4049 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4050 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4051 uSource = (const UChar*)in;
4052 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4053 cTarget = cBuf;
4054 cTargetLimit = cBuf +uBufSize*5;
4055 uTarget = uBuf;
4056 uTargetLimit = uBuf+ uBufSize*5;
4057 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4058 if(U_FAILURE(errorCode)){
4059 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4060 goto cleanup;
4061 }
4062 cSource = cBuf;
4063 cSourceLimit =cTarget;
4064 test =uBuf;
4065 myOff=offsets;
4066 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4067 if(U_FAILURE(errorCode)){
4068 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4069 goto cleanup;
4070 }
4071 uSource = (const UChar*)in;
4072 while(uSource<uSourceLimit){
4073 if(*test!=*uSource){
4074 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4075 }
4076 uSource++;
4077 test++;
4078 }
4079 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4080 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4081 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4082 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4083 TestJitterbug930("csISO2022KR");
4084 /*Test for the condition where there is an invalid character*/
4085 ucnv_reset(cnv);
4086 {
4087 static const uint8_t source2[]={0x1b,0x24,0x053};
4088 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4089 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4090 }
4091
4092cleanup:
4093 ucnv_close(cnv);
4094 free(uBuf);
4095 free(cBuf);
4096 free(offsets);
4097}
4098
4099static void
4100TestISO_2022_KR_1() {
4101 /* test input */
4102 static const uint16_t in[]={
4103 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4104 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4105 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4106 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4107 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4108 ,0x53E3,0x53E4,0x000A,0x000D};
4109 const UChar* uSource;
4110 const UChar* uSourceLimit;
4111 const char* cSource;
4112 const char* cSourceLimit;
4113 UChar *uTargetLimit =NULL;
4114 UChar *uTarget;
4115 char *cTarget;
4116 const char *cTargetLimit;
4117 char *cBuf = NULL;
4118 UChar *uBuf = NULL;
4119 UChar *test;
4120 int32_t uBufSize = 120;
4121 UErrorCode errorCode=U_ZERO_ERROR;
4122 UConverter *cnv = NULL;
4123 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4124 int32_t* myOff= offsets;
4125 cnv=ucnv_open("ibm-25546", &errorCode);
4126 if(U_FAILURE(errorCode)) {
4127 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4128 goto cleanup;
4129 }
4130
4131 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4132 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4133 uSource = (const UChar*)in;
4134 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4135 cTarget = cBuf;
4136 cTargetLimit = cBuf +uBufSize*5;
4137 uTarget = uBuf;
4138 uTargetLimit = uBuf+ uBufSize*5;
4139 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4140 if(U_FAILURE(errorCode)){
4141 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4142 goto cleanup;
4143 }
4144 cSource = cBuf;
4145 cSourceLimit =cTarget;
4146 test =uBuf;
4147 myOff=offsets;
4148 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4149 if(U_FAILURE(errorCode)){
4150 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4151 goto cleanup;
4152 }
4153 uSource = (const UChar*)in;
4154 while(uSource<uSourceLimit){
4155 if(*test!=*uSource){
4156 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4157 }
4158 uSource++;
4159 test++;
4160 }
4161 ucnv_reset(cnv);
4162 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4163 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4164 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4165 ucnv_reset(cnv);
4166 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4167 /*Test for the condition where there is an invalid character*/
4168 ucnv_reset(cnv);
4169 {
4170 static const uint8_t source2[]={0x1b,0x24,0x053};
4171 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4172 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4173 }
4174
4175cleanup:
4176 ucnv_close(cnv);
4177 free(uBuf);
4178 free(cBuf);
4179 free(offsets);
4180}
4181
4182static void TestJitterbug2411(){
4183 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4184 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4185 UConverter* kr=NULL, *kr1=NULL;
4186 UErrorCode errorCode = U_ZERO_ERROR;
4187 UChar tgt[100]={'\0'};
4188 UChar* target = tgt;
4189 UChar* targetLimit = target+100;
4190 kr=ucnv_open("iso-2022-kr", &errorCode);
4191 if(U_FAILURE(errorCode)) {
4192 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4193 return;
4194 }
4195 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4196 if(U_FAILURE(errorCode)) {
4197 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4198 return;
4199 }
4200 kr1 = ucnv_open("ibm-25546", &errorCode);
4201 if(U_FAILURE(errorCode)) {
4202 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4203 return;
4204 }
4205 target = tgt;
4206 targetLimit = target+100;
4207 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4208
4209 if(U_FAILURE(errorCode)) {
4210 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4211 return;
4212 }
4213
4214 ucnv_close(kr);
4215 ucnv_close(kr1);
4216
4217}
4218
4219static void
4220TestJIS(){
4221 /* From Unicode moved to testdata/conversion.txt */
4222 /*To Unicode*/
4223 {
4224 static const uint8_t sampleTextJIS[] = {
4225 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4226 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4227 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4228 };
4229 static const uint16_t expectedISO2022JIS[] = {
4230 0x0041, 0x0042,
4231 0xFF81, 0xFF82,
4232 0x3000
4233 };
4234 static const int32_t toISO2022JISOffs[]={
4235 3,4,
4236 8,9,
4237 16
4238 };
4239
4240 static const uint8_t sampleTextJIS7[] = {
4241 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4242 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4243 0x1b,0x24,0x42,0x21,0x21,
4244 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4245 0x21,0x22,
4246 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4247 };
4248 static const uint16_t expectedISO2022JIS7[] = {
4249 0x0041, 0x0042,
4250 0xFF81, 0xFF82,
4251 0x3000,
4252 0xFF81, 0xFF82,
4253 0x3001,
4254 0x3000
4255 };
4256 static const int32_t toISO2022JIS7Offs[]={
4257 3,4,
4258 8,9,
4259 13,16,
4260 17,
4261 19,27
4262 };
4263 static const uint8_t sampleTextJIS8[] = {
4264 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4265 0xa1,0xc8,0xd9,/*Katakana Set*/
4266 0x1b,0x28,0x42,
4267 0x41,0x42,
4268 0xb1,0xc3, /*Katakana Set*/
4269 0x1b,0x24,0x42,0x21,0x21
4270 };
4271 static const uint16_t expectedISO2022JIS8[] = {
4272 0x0041, 0x0042,
4273 0xff61, 0xff88, 0xff99,
4274 0x0041, 0x0042,
4275 0xff71, 0xff83,
4276 0x3000
4277 };
4278 static const int32_t toISO2022JIS8Offs[]={
4279 3, 4, 5, 6,
4280 7, 11, 12, 13,
4281 14, 18,
4282 };
4283
4284 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4285 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4286 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4287 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4288 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4289 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4290 }
4291
4292}
4293
4294
4295#if 0
4296 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4297
4298static void TestJitterbug915(){
4299/* tests for roundtripping of the below sequence
4300\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4301\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4302\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4303\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4304\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4305\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4306\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4307*/
4308 static const char cSource[]={
4309 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4310 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4311 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4312 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4313 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4314 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4315 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4316 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4317 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4318 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4319 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4320 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4321 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4322 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4323 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4324 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4325 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4326 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4327 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4328 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4329 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4330 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4331 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4332 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4333 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4334 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4335 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4336 0x37, 0x20, 0x2A, 0x2F
4337 };
4338 UChar uTarget[500]={'\0'};
4339 UChar* utarget=uTarget;
4340 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4341
4342 char cTarget[500]={'\0'};
4343 char* ctarget=cTarget;
4344 char* ctargetLimit=cTarget+sizeof(cTarget);
4345 const char* csource=cSource;
4346 const char* tempSrc = cSource;
4347 UErrorCode err=U_ZERO_ERROR;
4348
4349 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4350 if(U_FAILURE(err)) {
4351 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4352 return;
4353 }
4354 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4355 if(U_FAILURE(err)) {
4356 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4357 return;
4358 }
4359 utargetLimit=utarget;
4360 utarget = uTarget;
4361 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4362 if(U_FAILURE(err)) {
4363 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4364 return;
4365 }
4366 ctargetLimit=ctarget;
4367 ctarget =cTarget;
4368 while(ctarget<ctargetLimit){
4369 if(*ctarget != *tempSrc){
4370 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4371 }
4372 ++ctarget;
4373 ++tempSrc;
4374 }
4375
4376 ucnv_close(conv);
4377}
4378
4379static void
4380TestISO_2022_CN_EXT() {
4381 /* test input */
4382 static const uint16_t in[]={
4383 /* test Non-BMP code points */
4384 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4385 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4386 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4387 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4388 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4389 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4390 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4391 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4392 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4393 0xD869, 0xDED5,
4394
4395 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4396 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4397 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4398 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4399 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4400 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4401 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4402 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4403 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4404 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4405 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4406 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4407 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4408 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4409 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4410 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4411 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4412 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4413
4414 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4415
4416 };
4417
4418 const UChar* uSource;
4419 const UChar* uSourceLimit;
4420 const char* cSource;
4421 const char* cSourceLimit;
4422 UChar *uTargetLimit =NULL;
4423 UChar *uTarget;
4424 char *cTarget;
4425 const char *cTargetLimit;
4426 char *cBuf = NULL;
4427 UChar *uBuf = NULL;
4428 UChar *test;
4429 int32_t uBufSize = 180;
4430 UErrorCode errorCode=U_ZERO_ERROR;
4431 UConverter *cnv = NULL;
4432 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4433 int32_t* myOff= offsets;
4434 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4435 if(U_FAILURE(errorCode)) {
4436 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4437 goto cleanup;
4438 }
4439
4440 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4441 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4442 uSource = (const UChar*)in;
4443 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4444 cTarget = cBuf;
4445 cTargetLimit = cBuf +uBufSize*5;
4446 uTarget = uBuf;
4447 uTargetLimit = uBuf+ uBufSize*5;
4448 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4449 if(U_FAILURE(errorCode)){
4450 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4451 goto cleanup;
4452 }
4453 cSource = cBuf;
4454 cSourceLimit =cTarget;
4455 test =uBuf;
4456 myOff=offsets;
4457 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4458 if(U_FAILURE(errorCode)){
4459 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4460 goto cleanup;
4461 }
4462 uSource = (const UChar*)in;
4463 while(uSource<uSourceLimit){
4464 if(*test!=*uSource){
4465 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4466 }
4467 else{
4468 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4469 }
4470 uSource++;
4471 test++;
4472 }
4473 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4474 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4475 /*Test for the condition where there is an invalid character*/
4476 ucnv_reset(cnv);
4477 {
4478 static const uint8_t source2[]={0x0e,0x24,0x053};
4479 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4480 }
4481
4482cleanup:
4483 ucnv_close(cnv);
4484 free(uBuf);
4485 free(cBuf);
4486 free(offsets);
4487}
4488#endif
4489
4490static void
4491TestISO_2022_CN() {
4492 /* test input */
4493 static const uint16_t in[]={
4494 /* jitterbug 951 */
4495 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4496 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4497 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4498 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4499 0x0020, 0x0045, 0x004e, 0x0044,
4500 /**/
4501 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4502 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4503 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4504 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4505 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4506 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4507 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4508 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4509 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4510 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4511 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4512 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4513 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4514 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4515 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4516 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4517 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4518
4519 };
4520 const UChar* uSource;
4521 const UChar* uSourceLimit;
4522 const char* cSource;
4523 const char* cSourceLimit;
4524 UChar *uTargetLimit =NULL;
4525 UChar *uTarget;
4526 char *cTarget;
4527 const char *cTargetLimit;
4528 char *cBuf = NULL;
4529 UChar *uBuf = NULL;
4530 UChar *test;
4531 int32_t uBufSize = 180;
4532 UErrorCode errorCode=U_ZERO_ERROR;
4533 UConverter *cnv = NULL;
4534 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4535 int32_t* myOff= offsets;
4536 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4537 if(U_FAILURE(errorCode)) {
4538 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4539 goto cleanup;
4540 }
4541
4542 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4543 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4544 uSource = (const UChar*)in;
4545 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4546 cTarget = cBuf;
4547 cTargetLimit = cBuf +uBufSize*5;
4548 uTarget = uBuf;
4549 uTargetLimit = uBuf+ uBufSize*5;
4550 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4551 if(U_FAILURE(errorCode)){
4552 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4553 goto cleanup;
4554 }
4555 cSource = cBuf;
4556 cSourceLimit =cTarget;
4557 test =uBuf;
4558 myOff=offsets;
4559 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4560 if(U_FAILURE(errorCode)){
4561 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4562 goto cleanup;
4563 }
4564 uSource = (const UChar*)in;
4565 while(uSource<uSourceLimit){
4566 if(*test!=*uSource){
4567 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4568 }
4569 else{
4570 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4571 }
4572 uSource++;
4573 test++;
4574 }
4575 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4576 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4577 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4578 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4579 TestJitterbug930("csISO2022CN");
4580 /*Test for the condition where there is an invalid character*/
4581 ucnv_reset(cnv);
4582 {
4583 static const uint8_t source2[]={0x0e,0x24,0x053};
4584 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4585 }
4586
4587cleanup:
4588 ucnv_close(cnv);
4589 free(uBuf);
4590 free(cBuf);
4591 free(offsets);
4592}
4593
4594/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4595typedef struct {
4596 const char * converterName; /* converter name handed to ucnv_open(); NULL marks the end of the test table */
4597 const char * inputText; /* encoded input bytes that are fed to ucnv_toUnicode() */
4598 int inputTextLength; /* number of bytes in inputText (the test table fills it with sizeof) */
4599} EmptySegmentTest;
4600
4601/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4602static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4603 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4604 // suppress compiler warnings about unused variables
4605 (void)context;
4606 (void)codeUnits;
4607 (void)length;
4608 if (reason > UCNV_IRREGULAR) {
4609 return;
4610 }
4611 if (reason != UCNV_IRREGULAR) {
4612 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4613 }
4614 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4615 *err = U_ZERO_ERROR;
4616 ucnv_cbToUWriteSub(toArgs,0,err);
4617}
4618
4619enum { kEmptySegmentToUCharsMax = 64 };
4620static void TestJitterbug6175(void) {
4621 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4622 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4623 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4624 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4625 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4626 static const EmptySegmentTest emptySegmentTests[] = {
4627 /* converterName inputText inputTextLength */
4628 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4629 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4630 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4631 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4632 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4633 /* terminator: */
4634 { NULL, NULL, 0, }
4635 };
4636 const EmptySegmentTest * testPtr;
4637 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4638 UErrorCode err = U_ZERO_ERROR;
4639 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4640 if (U_FAILURE(err)) {
4641 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4642 return;
4643 }
4644 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4645 if (U_FAILURE(err)) {
4646 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4647 ucnv_close(cnv);
4648 return;
4649 }
4650 {
4651 UChar toUChars[kEmptySegmentToUCharsMax];
4652 UChar * toUCharsPtr = toUChars;
4653 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4654 const char * inCharsPtr = testPtr->inputText;
4655 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4656 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4657 }
4658 ucnv_close(cnv);
4659 }
4660}
4661
4662static void
4663TestEBCDIC_STATEFUL() {
4664 /* test input */
4665 static const uint8_t in[]={
4666 0x61,
4667 0x1a,
4668 0x0f, 0x4b,
4669 0x42,
4670 0x40,
4671 0x36,
4672 };
4673
4674 /* expected test results */
4675 static const int32_t results[]={
4676 /* number of bytes read, code point */
4677 1, 0x002f,
4678 1, 0x0092,
4679 2, 0x002e,
4680 1, 0xff62,
4681 1, 0x0020,
4682 1, 0x0096,
4683
4684 };
4685 static const uint8_t in2[]={
4686 0x0f,
4687 0xa1,
4688 0x01
4689 };
4690
4691 /* expected test results */
4692 static const int32_t results2[]={
4693 /* number of bytes read, code point */
4694 2, 0x203E,
4695 1, 0x0001,
4696 };
4697
4698 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4699 UErrorCode errorCode=U_ZERO_ERROR;
4700 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4701 if(U_FAILURE(errorCode)) {
4702 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4703 return;
4704 }
4705 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4706 ucnv_reset(cnv);
4707 /* Test the condition when source >= sourceLimit */
4708 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4709 ucnv_reset(cnv);
4710 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4711 {
4712 static const uint8_t source1[]={0x0f};
4713 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4714 }
4715 /*Test for the condition where there is an invalid character*/
4716 ucnv_reset(cnv);
4717 {
4718 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4719 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4720 }
4721 ucnv_reset(cnv);
4722 source=(const char*)in2;
4723 limit=(const char*)in2+sizeof(in2);
4724 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4725 ucnv_close(cnv);
4726
4727}
4728
4729static void
4730TestGB18030() {
4731 /* test input */
4732 static const uint8_t in[]={
4733 0x24,
4734 0x7f,
4735 0x81, 0x30, 0x81, 0x30,
4736 0xa8, 0xbf,
4737 0xa2, 0xe3,
4738 0xd2, 0xbb,
4739 0x82, 0x35, 0x8f, 0x33,
4740 0x84, 0x31, 0xa4, 0x39,
4741 0x90, 0x30, 0x81, 0x30,
4742 0xe3, 0x32, 0x9a, 0x35
4743#if 0
4744 /*
4745 * Feature removed markus 2000-oct-26
4746 * Only some codepages must match surrogate pairs into supplementary code points -
4747 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4748 * GB 18030 provides direct encodings for supplementary code points, therefore
4749 * it must not combine two single-encoded surrogates into one code point.
4750 */
4751 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4752#endif
4753 };
4754
4755 /* expected test results */
4756 static const int32_t results[]={
4757 /* number of bytes read, code point */
4758 1, 0x24,
4759 1, 0x7f,
4760 4, 0x80,
4761 2, 0x1f9,
4762 2, 0x20ac,
4763 2, 0x4e00,
4764 4, 0x9fa6,
4765 4, 0xffff,
4766 4, 0x10000,
4767 4, 0x10ffff
4768#if 0
4769 /* Feature removed. See comment above. */
4770 8, 0x10000
4771#endif
4772 };
4773
4774/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4775 UErrorCode errorCode=U_ZERO_ERROR;
4776 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4777 if(U_FAILURE(errorCode)) {
4778 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4779 return;
4780 }
4781 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4782 ucnv_close(cnv);
4783}
4784
4785static void
4786TestLMBCS() {
4787 /* LMBCS-1 string */
4788 static const uint8_t pszLMBCS[]={
4789 0x61,
4790 0x01, 0x29,
4791 0x81,
4792 0xA0,
4793 0x0F, 0x27,
4794 0x0F, 0x91,
4795 0x14, 0x0a, 0x74,
4796 0x14, 0xF6, 0x02,
4797 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4798 0x10, 0x88, 0xA0,
4799 };
4800
4801 /* Unicode UChar32 equivalents */
4802 static const UChar32 pszUnicode32[]={
4803 /* code point */
4804 0x00000061,
4805 0x00002013,
4806 0x000000FC,
4807 0x000000E1,
4808 0x00000007,
4809 0x00000091,
4810 0x00000a74,
4811 0x00000200,
4812 0x00023456, /* code point for surrogate pair */
4813 0x00005516
4814 };
4815
4816/* Unicode UChar equivalents */
4817 static const UChar pszUnicode[]={
4818 /* code point */
4819 0x0061,
4820 0x2013,
4821 0x00FC,
4822 0x00E1,
4823 0x0007,
4824 0x0091,
4825 0x0a74,
4826 0x0200,
4827 0xD84D, /* low surrogate */
4828 0xDC56, /* high surrogate */
4829 0x5516
4830 };
4831
4832/* expected test results */
4833 static const int offsets32[]={
4834 /* number of bytes read, code point */
4835 0,
4836 1,
4837 3,
4838 4,
4839 5,
4840 7,
4841 9,
4842 12,
4843 15,
4844 21,
4845 24
4846 };
4847
4848/* expected test results */
4849 static const int offsets[]={
4850 /* number of bytes read, code point */
4851 0,
4852 1,
4853 3,
4854 4,
4855 5,
4856 7,
4857 9,
4858 12,
4859 15,
4860 18,
4861 21,
4862 24
4863 };
4864
4865
4866 UConverter *cnv;
4867
4868#define NAME_LMBCS_1 "LMBCS-1"
4869#define NAME_LMBCS_2 "LMBCS-2"
4870
4871
4872 /* Some basic open/close/property tests on some LMBCS converters */
4873 {
4874
4875 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4876 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4877 char get_subchars [1];
4878 const char * get_name;
4879 UConverter *cnv1;
4880 UConverter *cnv2;
4881
4882 int8_t len = sizeof(get_subchars);
4883
4884 UErrorCode errorCode=U_ZERO_ERROR;
4885
4886 /* Open */
4887 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4888 if(U_FAILURE(errorCode)) {
4889 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4890 return;
4891 }
4892 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4893 if(U_FAILURE(errorCode)) {
4894 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4895 return;
4896 }
4897
4898 /* Name */
4899 get_name = ucnv_getName (cnv1, &errorCode);
4900 if (strcmp(NAME_LMBCS_1,get_name)){
4901 log_err("Unexpected converter name: %s\n", get_name);
4902 }
4903 get_name = ucnv_getName (cnv2, &errorCode);
4904 if (strcmp(NAME_LMBCS_2,get_name)){
4905 log_err("Unexpected converter name: %s\n", get_name);
4906 }
4907
4908 /* substitution chars */
4909 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4910 if(U_FAILURE(errorCode)) {
4911 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4912 }
4913 if (len!=1){
4914 log_err("Unexpected length of sub chars\n");
4915 }
4916 if (get_subchars[0] != expected_subchars[0]){
4917 log_err("Unexpected value of sub chars\n");
4918 }
4919 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4920 if(U_FAILURE(errorCode)) {
4921 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4922 }
4923 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4924 if(U_FAILURE(errorCode)) {
4925 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4926 }
4927 if (len!=1){
4928 log_err("Unexpected length of sub chars\n");
4929 }
4930 if (get_subchars[0] != new_subchars[0]){
4931 log_err("Unexpected value of sub chars\n");
4932 }
4933 ucnv_close(cnv1);
4934 ucnv_close(cnv2);
4935
4936 }
4937
4938 /* LMBCS to Unicode - offsets */
4939 {
4940 UErrorCode errorCode=U_ZERO_ERROR;
4941
4942 const char * pSource = (const char *)pszLMBCS;
4943 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4944
4945 UChar Out [sizeof(pszUnicode) + 1];
4946 UChar * pOut = Out;
4947 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4948
4949 int32_t off [sizeof(offsets)];
4950
4951 /* last 'offset' in expected results is just the final size.
4952 (Makes other tests easier). Compensate here: */
4953
4954 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4955
4956
4957
4958 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4959 if(U_FAILURE(errorCode)) {
4960 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4961 return;
4962 }
4963
4964
4965
4966 ucnv_toUnicode (cnv,
4967 &pOut,
4968 OutLimit,
4969 &pSource,
4970 sourceLimit,
4971 off,
4972 TRUE,
4973 &errorCode);
4974
4975
4976 if (memcmp(off,offsets,sizeof(offsets)))
4977 {
4978 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4979 }
4980 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4981 {
4982 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4983 }
4984 ucnv_close(cnv);
4985 }
4986 {
4987 /* LMBCS to Unicode - getNextUChar */
4988 const char * sourceStart;
4989 const char *source=(const char *)pszLMBCS;
4990 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4991 const UChar32 *results= pszUnicode32;
4992 const int *off = offsets32;
4993
4994 UErrorCode errorCode=U_ZERO_ERROR;
4995 UChar32 uniChar;
4996
4997 cnv=ucnv_open("LMBCS-1", &errorCode);
4998 if(U_FAILURE(errorCode)) {
4999 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5000 return;
5001 }
5002 else
5003 {
5004
5005 while(source<limit) {
5006 sourceStart=source;
5007 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5008 if(U_FAILURE(errorCode)) {
5009 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5010 break;
5011 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5012 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5013 uniChar, (source-sourceStart), *results, *off);
5014 break;
5015 }
5016 results++;
5017 off++;
5018 }
5019 }
5020 ucnv_close(cnv);
5021 }
5022 { /* test locale & optimization group operations: Unicode to LMBCS */
5023
5024 UErrorCode errorCode=U_ZERO_ERROR;
5025 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5026 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5027 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5028 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5029 const UChar * pUniOut = uniString;
5030 UChar * pUniIn = uniString;
5031 uint8_t lmbcsString [4];
5032 const char * pLMBCSOut = (const char *)lmbcsString;
5033 char * pLMBCSIn = (char *)lmbcsString;
5034
5035 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5036 ucnv_fromUnicode (cnv16he,
5037 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5038 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5039 NULL, 1, &errorCode);
5040
5041 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5042 {
5043 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5044 }
5045
5046 pLMBCSIn= (char *)lmbcsString;
5047 pUniOut = uniString;
5048 ucnv_fromUnicode (cnv01us,
5049 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5050 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5051 NULL, 1, &errorCode);
5052
5053 if (lmbcsString[0] != 0x9F)
5054 {
5055 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5056 }
5057
5058 /* single byte char from mbcs char set */
5059 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5060 pLMBCSOut = (const char *)lmbcsString;
5061 pUniIn = uniString;
5062 ucnv_toUnicode (cnv16jp,
5063 &pUniIn, pUniIn + 1,
5064 &pLMBCSOut, (pLMBCSOut + 1),
5065 NULL, 1, &errorCode);
5066 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5067 {
5068 log_err("Unexpected results from LMBCS-16 single byte char\n");
5069 }
5070 /* convert to group 1: should be 3 bytes */
5071 pLMBCSIn = (char *)lmbcsString;
5072 pUniOut = uniString;
5073 ucnv_fromUnicode (cnv01us,
5074 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5075 &pUniOut, pUniOut + 1,
5076 NULL, 1, &errorCode);
5077 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5078 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5079 {
5080 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5081 }
5082 pLMBCSOut = (const char *)lmbcsString;
5083 pUniIn = uniString;
5084 ucnv_toUnicode (cnv01us,
5085 &pUniIn, pUniIn + 1,
5086 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5087 NULL, 1, &errorCode);
5088 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5089 {
5090 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5091 }
5092 pLMBCSIn = (char *)lmbcsString;
5093 pUniOut = uniString;
5094 ucnv_fromUnicode (cnv16jp,
5095 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5096 &pUniOut, pUniOut + 1,
5097 NULL, 1, &errorCode);
5098 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5099 {
5100 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5101 }
5102 ucnv_close(cnv16he);
5103 ucnv_close(cnv16jp);
5104 ucnv_close(cnv01us);
5105 }
5106 {
5107 /* Small source buffer testing, LMBCS -> Unicode */
5108
5109 UErrorCode errorCode=U_ZERO_ERROR;
5110
5111 const char * pSource = (const char *)pszLMBCS;
5112 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5113 int codepointCount = 0;
5114
5115 UChar Out [sizeof(pszUnicode) + 1];
5116 UChar * pOut = Out;
5117 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5118
5119
5120 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5121 if(U_FAILURE(errorCode)) {
5122 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5123 return;
5124 }
5125
5126
5127 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5128 {
5129 ucnv_toUnicode (cnv,
5130 &pOut,
5131 OutLimit,
5132 &pSource,
5133 (pSource+1), /* claim that this is a 1- byte buffer */
5134 NULL,
5135 FALSE, /* FALSE means there might be more chars in the next buffer */
5136 &errorCode);
5137
5138 if (U_SUCCESS (errorCode))
5139 {
5140 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5141 {
5142 /* we are on to the next code point: check value */
5143
5144 if (Out[0] != pszUnicode[codepointCount]){
5145 log_err("LMBCS->Uni result %lx should have been %lx \n",
5146 Out[0], pszUnicode[codepointCount]);
5147 }
5148
5149 pOut = Out; /* reset for accumulating next code point */
5150 codepointCount++;
5151 }
5152 }
5153 else
5154 {
5155 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5156 }
5157 }
5158 {
5159 /* limits & surrogate error testing */
5160 char LIn [sizeof(pszLMBCS)];
5161 const char * pLIn = LIn;
5162
5163 char LOut [sizeof(pszLMBCS)];
5164 char * pLOut = LOut;
5165
5166 UChar UOut [sizeof(pszUnicode)];
5167 UChar * pUOut = UOut;
5168
5169 UChar UIn [sizeof(pszUnicode)];
5170 const UChar * pUIn = UIn;
5171
5172 int32_t off [sizeof(offsets)];
5173 UChar32 uniChar;
5174
5175 errorCode=U_ZERO_ERROR;
5176
5177 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5178 pUIn++;
5179 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5180 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5181 {
5182 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5183 }
5184 pUIn--;
5185
5186 errorCode=U_ZERO_ERROR;
5187 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5188 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5189 {
5190 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5191 }
5192 errorCode=U_ZERO_ERROR;
5193
5194 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5195 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5196 {
5197 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5198 }
5199 errorCode=U_ZERO_ERROR;
5200
5201 /* 0 byte source request - no error, no pointer movement */
5202 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5203 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5204 if(U_FAILURE(errorCode)) {
5205 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5206 }
5207 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5208 {
5209 log_err("Unexpected pointer move in 0 byte source request \n");
5210 }
5211 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5212 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5213 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5214 {
5215 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5216 }
5217 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5218 {
5219 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5220 }
5221 errorCode = U_ZERO_ERROR;
5222
5223 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5224
5225 pUIn = pszUnicode;
5226 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5227 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5228 {
5229 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5230 }
5231
5232 errorCode = U_ZERO_ERROR;
5233
5234 pLIn = (const char *)pszLMBCS;
5235 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5236 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5237 {
5238 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5239 }
5240
5241 /* unpaired or chopped LMBCS surrogates */
5242
5243 /* OK high surrogate, Low surrogate is chopped */
5244 LIn [0] = (char)0x14;
5245 LIn [1] = (char)0xD8;
5246 LIn [2] = (char)0x01;
5247 LIn [3] = (char)0x14;
5248 LIn [4] = (char)0xDC;
5249 pLIn = LIn;
5250 errorCode = U_ZERO_ERROR;
5251 pUOut = UOut;
5252
5253 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5254 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5255 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5256 {
5257 log_err("Unexpected results on chopped low surrogate\n");
5258 }
5259
5260 /* chopped at surrogate boundary */
5261 LIn [0] = (char)0x14;
5262 LIn [1] = (char)0xD8;
5263 LIn [2] = (char)0x01;
5264 pLIn = LIn;
5265 errorCode = U_ZERO_ERROR;
5266 pUOut = UOut;
5267
5268 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5269 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5270 {
5271 log_err("Unexpected results on chopped at surrogate boundary \n");
5272 }
5273
5274 /* unpaired surrogate plus valid Unichar */
5275 LIn [0] = (char)0x14;
5276 LIn [1] = (char)0xD8;
5277 LIn [2] = (char)0x01;
5278 LIn [3] = (char)0x14;
5279 LIn [4] = (char)0xC9;
5280 LIn [5] = (char)0xD0;
5281 pLIn = LIn;
5282 errorCode = U_ZERO_ERROR;
5283 pUOut = UOut;
5284
5285 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5286 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5287 {
5288 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5289 }
5290
5291 /* unpaired surrogate plus chopped Unichar */
5292 LIn [0] = (char)0x14;
5293 LIn [1] = (char)0xD8;
5294 LIn [2] = (char)0x01;
5295 LIn [3] = (char)0x14;
5296 LIn [4] = (char)0xC9;
5297
5298 pLIn = LIn;
5299 errorCode = U_ZERO_ERROR;
5300 pUOut = UOut;
5301
5302 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5303 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5304 {
5305 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5306 }
5307
5308 /* unpaired surrogate plus valid non-Unichar */
5309 LIn [0] = (char)0x14;
5310 LIn [1] = (char)0xD8;
5311 LIn [2] = (char)0x01;
5312 LIn [3] = (char)0x0F;
5313 LIn [4] = (char)0x3B;
5314
5315 pLIn = LIn;
5316 errorCode = U_ZERO_ERROR;
5317 pUOut = UOut;
5318
5319 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5320 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5321 {
5322 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5323 }
5324
5325 /* unpaired surrogate plus chopped non-Unichar */
5326 LIn [0] = (char)0x14;
5327 LIn [1] = (char)0xD8;
5328 LIn [2] = (char)0x01;
5329 LIn [3] = (char)0x0F;
5330
5331 pLIn = LIn;
5332 errorCode = U_ZERO_ERROR;
5333 pUOut = UOut;
5334
5335 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5336
5337 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5338 {
5339 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5340 }
5341 }
5342 }
5343 ucnv_close(cnv); /* final cleanup */
5344}
5345
5346
5347static void TestJitterbug255()
5348{
5349 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5350 const char *testBuffer = (const char *)testBytes;
5351 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5352 UErrorCode status = U_ZERO_ERROR;
5353 /*UChar32 result;*/
5354 UConverter *cnv = 0;
5355
5356 cnv = ucnv_open("shift-jis", &status);
5357 if (U_FAILURE(status) || cnv == 0) {
5358 log_data_err("Failed to open the converter for SJIS.\n");
5359 return;
5360 }
5361 while (testBuffer != testEnd)
5362 {
5363 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5364 if (U_FAILURE(status))
5365 {
5366 log_err("Failed to convert the next UChar for SJIS.\n");
5367 break;
5368 }
5369 }
5370 ucnv_close(cnv);
5371}
5372
5373static void TestEBCDICUS4XML()
5374{
5375 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5376 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5377 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5378 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5379 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5380 UChar *unicodes = unicodes_x;
5381 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5382 char *target = target_x;
5383 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5384 UErrorCode status = U_ZERO_ERROR;
5385 UConverter *cnv = 0;
5386
5387 cnv = ucnv_open("ebcdic-xml-us", &status);
5388 if (U_FAILURE(status) || cnv == 0) {
5389 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5390 return;
5391 }
5392 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5393 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5394 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5395 u_errorName(status));
5396 printUSeqErr(unicodes_x, 3);
5397 printUSeqErr(toUnicodeMaps, 3);
5398 }
5399 status = U_ZERO_ERROR;
5400 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5401 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5402 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5403 u_errorName(status));
5404 printSeqErr((const unsigned char*)target_x, 3);
5405 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5406 }
5407 ucnv_close(cnv);
5408}
5409#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5410
5411#if !UCONFIG_NO_COLLATION
5412
5413static void TestJitterbug981(){
5414 const UChar* rules;
5415 int32_t rules_length, target_cap, bytes_needed, buff_size;
5416 UErrorCode status = U_ZERO_ERROR;
5417 UConverter *utf8cnv;
5418 UCollator* myCollator;
5419 char *buff;
5420 int numNeeded=0;
5421 utf8cnv = ucnv_open ("utf8", &status);
5422 if(U_FAILURE(status)){
5423 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5424 return;
5425 }
5426 myCollator = ucol_open("zh", &status);
5427 if(U_FAILURE(status)){
5428 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5429 ucnv_close(utf8cnv);
5430 return;
5431 }
5432
5433 rules = ucol_getRules(myCollator, &rules_length);
5434 if(rules_length == 0) {
5435 log_data_err("missing zh tailoring rule string\n");
5436 ucol_close(myCollator);
5437 ucnv_close(utf8cnv);
5438 return;
5439 }
5440 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5441 buff = malloc(buff_size);
5442
5443 target_cap = 0;
5444 do {
5445 ucnv_reset(utf8cnv);
5446 status = U_ZERO_ERROR;
5447 if(target_cap >= buff_size) {
5448 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5449 break;
5450 }
5451 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5452 rules, rules_length, &status);
5453 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5454 if(numNeeded!=0 && numNeeded!= bytes_needed){
5455 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5456 break;
5457 }
5458 numNeeded = bytes_needed;
5459 } while (status == U_BUFFER_OVERFLOW_ERROR);
5460 ucol_close(myCollator);
5461 ucnv_close(utf8cnv);
5462 free(buff);
5463}
5464
5465#endif
5466
5467#if !UCONFIG_NO_LEGACY_CONVERSION
5468static void TestJitterbug1293(){
5469 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5470 char target[256];
5471 UErrorCode status = U_ZERO_ERROR;
5472 UConverter* conv=NULL;
5473 int32_t target_cap, bytes_needed, numNeeded = 0;
5474 conv = ucnv_open("shift-jis",&status);
5475 if(U_FAILURE(status)){
5476 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5477 return;
5478 }
5479
5480 do{
5481 target_cap =0;
5482 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5483 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5484 if(numNeeded!=0 && numNeeded!= bytes_needed){
5485 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5486 }
5487 numNeeded = bytes_needed;
5488 } while (status == U_BUFFER_OVERFLOW_ERROR);
5489 if(U_FAILURE(status)){
5490 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5491 return;
5492 }
5493 ucnv_close(conv);
5494}
5495#endif
5496
5497static void TestJB5275_1(){
5498
5499 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5500 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5501 /* Switch script: */
5502 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5503 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5504 "\xEF\x40\x3B\xB3\x0A";
5505 static const UChar expected[] ={
5506 0x003b, 0x0a15, 0x000a, /* Easy characters */
5507 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5508 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5509 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5510 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5511 };
5512
5513 UErrorCode status = U_ZERO_ERROR;
5514 UConverter* conv = ucnv_open("iscii-gur", &status);
5515 UChar dest[100] = {'\0'};
5516 UChar* target = dest;
5517 UChar* targetLimit = dest+100;
5518 const char* source = data;
5519 const char* sourceLimit = data+strlen(data);
5520 const UChar* exp = expected;
5521
5522 if (U_FAILURE(status)) {
5523 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5524 return;
5525 }
5526
5527 log_verbose("Testing switching back to default script when new line is encountered.\n");
5528 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5529 if(U_FAILURE(status)){
5530 log_err("conversion failed: %s \n", u_errorName(status));
5531 }
5532 targetLimit = target;
5533 target = dest;
5534 printUSeq(target, (int)(targetLimit-target));
5535 while(target<targetLimit){
5536 if(*exp!=*target){
5537 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5538 }
5539 target++;
5540 exp++;
5541 }
5542 ucnv_close(conv);
5543}
5544
5545static void TestJB5275(){
5546 static const char* data =
5547 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5548 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5549 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5550 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5551 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5552 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5553 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5554 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5555 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5556 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5557 static const UChar expected[] ={
5558 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5559 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5560 0x0038, 0x0C95, 0x000A, /* Kannada test */
5561 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5562 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5563 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5564 };
5565
5566 UErrorCode status = U_ZERO_ERROR;
5567 UConverter* conv = ucnv_open("iscii", &status);
5568 UChar dest[100] = {'\0'};
5569 UChar* target = dest;
5570 UChar* targetLimit = dest+100;
5571 const char* source = data;
5572 const char* sourceLimit = data+strlen(data);
5573 const UChar* exp = expected;
5574 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5575 if(U_FAILURE(status)){
5576 log_data_err("conversion failed: %s \n", u_errorName(status));
5577 }
5578 targetLimit = target;
5579 target = dest;
5580
5581 printUSeq(target, (int)(targetLimit-target));
5582
5583 while(target<targetLimit){
5584 if(*exp!=*target){
5585 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5586 }
5587 target++;
5588 exp++;
5589 }
5590 ucnv_close(conv);
5591}
5592
5593static void
5594TestIsFixedWidth() {
5595 UErrorCode status = U_ZERO_ERROR;
5596 UConverter *cnv = NULL;
5597 int32_t i;
5598
5599 const char *fixedWidth[] = {
5600 "US-ASCII",
5601 "UTF32",
5602 "ibm-5478_P100-1995"
5603 };
5604
5605 const char *notFixedWidth[] = {
5606 "GB18030",
5607 "UTF8",
5608 "windows-949-2000",
5609 "UTF16"
5610 };
5611
5612 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5613 cnv = ucnv_open(fixedWidth[i], &status);
5614 if (cnv == NULL || U_FAILURE(status)) {
5615 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5616 continue;
5617 }
5618
5619 if (!ucnv_isFixedWidth(cnv, &status)) {
5620 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5621 }
5622 ucnv_close(cnv);
5623 }
5624
5625 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5626 cnv = ucnv_open(notFixedWidth[i], &status);
5627 if (cnv == NULL || U_FAILURE(status)) {
5628 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5629 continue;
5630 }
5631
5632 if (ucnv_isFixedWidth(cnv, &status)) {
5633 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5634 }
5635 ucnv_close(cnv);
5636 }
5637}