1 /********************************************************************
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 *********************************************************************************
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
26 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const uint32_t results
[], const char* message
);
27 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
47 static void TestSBCS(void);
48 static void TestDBCS(void);
49 static void TestMBCS(void);
50 static void TestISO_2022(void);
51 static void TestISO_2022_JP(void);
52 static void TestISO_2022_JP_1(void);
53 static void TestISO_2022_JP_2(void);
54 static void TestISO_2022_KR(void);
55 static void TestISO_2022_KR_1(void);
56 static void TestISO_2022_CN(void);
57 static void TestISO_2022_CN_EXT(void);
58 static void TestJIS(void);
59 static void TestHZ(void);
60 static void TestSCSU(void);
61 static void TestEBCDIC_STATEFUL(void);
62 static void TestGB18030(void);
63 static void TestLMBCS(void);
64 static void TestJitterbug255(void);
65 static void TestEBCDICUS4XML(void);
66 static void TestJitterbug915(void);
67 static void TestISCII(void);
68 static void TestConv(const uint16_t in
[],
74 static void TestRoundTrippingAllUTF(void);
75 static void TestCoverageMBCS(void);
76 static void TestJitterbug2346(void);
77 static void TestJitterbug2411(void);
78 void addTestNewConvert(TestNode
** root
);
80 /* open a converter, using test data if it begins with '@' */
81 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
/* Capacity, in elements, of the fixed scratch buffers used by the conversion tests. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes handed to the converters; TestNewConvertWithBufferSizes() overwrites both. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label describing the conversion currently under test; filled by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every use of an argument is parenthesized so that operands containing operators of
 * lower precedence than '<' (for example `a | b`) are compared as a whole.
 * Each argument may be evaluated twice - do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
92 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
94 if(cnv
&& cnv
[0] == '@') {
95 return ucnv_openPackage("testdata", cnv
+1, err
);
97 return ucnv_open(cnv
, err
);
/*
 * Log a byte sequence at verbose level, each byte as "0xNN ", wrapped in braces
 * (same layout printSeqErr() writes to stderr).
 *
 * a   - bytes to print
 * len - number of bytes in a; only the delimiters are printed when len <= 0
 */
static void printSeq(const unsigned char* a, int len)
{
    int i=0;
    log_verbose("{");
    while (i<len) {
        log_verbose("0x%02x ", a[i++]);
    }
    log_verbose("}\n");
}
110 static void printUSeq(const UChar
* a
, int len
)
114 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * Write a byte sequence to stderr as "{0xNN 0xNN ... }" followed by a newline.
 *
 * a   - bytes to print
 * len - number of bytes in a; "{}" is printed when len <= 0
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int i=0;
    fprintf(stderr, "{");
    while (i<len) {
        fprintf(stderr, "0x%02x ", a[i++]);
    }
    fprintf(stderr, "}\n");
}
127 static void printUSeqErr(const UChar
* a
, int len
)
130 fprintf(stderr
, "{U+");
132 fprintf(stderr
, "0x%04x ", a
[i
++]);
133 fprintf(stderr
,"}\n");
137 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const uint32_t results
[], const char* message
)
140 const char* s
=(char*)source
;
141 const uint32_t *r
=results
;
142 UErrorCode errorCode
=U_ZERO_ERROR
;
147 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
148 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
149 break; /* no more significant input */
150 } else if(U_FAILURE(errorCode
)) {
151 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
153 } else if((uint32_t)(s
-s0
)!=*r
|| c
!=*(r
+1)) {
154 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
155 message
, c
, (s
-s0
), *(r
+1), *r
);
163 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
165 const char* s
=(char*)source
;
166 UErrorCode errorCode
=U_ZERO_ERROR
;
168 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
169 if(errorCode
!= expected
){
170 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
172 if(c
!= 0xFFFD && c
!= 0xffff){
173 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
178 static void TestInBufSizes(void)
180 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
182 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
183 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
184 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
185 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
186 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
187 TestNewConvertWithBufferSizes(1,1);
188 TestNewConvertWithBufferSizes(2,3);
189 TestNewConvertWithBufferSizes(3,2);
193 static void TestOutBufSizes(void)
196 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
197 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
198 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
199 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
200 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
201 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
207 void addTestNewConvert(TestNode
** root
)
209 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
210 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
211 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
212 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
213 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
214 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
215 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
216 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
217 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
218 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
219 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
220 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
221 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
222 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
223 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
224 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
225 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
226 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
227 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
228 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
229 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
230 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
231 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
232 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
233 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
234 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
235 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
236 addTest(root
, &TestISO_2022_CN_EXT
, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
237 addTest(root
, &TestJitterbug915
, "tsconv/nucnvtst/TestJitterbug915");
238 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
239 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
240 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
241 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
242 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
243 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
244 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
245 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
246 #if !UCONFIG_NO_COLLATION
247 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
249 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
250 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
251 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
252 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
253 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
258 /* Note that this test already makes use of statics, so it's not really
260 This convenience function lets us make the error messages actually useful.
263 static void setNuConvTestName(const char *codepage
, const char *direction
)
265 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    TC_OK       = 0,  /* test was OK */
    TC_MISMATCH = 1,  /* Match failed - err was printed */
    TC_FAIL     = 2   /* Test failed, don't print an err because it was already printed. */
} ETestConvertResult;
/*
 * testConvertFromU: convert `source` (sourceLen UChars) with the converter named by
 * `codepage` through ucnv_fromUnicode(), then compare the produced bytes with
 * `expect`/`expectLen` and, when offset checking is active and `expectOffsets` is
 * non-NULL, the produced offsets with `expectOffsets`.
 * Input and output are handed to the converter in chunks bounded by gInBufferSize and
 * gOutBufferSize; `useFallback` is forwarded to ucnv_setFallback().
 * NOTE(review): several lines of this function (local declarations, braces, return
 * statements) are absent from this listing, so the value returned on each path cannot
 * be confirmed from here.
 */
279 /* Note: This function uses global variables and it will not do offset
280 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
281 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
282 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
284 UErrorCode status
= U_ZERO_ERROR
;
285 UConverter
*conv
= 0;
286 uint8_t junkout
[NEW_MAX_BUFFER
]; /* FIX */
287 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
294 int32_t realBufferSize
;
295 uint8_t *realBufferEnd
;
296 const UChar
*realSourceEnd
;
297 const UChar
*sourceLimit
;
298 UBool checkOffsets
= TRUE
;
301 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
303 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Record the codepage and direction in gNuConvTestName for the log messages below. */
306 setNuConvTestName(codepage
, "FROM");
308 log_verbose("\n========= %s\n", gNuConvTestName
);
/* A codepage starting with '@' is opened from the test data package by my_ucnv_open(). */
310 conv
= my_ucnv_open(codepage
, &status
);
312 if(U_FAILURE(status
))
314 log_data_err("Couldn't open converter %s\n",codepage
);
318 ucnv_setFallback(conv
,useFallback
);
321 log_verbose("Converter opened..\n");
/* realBufferSize is the element count of junkout, not its size in bytes. */
327 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
328 realBufferEnd
= junkout
+ realBufferSize
;
329 realSourceEnd
= source
+ sourceLen
;
/* Offsets are only verified when both chunk sizes equal the full buffer size. */
331 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
332 checkOffsets
= FALSE
;
/* Limit this pass to the configured chunk sizes without running past the real ends. */
336 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
337 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
339 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
341 if(targ
== realBufferEnd
) {
342 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
345 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
348 status
= U_ZERO_ERROR
;
350 ucnv_fromUnicode (conv
,
355 checkOffsets
? offs
: NULL
,
356 doFlush
, /* flush if we're at the end of the input data */
/* Repeat while the target chunk overflowed, or the call succeeded but input remains. */
358 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
360 if(U_FAILURE(status
)) {
361 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
365 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
366 sourceLen
, targ
-junkout
);
371 char offset_str
[9999];
/* Append one "0xNN, " item per output byte to the two text buffers. */
376 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
377 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
378 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
382 printSeq((const uint8_t *)expect
, expectLen
);
383 if ( checkOffsets
) {
384 log_verbose("\nOffsets:");
385 log_verbose(offset_str
);
/* Length check: the number of bytes produced must equal expectLen. */
391 if(expectLen
!= targ
-junkout
) {
392 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
393 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
395 printSeqErr((const unsigned char*)junkout
, targ
-junkout
);
396 printf("\nExpected:");
397 printSeqErr((const unsigned char*)expect
, expectLen
);
/* Offset check: one int32_t offset per output byte is compared with expectOffsets. */
401 if (checkOffsets
&& (expectOffsets
!= 0) ) {
402 log_verbose("comparing %d offsets..\n", targ
-junkout
);
403 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
404 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
405 printSeqErr((const unsigned char*)junkout
, targ
-junkout
);
408 for(p
=junkout
;p
<targ
;p
++) {
409 log_err("%d,", junokout
[p
-junkout
]);
412 log_err("Expected: ");
413 for(i
=0; i
<(targ
-junkout
); i
++) {
414 log_err("%d,", expectOffsets
[i
]);
/* Content check: the first expectLen output bytes must equal expect. */
420 log_verbose("comparing..\n");
421 if(!memcmp(junkout
, expect
, expectLen
)) {
422 log_verbose("Matches!\n");
425 log_err("String does not match u->%s\n", gNuConvTestName
);
426 printUSeqErr(source
, sourceLen
);
428 printSeqErr((const unsigned char *)junkout
, expectLen
);
429 printf("\nExpected:");
430 printSeqErr((const unsigned char *)expect
, expectLen
);
/*
 * testConvertToU: convert `source` (sourcelen bytes) with the converter named by
 * `codepage` through ucnv_toUnicode(), then compare the produced UChars with
 * `expect`/`expectlen` and, when offset checking is active and `expectOffsets` is
 * non-NULL, the produced offsets with `expectOffsets`.
 * Input and output are handed to the converter in chunks bounded by gInBufferSize and
 * gOutBufferSize; `useFallback` is forwarded to ucnv_setFallback().
 * NOTE(review): several lines of this function (local declarations, braces, return
 * statements) are absent from this listing, so the value returned on each path cannot
 * be confirmed from here.
 */
436 /* Note: This function uses global variables and it will not do offset
437 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
438 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
439 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
441 UErrorCode status
= U_ZERO_ERROR
;
442 UConverter
*conv
= 0;
443 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
444 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
446 const uint8_t *realSourceEnd
;
447 const uint8_t *srcLimit
;
453 UBool checkOffsets
= TRUE
;
455 int32_t realBufferSize
;
456 UChar
*realBufferEnd
;
459 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
462 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Record the codepage and direction in gNuConvTestName for the log messages below. */
465 setNuConvTestName(codepage
, "TO");
467 log_verbose("\n========= %s\n", gNuConvTestName
);
/* A codepage starting with '@' is opened from the test data package by my_ucnv_open(). */
469 conv
= my_ucnv_open(codepage
, &status
);
471 if(U_FAILURE(status
))
473 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
477 ucnv_setFallback(conv
,useFallback
);
479 log_verbose("Converter opened..\n");
/* realBufferSize is the element count of junkout, not its size in bytes. */
485 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
486 realBufferEnd
= junkout
+ realBufferSize
;
487 realSourceEnd
= src
+ sourcelen
;
/* Offsets are only verified when both chunk sizes equal the full buffer size. */
489 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
490 checkOffsets
= FALSE
;
/* Limit this pass to the configured chunk sizes without running past the real ends. */
494 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
495 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
497 if(targ
== realBufferEnd
)
499 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
502 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
504 /* oldTarg = targ; */
506 status
= U_ZERO_ERROR
;
508 ucnv_toUnicode (conv
,
512 (const char *)srcLimit
,
513 checkOffsets
? offs
: NULL
,
514 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
517 /* offs += (targ-oldTarg); */
519 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
521 if(U_FAILURE(status
))
523 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
527 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
528 sourcelen
, targ
-junkout
);
532 char offset_str
[9999];
/* Append one "0xNNNN, " item per output UChar to the two text buffers. */
538 for(ptr
= junkout
;ptr
<targ
;ptr
++)
540 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
541 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
545 printUSeq(expect
, expectlen
);
548 log_verbose("\nOffsets:");
549 log_verbose(offset_str
);
555 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Offset check: one int32_t offset per output UChar is compared with expectOffsets. */
557 if (checkOffsets
&& (expectOffsets
!= 0))
559 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
560 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
562 for(p
=junkout
;p
<targ
;p
++) {
563 log_err("%d,", junokout
[p
-junkout
]);
566 log_err("Expected: ");
567 for(i
=0; i
<(targ
-junkout
); i
++) {
568 log_err("%d,", expectOffsets
[i
]);
572 for(i
=0; i
<(targ
-junkout
); i
++) {
573 log_err("%X,", junkout
[i
]);
577 for(i
=0; i
<(src
-source
); i
++) {
578 log_err("%X,", (unsigned char)source
[i
]);
/* Content check: expectlen*2 is the byte count, which assumes a UChar occupies two bytes. */
584 if(!memcmp(junkout
, expect
, expectlen
*2))
586 log_verbose("Matches!\n");
591 log_err("String does not match. %s\n", gNuConvTestName
);
592 log_verbose("String does not match. %s\n", gNuConvTestName
);
594 printUSeqErr(junkout
, expectlen
);
595 printf("\nExpected:");
596 printUSeqErr(expect
, expectlen
);
/*
 * TestNewConvertWithBufferSizes: run the fixed from-Unicode / to-Unicode table tests
 * with the given chunk sizes.  insize is stored in gInBufferSize and outsize in
 * gOutBufferSize before the first conversion; testConvertFromU() and testConvertToU()
 * read those globals.
 * Each table pairs the expected encoded bytes with offset arrays: the "to...Offs"
 * arrays are passed to testConvertFromU() and the "fm...Offs" arrays to testConvertToU().
 * NOTE(review): this listing omits some lines of the function (for example the
 * declaration of sampleText and rows of several tables) - confirm against the complete
 * file before editing any data.
 */
602 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
605 /* 1 2 3 1Han 2Han 3Han . */
607 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
610 const uint8_t expectedUTF8
[] =
611 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
612 int32_t toUTF8Offs
[] =
613 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
614 int32_t fmUTF8Offs
[] =
615 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
617 /* Same as UTF8, but with ^[%B preceding */
618 const uint8_t expectedISO2022
[] =
619 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
620 int32_t toISO2022Offs
[] =
621 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
622 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
623 int32_t fmISO2022Offs
[] =
624 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
626 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
627 const uint8_t expectedIBM930
[] =
628 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
629 int32_t toIBM930Offs
[] =
630 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
631 int32_t fmIBM930Offs
[] =
632 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
634 /* 1 2 3 0 h1 h2 h3 . MBCS*/
635 const uint8_t expectedIBM943
[] =
636 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
637 int32_t toIBM943Offs
[] =
638 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
639 int32_t fmIBM943Offs
[] =
640 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
642 /* 1 2 3 0 h1 h2 h3 . DBCS*/
643 const uint8_t expectedIBM9027
[] =
644 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
645 int32_t toIBM9027Offs
[] =
646 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
648 /* 1 2 3 0 <?> <?> <?> . SBCS*/
649 const uint8_t expectedIBM920
[] =
650 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
651 int32_t toIBM920Offs
[] =
652 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
654 /* 1 2 3 0 <?> <?> <?> . SBCS*/
655 const uint8_t expectedISO88593
[] =
656 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
657 int32_t toISO88593Offs
[] =
658 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
660 /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
661 const uint8_t expectedLATIN1
[] =
662 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
663 int32_t toLATIN1Offs
[] =
664 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
668 const uint8_t expectedUTF16BE
[] =
669 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
670 int32_t toUTF16BEOffs
[]=
671 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
672 int32_t fmUTF16BEOffs
[] =
673 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
675 const uint8_t expectedUTF16LE
[] =
676 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
677 int32_t toUTF16LEOffs
[]=
678 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
679 int32_t fmUTF16LEOffs
[] =
680 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
682 const uint8_t expectedUTF32BE
[] =
683 { 0x00, 0x00, 0x00, 0x31,
684 0x00, 0x00, 0x00, 0x32,
685 0x00, 0x00, 0x00, 0x33,
686 0x00, 0x00, 0x00, 0x00,
687 0x00, 0x00, 0x4e, 0x00,
688 0x00, 0x00, 0x4e, 0x8c,
689 0x00, 0x00, 0x4e, 0x09,
690 0x00, 0x00, 0x00, 0x2e };
691 int32_t toUTF32BEOffs
[]=
692 { 0x00, 0x00, 0x00, 0x00,
693 0x01, 0x01, 0x01, 0x01,
694 0x02, 0x02, 0x02, 0x02,
695 0x03, 0x03, 0x03, 0x03,
696 0x04, 0x04, 0x04, 0x04,
697 0x05, 0x05, 0x05, 0x05,
698 0x06, 0x06, 0x06, 0x06,
699 0x07, 0x07, 0x07, 0x07,
700 0x08, 0x08, 0x08, 0x08 };
701 int32_t fmUTF32BEOffs
[] =
702 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
704 const uint8_t expectedUTF32LE
[] =
705 { 0x31, 0x00, 0x00, 0x00,
706 0x32, 0x00, 0x00, 0x00,
707 0x33, 0x00, 0x00, 0x00,
708 0x00, 0x00, 0x00, 0x00,
709 0x00, 0x4e, 0x00, 0x00,
710 0x8c, 0x4e, 0x00, 0x00,
711 0x09, 0x4e, 0x00, 0x00,
712 0x2e, 0x00, 0x00, 0x00 };
713 int32_t toUTF32LEOffs
[]=
714 { 0x00, 0x00, 0x00, 0x00,
715 0x01, 0x01, 0x01, 0x01,
716 0x02, 0x02, 0x02, 0x02,
717 0x03, 0x03, 0x03, 0x03,
718 0x04, 0x04, 0x04, 0x04,
719 0x05, 0x05, 0x05, 0x05,
720 0x06, 0x06, 0x06, 0x06,
721 0x07, 0x07, 0x07, 0x07,
722 0x08, 0x08, 0x08, 0x08 };
723 int32_t fmUTF32LEOffs
[] =
724 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
729 /** Test chars #2 **/
731 /* Sahha [health], slashed h's */
732 const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
733 const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
736 const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
737 const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
738 int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
739 int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
740 /*********************************** START OF CODE finally *************/
/* Publish the requested chunk sizes; the helper functions below read these globals. */
742 gInBufferSize
= insize
;
743 gOutBufferSize
= outsize
;
745 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
750 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
751 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
753 log_verbose("Test surrogate behaviour for UTF8\n");
755 const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
756 const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
757 0xf0, 0x90, 0x90, 0x81,
760 int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
761 testConvertFromU(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
762 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
767 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
768 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
770 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
771 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
773 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
774 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
776 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
777 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
779 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
780 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
782 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
783 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
785 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
786 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
788 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
789 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
793 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
794 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
796 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
797 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
799 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
800 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
802 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
803 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
811 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
812 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf8", fmUTF8Offs
,FALSE
);
814 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
815 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ISO_2022", fmISO2022Offs
,FALSE
);
817 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
818 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
820 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
821 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16be", fmUTF16BEOffs
,FALSE
);
823 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
824 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32le", fmUTF32LEOffs
,FALSE
);
826 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
827 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32be", fmUTF32BEOffs
,FALSE
);
829 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
),
830 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-930", fmIBM930Offs
,FALSE
);
832 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),
833 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-943", fmIBM943Offs
,FALSE
);
835 /* Try it again to make sure it still works */
836 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
837 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
839 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
840 malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]), "latin3", NULL
,FALSE
);
842 testConvertFromU(malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]),
843 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
846 testConvertFromU(LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]),
847 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
848 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
849 LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]), "LMBCS-1", fmLMBCSOffs
,FALSE
);
851 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
853 /* encode directly set D and set O */
854 static const uint8_t utf7
[] = {
861 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
862 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
864 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
866 static const UChar unicode
[] = {
868 Hi Mom -<WHITE SMILING FACE>-!
869 A<NOT IDENTICAL TO><ALPHA>.
871 [Japanese word "nihongo"]
873 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
874 0x41, 0x2262, 0x0391, 0x2e,
876 0x65e5, 0x672c, 0x8a9e
878 static const int32_t toUnicodeOffsets
[] = {
879 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
884 static const int32_t fromUnicodeOffsets
[] = {
885 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
886 11, 12, 12, 12, 13, 13, 13, 13, 14,
888 16, 16, 16, 17, 17, 17, 18, 18, 18
891 /* same but escaping set O (the exclamation mark) */
892 static const uint8_t utf7Restricted
[] = {
899 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
900 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
902 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
904 static const int32_t toUnicodeOffsetsR
[] = {
905 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
910 static const int32_t fromUnicodeOffsetsR
[] = {
911 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
912 11, 12, 12, 12, 13, 13, 13, 13, 14,
914 16, 16, 16, 17, 17, 17, 18, 18, 18
917 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
919 testConvertToU(utf7
, sizeof(utf7
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7", toUnicodeOffsets
,FALSE
);
921 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
923 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
927 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
928 * modified according to RFC 2060,
929 * and supplemented with the one example in RFC 2060 itself.
932 static const uint8_t imap
[] = {
943 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
944 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
946 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
948 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
949 0x2f, 0x6d, 0x61, 0x69, 0x6c,
950 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
951 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
953 static const UChar unicode
[] = {
954 /* Hi Mom -<WHITE SMILING FACE>-!
955 A<NOT IDENTICAL TO><ALPHA>.
957 [Japanese word "nihongo"]
964 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
965 0x41, 0x2262, 0x0391, 0x2e,
967 0x65e5, 0x672c, 0x8a9e,
969 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
970 0x2f, 0x6d, 0x61, 0x69, 0x6c,
971 0x2f, 0x65e5, 0x672c, 0x8a9e,
974 static const int32_t toUnicodeOffsets
[] = {
975 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
980 38, 39, 40, 41, 42, 43,
985 static const int32_t fromUnicodeOffsets
[] = {
986 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
989 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
991 20, 21, 22, 23, 24, 25,
993 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
994 35, 36, 36, 36, 37, 37, 37, 37, 37
997 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
999 testConvertToU(imap
, sizeof(imap
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1002 /* Test UTF-8 bad data handling*/
1004 static const uint8_t utf8
[]={
1006 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1009 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1010 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1011 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1012 0xdf, 0xbf, /* 7ff */
1013 0xbf, /* truncated tail */
1014 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1018 static const uint16_t utf8Expected
[]={
1032 static const int32_t utf8Offsets
[]={
1033 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1035 testConvertToU(utf8
, sizeof(utf8
),
1036 utf8Expected
, sizeof(utf8Expected
)/sizeof(utf8Expected
[0]), "utf-8", utf8Offsets
,FALSE
);
1040 /* Test UTF-32BE bad data handling*/
1042 static const uint8_t utf32
[]={
1043 0x00, 0x00, 0x00, 0x61,
1044 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1045 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1046 0x00, 0x00, 0x00, 0x62,
1047 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1048 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1049 0x00, 0x00, 0x01, 0x62,
1050 0x00, 0x00, 0x02, 0x62
1053 static const uint16_t utf32Expected
[]={
1055 0xfffd, /* 0x110000 out of range */
1056 0xDBFF, /* 0x10FFFF in range */
1059 0xfffd, /* 0xffffffff out of range */
1060 0xfffd, /* 0x7fffffff out of range */
1065 static const int32_t utf32Offsets
[]={
1066 0, 4, 8, 8, 12, 16, 20, 24, 28
1068 testConvertToU(utf32
, sizeof(utf32
),
1069 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets
,FALSE
);
1073 /* Test UTF-32LE bad data handling*/
1075 static const uint8_t utf32
[]={
1076 0x61, 0x00, 0x00, 0x00,
1077 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1078 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1079 0x62, 0x00, 0x00, 0x00,
1080 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1081 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1082 0x62, 0x01, 0x00, 0x00,
1083 0x62, 0x02, 0x00, 0x00,
1086 static const uint16_t utf32Expected
[]={
1088 0xfffd, /* 0x110000 out of range */
1089 0xDBFF, /* 0x10FFFF in range */
1092 0xfffd, /* 0xffffffff out of range */
1093 0xfffd, /* 0x7fffffff out of range */
1098 static const int32_t utf32Offsets
[]={
1099 0, 4, 8, 8, 12, 16, 20, 24, 28
1101 testConvertToU(utf32
, sizeof(utf32
),
1102 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32le", utf32Offsets
,FALSE
);
/*
 * TestCoverageMBCS: extra MBCS coverage using the test converters "@test1",
 * "@test3" and "@test4".
 *
 * The visible code copies the current ICU data directory into saveDirectory,
 * points ICU at the directory returned by loadTestData(), and then, for each
 * test converter, runs testConvertFromU() on a UTF-16 input and
 * testConvertToU() on a byte input against the expected output and offset
 * tables declared next to each call. At the end the saved data directory is
 * restored and saveDirectory is freed.
 */
1107 static void TestCoverageMBCS(){
1109 UErrorCode status
= U_ZERO_ERROR
;
1110 const char *directory
= loadTestData(&status
);
1111 char* tdpath
= NULL
;
1112 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1113 int len
= strlen(directory
);
/* NOTE(review): no free(tdpath) is visible in this block -- confirm the buffer is released. */
1116 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1117 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1118 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1119 uprv_strcpy(tdpath
,directory
);
/* NOTE(review): strrchr() returns NULL when the separator is absent; the result is used unchecked in the subtraction below. */
1120 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
/* True when the last separator is not the final character of tdpath. */
1122 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1125 u_setDataDirectory(tdpath
);
1126 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1129 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1130 which is test file for MBCS conversion with single-byte codepage data.*/
1133 /* MBCS with single byte codepage data test1.ucm*/
1134 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1135 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1136 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1138 const uint8_t test1input
[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
1139 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
1140 int32_t fromtest1Offs
[] = { 0, 1, 2, 3, 3, 4, 5};
/* Unicode -> @test1 bytes */
1143 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1144 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
/* @test1 bytes -> Unicode */
1147 testConvertToU(test1input
, sizeof(test1input
),
1148 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test1", fromtest1Offs
,FALSE
);
1152 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1153 which is test file for MBCS conversion with three-byte codepage data.*/
1156 /* MBCS with three byte codepage data test3.ucm*/
1157 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1158 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1159 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1161 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1162 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1163 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
/* Unicode -> @test3 bytes */
1166 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1167 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
/* @test3 bytes -> Unicode */
1170 testConvertToU(test3input
, sizeof(test3input
),
1171 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test3", fromtest3Offs
,FALSE
);
1175 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1176 which is test file for MBCS conversion with four-byte codepage data.*/
1179 /* MBCS with four byte codepage data test4.ucm*/
1180 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1181 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1182 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1184 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1185 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1186 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
/* Unicode -> @test4 bytes */
1189 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1190 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
/* @test4 bytes -> Unicode */
1193 testConvertToU(test4input
, sizeof(test4input
),
1194 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test4", fromtest4Offs
,FALSE
);
1199 /* restore the original data directory */
1200 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1201 u_setDataDirectory(saveDirectory
);
1202 free(saveDirectory
);
1207 static void TestConverterType(const char *convName
, UConverterType convType
) {
1208 UConverter
* myConverter
;
1209 UErrorCode err
= U_ZERO_ERROR
;
1211 myConverter
= my_ucnv_open(convName
, &err
);
1213 if (U_FAILURE(err
)) {
1214 log_data_err("Failed to create an %s converter\n", convName
);
1219 if (ucnv_getType(myConverter
)!=convType
) {
1220 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1221 convName
, convType
);
1224 log_verbose("ucnv_getType %s ok\n", convName
);
1227 ucnv_close(myConverter
);
1230 static void TestConverterTypesAndStarters()
1232 UConverter
* myConverter
;
1233 UErrorCode err
= U_ZERO_ERROR
;
1234 UBool mystarters
[256];
1236 /* const UBool expectedKSCstarters[256] = {
1237 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1238 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1239 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1240 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1241 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1242 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1243 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1244 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1245 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1252 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1253 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1254 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1255 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1256 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1257 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1258 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1259 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1265 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1267 myConverter
= ucnv_open("ksc", &err
);
1268 if (U_FAILURE(err
)) {
1269 log_data_err("Failed to create an ibm-ksc converter\n");
1274 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1275 log_err("ucnv_getType Failed for ibm-949\n");
1277 log_verbose("ucnv_getType ibm-949 ok\n");
1279 if(myConverter
!=NULL
)
1280 ucnv_getStarters(myConverter
, mystarters
, &err
);
1282 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1283 log_err("Failed ucnv_getStarters for ksc\n");
1285 log_verbose("ucnv_getStarters ok\n");*/
1288 ucnv_close(myConverter
);
1290 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1291 TestConverterType("ibm-878", UCNV_SBCS
);
1292 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1293 TestConverterType("ibm-1208", UCNV_UTF8
);
1294 TestConverterType("utf-8", UCNV_UTF8
);
1295 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1296 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1297 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1298 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1299 TestConverterType("iso-2022", UCNV_ISO_2022
);
1300 TestConverterType("hz", UCNV_HZ
);
1301 TestConverterType("scsu", UCNV_SCSU
);
1302 TestConverterType("x-iscii-de", UCNV_ISCII
);
1303 TestConverterType("ascii", UCNV_US_ASCII
);
1304 TestConverterType("utf-7", UCNV_UTF7
);
1305 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1306 TestConverterType("bocu-1", UCNV_BOCU1
);
1310 TestAmbiguousConverter(UConverter
*cnv
) {
1311 static const char inBytes
[2]={ 0x61, 0x5c };
1312 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1316 UErrorCode errorCode
;
1319 /* try to convert an 'a' and a US-ASCII backslash */
1320 errorCode
=U_ZERO_ERROR
;
1323 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+2, NULL
, TRUE
, &errorCode
);
1324 if(U_FAILURE(errorCode
)) {
1325 /* we do not care about general failures in this test; the input may just not be mappable */
1329 if(outUnicode
[0]!=0x61 || outUnicode
[1]==0xfffd) {
1330 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1334 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1336 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1337 if((outUnicode
[1]!=0x5c)!=isAmbiguous
) {
1338 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1339 ucnv_getName(cnv
, &errorCode
), outUnicode
[1]!=0x5c, isAmbiguous
);
1343 if(outUnicode
[1]!=0x5c) {
1344 /* needs fixup, fix it */
1345 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1346 if(outUnicode
[1]!=0x5c) {
1347 /* the fix failed */
1348 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
1354 static void TestAmbiguous()
1356 UErrorCode status
= U_ZERO_ERROR
;
1357 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1358 const char target
[] = {
1359 /* "\\usr\\local\\share\\data\\icutest.txt" */
1360 0x5c, 0x75, 0x73, 0x72,
1361 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1362 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1363 0x5c, 0x64, 0x61, 0x74, 0x61,
1364 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1367 UChar asciiResult
[200], sjisResult
[200];
1368 int32_t asciiLength
= 0, sjisLength
= 0, i
;
1371 /* enumerate all converters */
1372 status
=U_ZERO_ERROR
;
1373 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1374 cnv
=ucnv_open(name
, &status
);
1375 if(U_SUCCESS(status
)) {
1376 TestAmbiguousConverter(cnv
);
1379 log_err("error: unable to open available converter \"%s\"\n", name
);
1380 status
=U_ZERO_ERROR
;
1384 sjis_cnv
= ucnv_open("ibm-943", &status
);
1385 if (U_FAILURE(status
))
1387 log_data_err("Failed to create a SJIS converter\n");
1390 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1391 if (U_FAILURE(status
))
1393 log_data_err("Failed to create a LATIN-1 converter\n");
1394 ucnv_close(sjis_cnv
);
1397 /* convert target from SJIS to Unicode */
1398 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, sizeof(sjisResult
)/U_SIZEOF_UCHAR
, target
, strlen(target
), &status
);
1399 if (U_FAILURE(status
))
1401 log_err("Failed to convert the SJIS string.\n");
1402 ucnv_close(sjis_cnv
);
1403 ucnv_close(ascii_cnv
);
1406 /* convert target from Latin-1 to Unicode */
1407 asciiLength
= ucnv_toUChars(ascii_cnv
, asciiResult
, sizeof(asciiResult
)/U_SIZEOF_UCHAR
, target
, strlen(target
), &status
);
1408 if (U_FAILURE(status
))
1410 log_err("Failed to convert the Latin-1 string.\n");
1412 ucnv_close(sjis_cnv
);
1413 ucnv_close(ascii_cnv
);
1416 if (!ucnv_isAmbiguous(sjis_cnv
))
1418 log_err("SJIS converter should contain ambiguous character mappings.\n");
1421 ucnv_close(sjis_cnv
);
1422 ucnv_close(ascii_cnv
);
1425 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1427 log_err("File separators for SJIS don't need to be fixed.\n");
1429 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1430 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1432 log_err("Fixing file separator for SJIS failed.\n");
1434 ucnv_close(sjis_cnv
);
1435 ucnv_close(ascii_cnv
);
/*
 * TestSignatureDetection: exercises ucnv_detectUnicodeSignature() in two
 * passes over tables of byte strings.
 *
 * Pass 1 passes -1 as the source length; pass 2 passes the explicit length
 * taken from len[i]. In both passes the returned name is compared with
 * expected[i] and the reported signature length with expectedLength[i];
 * mismatches are reported through log_err().
 */
1439 TestSignatureDetection(){
1440 /* with null terminated strings */
1442 static const char* data
[] = {
1443 "\xFE\xFF\x00\x00", /* UTF-16BE */
1444 "\xFF\xFE\x00\x00", /* UTF-16LE */
1445 "\xEF\xBB\xBF\x00", /* UTF-8 */
1446 "\x0E\xFE\xFF\x00", /* SCSU */
1448 "\xFE\xFF", /* UTF-16BE */
1449 "\xFF\xFE", /* UTF-16LE */
1450 "\xEF\xBB\xBF", /* UTF-8 */
1451 "\x0E\xFE\xFF", /* SCSU */
1453 "\xFE\xFF\x41\x42", /* UTF-16BE */
1454 "\xFF\xFE\x41\x41", /* UTF-16LE */
1455 "\xEF\xBB\xBF\x41", /* UTF-8 */
1456 "\x0E\xFE\xFF\x41", /* SCSU */
1458 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1459 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1460 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1461 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1462 "\x2B\x2F\x76\x2F\x41" /* UTF-7 */
1464 static const char* expected
[] = {
1486 static const int32_t expectedLength
[] ={
1510 int32_t signatureLength
= -1;
1511 const char* source
= NULL
;
1512 const char* enc
= NULL
;
1513 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
/* -1: the source length is not given explicitly */
1516 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1518 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1521 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1522 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): the format string names "Expected Length" first and "Got length" second, but the arguments are passed as signatureLength, expectedLength[i] -- the two values look swapped in the message. */
1525 if(signatureLength
!= expectedLength
[i
]){
1526 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/* Second pass: same check, but with explicit source lengths. */
1531 static const char* data
[] = {
1532 "\xFE\xFF\x00", /* UTF-16BE */
1533 "\xFF\xFE\x00", /* UTF-16LE */
1534 "\xEF\xBB\xBF\x00", /* UTF-8 */
1535 "\x0E\xFE\xFF\x00", /* SCSU */
1536 "\x00\x00\xFE\xFF", /* UTF-32BE */
1537 "\xFF\xFE\x00\x00", /* UTF-32LE */
1538 "\xFE\xFF", /* UTF-16BE */
1539 "\xFF\xFE", /* UTF-16LE */
1540 "\xEF\xBB\xBF", /* UTF-8 */
1541 "\x0E\xFE\xFF", /* SCSU */
1542 "\x00\x00\xFE\xFF", /* UTF-32BE */
1543 "\xFF\xFE\x00\x00", /* UTF-32LE */
1544 "\xFE\xFF\x41\x42", /* UTF-16BE */
1545 "\xFF\xFE\x41\x41", /* UTF-16LE */
1546 "\xEF\xBB\xBF\x41", /* UTF-8 */
1547 "\x0E\xFE\xFF\x41", /* SCSU */
1548 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1549 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1550 "\xFB\xEE\x28", /* BOCU-1 */
1551 "\xFF\x41\x42" /* NULL */
1553 static const int len
[] = {
1576 static const char* expected
[] = {
1598 static const int32_t expectedLength
[] ={
1622 int32_t signatureLength
= -1;
1623 int32_t sourceLength
=-1;
1624 const char* source
= NULL
;
1625 const char* enc
= NULL
;
1626 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
1629 sourceLength
= len
[i
];
1630 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1632 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
/* NOTE(review): expected[i] is tested against NULL only after it has been handed to strcmp(); if a NULL expectation meets a non-NULL enc, strcmp() receives a NULL argument -- confirm against the expected[] table. */
1635 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1636 if(expected
[i
] !=NULL
){
1637 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): as in the first pass, the arguments are signatureLength, expectedLength[i] while the message labels them "Expected Length" then "Got length". */
1641 if(signatureLength
!= expectedLength
[i
]){
1642 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/*
 * UTF-7 test body (the file head declares this test as TestUTF7).
 *
 * Opens a "UTF-7" converter, runs the TestNextUChar() helper over in[]
 * against results[] (pairs of: number of bytes read, code point), checks via
 * TestNextUCharError() that an empty range (limit == source) yields
 * U_INDEX_OUTOFBOUNDS_ERROR, and verifies that ucnv_getName() returns "UTF-7".
 */
1651 static const uint8_t in
[]={
1652 /* H - +Jjo- - ! +- +2AHcAQ */
1655 0x2b, 0x4a, 0x6a, 0x6f,
1659 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1662 /* expected test results */
1663 static const uint32_t results
[]={
1664 /* number of bytes read, code point */
1667 4, 0x263a, /* <WHITE SMILING FACE> */
1674 const char *cnvName
;
1675 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1676 UErrorCode errorCode
=U_ZERO_ERROR
;
1677 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1678 if(U_FAILURE(errorCode
)) {
1679 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1682 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1683 /* Test the condition when source >= sourceLimit */
1684 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* The converter must report its own name as "UTF-7". */
1685 cnvName
= ucnv_getName(cnv
, &errorCode
);
1686 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1687 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * IMAP-mailbox-name test body (the file head declares this test as TestIMAP).
 *
 * Opens an "IMAP-mailbox-name" converter, runs the TestNextUChar() helper
 * over in[] against results[] (pairs of: number of bytes read, code point),
 * checks via TestNextUCharError() that an empty range (limit == source)
 * yields U_INDEX_OUTOFBOUNDS_ERROR, and verifies that ucnv_getName() returns
 * "IMAP-mailbox-name".
 */
1695 static const uint8_t in
[]={
1696 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1699 0x26, 0x4a, 0x6a, 0x6f,
1703 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1706 /* expected test results */
1707 static const uint32_t results
[]={
1708 /* number of bytes read, code point */
1711 4, 0x263a, /* <WHITE SMILING FACE> */
1718 const char *cnvName
;
1719 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1720 UErrorCode errorCode
=U_ZERO_ERROR
;
1721 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1722 if(U_FAILURE(errorCode
)) {
1723 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1726 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1727 /* Test the condition when source >= sourceLimit */
1728 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* The converter must report its own name as "IMAP-mailbox-name". */
1729 cnvName
= ucnv_getName(cnv
, &errorCode
);
1730 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1731 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * UTF-8 test body (the file head declares this test as TestUTF8).
 *
 * Opens a "UTF-8" converter and runs the TestNextUChar() helper over the
 * well-formed input in[] against results[], then checks via
 * TestNextUCharError() that an empty range (limit == source) yields
 * U_INDEX_OUTOFBOUNDS_ERROR. Afterwards UCNV_TO_U_CALLBACK_SKIP is installed
 * with ucnv_setToUCallBack() and TestNextUChar() is run again over the
 * ill-formed input in2[] against results2[].
 */
1739 static const uint8_t in
[]={
1743 0xf0, 0x90, 0x80, 0x80,
1744 0xf4, 0x84, 0x8c, 0xa1,
1745 0xf0, 0x90, 0x90, 0x81
1748 /* expected test results */
1749 static const uint32_t results
[]={
1750 /* number of bytes read, code point */
1759 /* error test input */
1760 static const uint8_t in2
[]={
1762 0xc0, 0x80, /* illegal non-shortest form */
1763 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1764 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1765 0xc0, 0xc0, /* illegal trail byte */
1766 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1767 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1768 0xfe, /* illegal byte altogether */
1772 /* expected error test results */
1773 static const uint32_t results2
[]={
1774 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback */
1779 UConverterToUCallback cb
;
1782 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1783 UErrorCode errorCode
=U_ZERO_ERROR
;
1784 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1785 if(U_FAILURE(errorCode
)) {
1786 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1789 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1790 /* Test the condition when source >= sourceLimit */
1791 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1793 /* test error behavior with a skip callback */
1794 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1795 source
=(const char *)in2
;
1796 limit
=(const char *)(in2
+sizeof(in2
));
1797 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
/*
 * TestCESU8: opens a "CESU-8" converter and runs the TestNextUChar() helper
 * over in[] against results[], then checks via TestNextUCharError() that an
 * empty range (limit == source) yields U_INDEX_OUTOFBOUNDS_ERROR. Afterwards
 * UCNV_TO_U_CALLBACK_SKIP is installed with ucnv_setToUCallBack() and
 * TestNextUChar() is run again over the ill-formed input in2[] against
 * results2[].
 *
 * NOTE(review): the definition line below carries no return type, while the
 * prototype at the top of the file is "static void TestCESU8(void);".
 */
1803 static TestCESU8() {
1805 static const uint8_t in
[]={
1809 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1810 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1811 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1815 /* expected test results */
1816 static const uint32_t results
[]={
1817 /* number of bytes read, code point */
1828 /* error test input */
1829 static const uint8_t in2
[]={
1831 0xc0, 0x80, /* illegal non-shortest form */
1832 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1833 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1834 0xc0, 0xc0, /* illegal trail byte */
1835 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1836 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1837 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1838 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1839 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1840 0xfe, /* illegal byte altogether */
1844 /* expected error test results */
1845 static const uint32_t results2
[]={
1846 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback */
1851 UConverterToUCallback cb
;
1854 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1855 UErrorCode errorCode
=U_ZERO_ERROR
;
1856 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
1857 if(U_FAILURE(errorCode
)) {
1858 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
1861 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
1862 /* Test the condition when source >= sourceLimit */
1863 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1865 /* test error behavior with a skip callback */
1866 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1867 source
=(const char *)in2
;
1868 limit
=(const char *)(in2
+sizeof(in2
));
1869 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
/*
 * TestUTF16: opens a "UTF-16" converter and runs the TestNextUChar() helper
 * over three inputs that begin with the bytes FE FF (in1), FF FE (in2) and
 * FE FE (in3), each against its own results table. The converter's
 * to-Unicode state is reset with ucnv_resetToUnicode() between inputs.
 * Finally TestNextUCharError() checks that an empty range (limit == source)
 * yields U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * NOTE(review): the definition line below carries no return type, while the
 * prototype at the top of the file is "static void TestUTF16(void);".
 */
1875 static TestUTF16() {
1877 static const uint8_t in1
[]={
1878 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1880 static const uint8_t in2
[]={
1881 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1883 static const uint8_t in3
[]={
1884 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1887 /* expected test results */
1888 static const uint32_t results1
[]={
1889 /* number of bytes read, code point */
1893 static const uint32_t results2
[]={
1894 /* number of bytes read, code point */
1898 static const uint32_t results3
[]={
1899 /* number of bytes read, code point */
1906 const char *source
, *limit
;
1908 UErrorCode errorCode
=U_ZERO_ERROR
;
1909 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
1910 if(U_FAILURE(errorCode
)) {
1911 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
/* input 1 */
1915 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
1916 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
/* input 2, after resetting the converter */
1918 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
1919 ucnv_resetToUnicode(cnv
);
1920 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
/* input 3, after resetting the converter */
1922 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
1923 ucnv_resetToUnicode(cnv
);
1924 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
1926 /* Test the condition when source >= sourceLimit */
1927 ucnv_resetToUnicode(cnv
);
1928 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * TestUTF16BE: opens a "utf-16be" converter and runs the TestNextUChar()
 * helper over in[] against results[]. TestNextUCharError() is then used for
 * three error cases: an empty range (U_INDEX_OUTOFBOUNDS_ERROR), a single
 * byte 0x61 (U_TRUNCATED_CHAR_FOUND) and the two bytes D8 01 on their own
 * (U_TRUNCATED_CHAR_FOUND).
 *
 * NOTE(review): the definition line below carries no return type, while the
 * prototype at the top of the file is "static void TestUTF16BE(void);".
 */
1934 static TestUTF16BE() {
1936 static const uint8_t in
[]={
1942 0xd8, 0x01, 0xdc, 0x01
1945 /* expected test results */
1946 static const uint32_t results
[]={
1947 /* number of bytes read, code point */
1956 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1957 UErrorCode errorCode
=U_ZERO_ERROR
;
1958 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
1959 if(U_FAILURE(errorCode
)) {
1960 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
1963 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
1964 /* Test the condition when source >= sourceLimit */
1965 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1966 /*Test for the condition where there is an invalid character*/
1968 static const uint8_t source2
[]={0x61};
1969 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
1971 /*Test for the condition where there is a surrogate pair*/
1973 const uint8_t source2
[]={0xd8, 0x01};
1974 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * UTF-16LE test body (the file head declares this test as TestUTF16LE).
 *
 * Opens a "utf-16le" converter and runs the TestNextUChar() helper over in[]
 * against results[]. TestNextUCharError() is then used for three error
 * cases: an empty range (U_INDEX_OUTOFBOUNDS_ERROR), a single byte 0x61
 * (U_TRUNCATED_CHAR_FOUND) and the two bytes 01 D8 on their own
 * (U_TRUNCATED_CHAR_FOUND).
 */
1982 static const uint8_t in
[]={
1987 0x01, 0xd8, 0x01, 0xdc
1990 /* expected test results */
1991 static const uint32_t results
[]={
1992 /* number of bytes read, code point */
2000 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2001 UErrorCode errorCode
=U_ZERO_ERROR
;
2002 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2003 if(U_FAILURE(errorCode
)) {
2004 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2007 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2008 /* Test the condition when source >= sourceLimit */
2009 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2010 /*Test for the condition where there is an invalid character*/
2012 static const uint8_t source2
[]={0x61};
2013 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2015 /*Test for the condition where there is a surrogate character*/
2017 static const uint8_t source2
[]={0x01, 0xd8};
2018 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * TestUTF32: opens a "UTF-32" converter and runs the TestNextUChar() helper
 * over three inputs that begin with the bytes 00 00 FE FF (in1),
 * FF FE 00 00 (in2) and 00 00 FE FE (in3), each against its own results
 * table. The converter's to-Unicode state is reset with
 * ucnv_resetToUnicode() between inputs. Finally TestNextUCharError() checks
 * that an empty range (limit == source) yields U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * NOTE(review): the definition line below carries no return type, while the
 * prototype at the top of the file is "static void TestUTF32(void);".
 */
2025 static TestUTF32() {
2027 static const uint8_t in1
[]={
2028 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2030 static const uint8_t in2
[]={
2031 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2033 static const uint8_t in3
[]={
2034 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2037 /* expected test results */
2038 static const uint32_t results1
[]={
2039 /* number of bytes read, code point */
2043 static const uint32_t results2
[]={
2044 /* number of bytes read, code point */
2048 static const uint32_t results3
[]={
2049 /* number of bytes read, code point */
2056 const char *source
, *limit
;
2058 UErrorCode errorCode
=U_ZERO_ERROR
;
2059 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2060 if(U_FAILURE(errorCode
)) {
2061 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
/* input 1 */
2065 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2066 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
/* input 2, after resetting the converter */
2068 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2069 ucnv_resetToUnicode(cnv
);
2070 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
/* input 3, after resetting the converter */
2072 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2073 ucnv_resetToUnicode(cnv
);
2074 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2076 /* Test the condition when source >= sourceLimit */
2077 ucnv_resetToUnicode(cnv
);
2078 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * UTF-32BE test body (the file head declares this test as TestUTF32BE).
 *
 * Opens a "UTF-32BE" converter and runs the TestNextUChar() helper over in[]
 * against results[], then checks via TestNextUCharError() that an empty
 * range (limit == source) yields U_INDEX_OUTOFBOUNDS_ERROR. Afterwards
 * UCNV_TO_U_CALLBACK_SKIP is installed with ucnv_setToUCallBack() and
 * TestNextUChar() is run again over in2[], which contains out-of-range
 * values, against results2[].
 */
2086 static const uint8_t in
[]={
2087 0x00, 0x00, 0x00, 0x61,
2088 0x00, 0x00, 0xdc, 0x00,
2089 0x00, 0x00, 0xd8, 0x00,
2090 0x00, 0x00, 0xdf, 0xff,
2091 0x00, 0x00, 0xff, 0xfd,
2092 0x00, 0x10, 0xab, 0xcd,
2093 0x00, 0x10, 0xff, 0xff
2096 /* expected test results */
2097 static const uint32_t results
[]={
2098 /* number of bytes read, code point */
2108 /* error test input */
2109 static const uint8_t in2
[]={
2110 0x00, 0x00, 0x00, 0x61,
2111 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2112 0x00, 0x00, 0x00, 0x62,
2113 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2114 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2115 0x00, 0x00, 0x01, 0x62,
2116 0x00, 0x00, 0x02, 0x62
2119 /* expected error test results */
2120 static const uint32_t results2
[]={
2121 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback */
2128 UConverterToUCallback cb
;
2131 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2132 UErrorCode errorCode
=U_ZERO_ERROR
;
2133 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2134 if(U_FAILURE(errorCode
)) {
2135 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2138 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2140 /* Test the condition when source >= sourceLimit */
2141 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2143 /* test error behavior with a skip callback */
2144 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2145 source
=(const char *)in2
;
2146 limit
=(const char *)(in2
+sizeof(in2
));
2147 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
/*
 * UTF-32LE test body (the file head declares this test as TestUTF32LE).
 *
 * Opens a "UTF-32LE" converter and runs the TestNextUChar() helper over in[]
 * against results[], then checks via TestNextUCharError() that an empty
 * range (limit == source) yields U_INDEX_OUTOFBOUNDS_ERROR. Afterwards
 * UCNV_TO_U_CALLBACK_SKIP is installed with ucnv_setToUCallBack() and
 * TestNextUChar() is run again over in2[], which contains out-of-range
 * values, against results2[].
 */
2155 static const uint8_t in
[]={
2156 0x61, 0x00, 0x00, 0x00,
2157 0x00, 0xdc, 0x00, 0x00,
2158 0x00, 0xd8, 0x00, 0x00,
2159 0xff, 0xdf, 0x00, 0x00,
2160 0xfd, 0xff, 0x00, 0x00,
2161 0xcd, 0xab, 0x10, 0x00,
2162 0xff, 0xff, 0x10, 0x00
2165 /* expected test results */
2166 static const uint32_t results
[]={
2167 /* number of bytes read, code point */
2177 /* error test input */
2178 static const uint8_t in2
[]={
2179 0x61, 0x00, 0x00, 0x00,
2180 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2181 0x62, 0x00, 0x00, 0x00,
2182 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2183 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2184 0x62, 0x01, 0x00, 0x00,
2185 0x62, 0x02, 0x00, 0x00,
2188 /* expected error test results */
2189 static const uint32_t results2
[]={
2190 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback */
2197 UConverterToUCallback cb
;
2200 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2201 UErrorCode errorCode
=U_ZERO_ERROR
;
2202 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2203 if(U_FAILURE(errorCode
)) {
2204 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2207 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2209 /* Test the condition when source >= sourceLimit */
2210 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2212 /* test error behavior with a skip callback */
2213 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2214 source
=(const char *)in2
;
2215 limit
=(const char *)(in2
+sizeof(in2
));
2216 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
2224 static const uint8_t in
[]={
2233 /* expected test results */
2234 static const uint32_t results
[]={
2235 /* number of bytes read, code point */
2243 static const uint16_t in1
[] = {
2244 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2245 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2246 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2247 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2248 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2249 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2250 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2251 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2252 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2253 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2254 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2257 static const uint8_t out1
[] = {
2258 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2259 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2260 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2261 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2262 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2263 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2264 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2265 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2266 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2267 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2268 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2271 static const uint16_t in2
[]={
2272 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2273 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2274 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2275 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2276 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2277 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2278 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2279 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2280 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2281 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2282 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2283 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2284 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2285 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2286 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2287 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2288 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2289 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2290 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2291 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2292 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2293 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2294 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2295 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2296 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2297 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2298 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2299 0x37, 0x20, 0x2A, 0x2F,
2301 static const unsigned char out2
[]={
2302 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2303 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2304 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2305 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2306 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2307 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2308 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2309 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2310 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2311 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2312 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2313 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2314 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2315 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2316 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2317 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2318 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2319 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2320 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2321 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2322 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2323 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2324 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2325 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2326 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2327 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2328 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2329 0x37, 0x20, 0x2A, 0x2F,
2331 const char *source
=(const char *)in
;
2332 const char *limit
=(const char *)in
+sizeof(in
);
2334 UErrorCode errorCode
=U_ZERO_ERROR
;
2335 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2336 if(U_FAILURE(errorCode
)) {
2337 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2340 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2341 /* Test the condition when source >= sourceLimit */
2342 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2343 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2344 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
2352 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2353 /* expected test results */
2354 static const uint32_t results
[]={
2355 /* number of bytes read, code point */
2364 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2365 UErrorCode errorCode
=U_ZERO_ERROR
;
2366 UConverter
*cnv
=ucnv_open("ibm-1281", &errorCode
);
2367 if(U_FAILURE(errorCode
)) {
2368 log_data_err("Unable to open a SBCS(ibm-1281) converter: %s\n", u_errorName(errorCode
));
2371 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(ibm-1281)");
2372 /* Test the condition when source >= sourceLimit */
2373 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2374 /*Test for Illegal character */ /*
2376 static const uint8_t input1[]={ 0xA1 };
2377 const char* illegalsource=(const char*)input1;
2378 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2387 static const uint8_t in
[]={
2396 /* expected test results */
2397 static const uint32_t results
[]={
2398 /* number of bytes read, code point */
2406 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2407 UErrorCode errorCode
=U_ZERO_ERROR
;
2409 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2410 if(U_FAILURE(errorCode
)) {
2411 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2414 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2415 /* Test the condition when source >= sourceLimit */
2416 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2417 /*Test for the condition where we have a truncated char*/
2419 static const uint8_t source1
[]={0xc4};
2420 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2422 /*Test for the condition where there is an invalid character*/
2424 static const uint8_t source2
[]={0x1a, 0x1b};
2425 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2433 static const uint8_t in
[]={
2444 /* expected test results */
2445 static const uint32_t results
[]={
2446 /* number of bytes read, code point */
2456 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2457 UErrorCode errorCode
=U_ZERO_ERROR
;
2459 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2460 if(U_FAILURE(errorCode
)) {
2461 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2464 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2465 /* Test the condition when source >= sourceLimit */
2466 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2467 /*Test for the condition where we have a truncated char*/
2469 static const uint8_t source1
[]={0xc4};
2470 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2472 /*Test for the condition where there is an invalid character*/
2474 static const uint8_t source2
[]={0xa1, 0x01};
2475 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2484 static const uint8_t in
[]={
2485 0x1b, 0x25, 0x42, 0x31,
2490 0xf0, 0x90, 0x80, 0x80
2495 /* expected test results */
2496 static const uint32_t results
[]={
2497 /* number of bytes read, code point */
2507 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2508 UErrorCode errorCode
=U_ZERO_ERROR
;
2511 cnv
=ucnv_open("ISO_2022", &errorCode
);
2512 if(U_FAILURE(errorCode
)) {
2513 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2516 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2518 /* Test the condition when source >= sourceLimit */
2519 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2520 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2521 /*Test for the condition where we have a truncated char*/
2523 static const uint8_t source1
[]={0xc4};
2524 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2526 /*Test for the condition where there is an invalid character*/
2528 static const uint8_t source2
[]={0xa1, 0x01};
2529 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2535 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2536 const UChar
* uSource
;
2537 const UChar
* uSourceLimit
;
2538 const char* cSource
;
2539 const char* cSourceLimit
;
2540 UChar
*uTargetLimit
=NULL
;
2543 const char *cTargetLimit
;
2546 int32_t uBufSize
= 120;
2549 UErrorCode errorCode
=U_ZERO_ERROR
;
2550 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2551 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2554 uSource
= (UChar
*) source
;
2555 uSourceLimit
=(const UChar
*)sourceLimit
;
2559 cTargetLimit
= cBuf
;
2560 uTargetLimit
= uBuf
;
2564 cTargetLimit
= cTargetLimit
+ i
;
2565 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2566 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2567 errorCode
=U_ZERO_ERROR
;
2571 if(U_FAILURE(errorCode
)){
2572 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2576 }while (uSource
<uSourceLimit
);
2578 cSourceLimit
=cTarget
;
2580 uTargetLimit
=uTargetLimit
+i
;
2581 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2582 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2583 errorCode
=U_ZERO_ERROR
;
2586 if(U_FAILURE(errorCode
)){
2587 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2590 }while(cSource
<cSourceLimit
);
2594 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2595 if(uBuf
[len
]!=uSource
[len
]){
2596 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2603 /* Test for Jitterbug 778 */
2604 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2605 const UChar
* uSource
;
2606 const UChar
* uSourceLimit
;
2607 const char* cSource
;
2608 UChar
*uTargetLimit
=NULL
;
2611 const char *cTargetLimit
;
2614 int32_t uBufSize
= 120;
2615 int numCharsInTarget
=0;
2616 UErrorCode errorCode
=U_ZERO_ERROR
;
2617 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2618 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2620 uSourceLimit
=sourceLimit
;
2622 cTargetLimit
= cBuf
+uBufSize
*5;
2624 uTargetLimit
= uBuf
+ uBufSize
*5;
2626 numCharsInTarget
=ucnv_fromUChars( cnv
, cTarget
, (cTargetLimit
-cTarget
),uSource
,(uSourceLimit
-uSource
), &errorCode
);
2627 if(U_FAILURE(errorCode
)){
2628 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2633 ucnv_toUChars(cnv
,uTarget
,(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2634 if(U_FAILURE(errorCode
)){
2635 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2639 while(uSource
<uSourceLimit
){
2640 if(*test
!=*uSource
){
2642 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
2651 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2652 const UChar
* uSource
;
2653 const UChar
* uSourceLimit
;
2654 const char* cSource
;
2655 const char* cSourceLimit
;
2656 UChar
*uTargetLimit
=NULL
;
2659 const char *cTargetLimit
;
2662 int32_t uBufSize
= 120;
2665 const UChar
*temp
= sourceLimit
;
2666 UErrorCode errorCode
=U_ZERO_ERROR
;
2667 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2668 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2672 uSource
= (UChar
*) source
;
2676 cTargetLimit
= cBuf
;
2677 uTargetLimit
= uBuf
+uBufSize
*5;
2678 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2679 uSourceLimit
=uSource
;
2682 if (uSourceLimit
< sourceLimit
) {
2683 uSourceLimit
= uSourceLimit
+1;
2685 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2686 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2687 errorCode
=U_ZERO_ERROR
;
2691 if(U_FAILURE(errorCode
)){
2692 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2696 }while (uSource
<temp
);
2700 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2701 cSourceLimit
= cSourceLimit
+1;
2703 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2704 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2705 errorCode
=U_ZERO_ERROR
;
2708 if(U_FAILURE(errorCode
)){
2709 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2712 }while(cSource
<cTarget
);
2716 for(;len
<(int)(source
- sourceLimit
);len
++){
2717 if(uBuf
[len
]!=uSource
[len
]){
2718 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2726 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2727 const uint16_t results
[], const char* message
){
2729 const char* s
=(char*)source
;
2730 const uint16_t *r
=results
;
2731 UErrorCode errorCode
=U_ZERO_ERROR
;
2736 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2737 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2738 break; /* no more significant input */
2739 } else if(U_FAILURE(errorCode
)) {
2740 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
2743 if(UTF_IS_FIRST_SURROGATE(*r
)){
2745 UTF_NEXT_CHAR_SAFE(r
, i
, len
, exC
, FALSE
);
2750 if(c
!=(uint32_t)(exC
))
2751 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
2757 static int TestJitterbug930(const char* enc
){
2758 UErrorCode err
= U_ZERO_ERROR
;
2759 UConverter
*converter
;
2763 const UChar
*source
= in
;
2765 int32_t* offsets
= off
;
2766 int numOffWritten
=0;
2768 converter
= my_ucnv_open(enc
, &err
);
2770 in
[0] = 0x41; /* 0x4E00;*/
2775 memset(off
, '*', sizeof(off
));
2777 ucnv_fromUnicode (converter
,
2786 /* writes three bytes into the output buffer: 41 1B 24
2787 * but offsets contains 0 1 1
2789 while(*offsets
< off
[10]){
2793 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
2794 if(numOffWritten
!= (int)(target
-out
)){
2795 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
2800 memset(off
,'*' , sizeof(off
));
2804 ucnv_fromUnicode (converter
,
2813 while(*offsets
< off
[10]){
2816 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
2821 /* writes 42 43 7A into output buffer,
2822 * offsets contains -1 -1 -1
2824 ucnv_close(converter
);
2831 static const uint16_t in
[]={
2832 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2833 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2834 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2835 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2836 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2837 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2838 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2839 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2840 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2841 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2842 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2843 0x005A, 0x005B, 0x005C, 0x000A
2845 const UChar
* uSource
;
2846 const UChar
* uSourceLimit
;
2847 const char* cSource
;
2848 const char* cSourceLimit
;
2849 UChar
*uTargetLimit
=NULL
;
2852 const char *cTargetLimit
;
2855 int32_t uBufSize
= 120;
2856 UErrorCode errorCode
=U_ZERO_ERROR
;
2858 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
2859 int32_t* myOff
= offsets
;
2860 cnv
=ucnv_open("HZ", &errorCode
);
2861 if(U_FAILURE(errorCode
)) {
2862 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
2866 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2867 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2868 uSource
= (const UChar
*)&in
[0];
2869 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
2871 cTargetLimit
= cBuf
+uBufSize
*5;
2873 uTargetLimit
= uBuf
+ uBufSize
*5;
2874 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
2875 if(U_FAILURE(errorCode
)){
2876 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2880 cSourceLimit
=cTarget
;
2883 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
2884 if(U_FAILURE(errorCode
)){
2885 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2888 uSource
= (const UChar
*)&in
[0];
2889 while(uSource
<uSourceLimit
){
2890 if(*test
!=*uSource
){
2892 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
2897 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
2898 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
2899 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
2900 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
2901 TestJitterbug930("csISO2022JP");
2911 static const uint16_t in
[]={
2912 /* test full range of Devanagari */
2913 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
2914 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
2915 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
2916 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
2917 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
2918 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2919 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2920 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2921 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2922 0x096D,0x096E,0x096F,
2923 /* test Soft halant*/
2924 0x0915,0x094d, 0x200D,
2925 /* test explicit halant */
2926 0x0915,0x094d, 0x200c,
2927 /* test double danda */
2930 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2931 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2932 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2933 /* tests from Lotus */
2934 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
2935 0x0930,0x094D,0x200D,
2936 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
2937 0x0915,0x0921,0x002B,0x095F,
2939 0x0B86, 0xB87, 0xB88,
2941 0x0C05, 0x0C02, 0x0C03,0x0c31,
2943 0x0C85, 0xC82, 0x0C83,
2944 /* test Abbr sign and Anudatta */
2954 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
2955 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
2958 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
2959 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
2960 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
2961 0x093D /* Avagraha 0xEA, 0xE9*/,
2969 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
2971 static const unsigned char byteArr
[]={
2973 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
2974 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
2975 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
2976 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
2977 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
2978 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
2979 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
2980 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
2981 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
2983 /* test soft halant */
2985 /* test explicit halant */
2987 /* test double danda */
2990 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2991 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2992 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2995 /* tests from Lotus */
2996 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
2997 0xEF,0x42,0xCF,0xE8,0xD9,
2998 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
2999 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3001 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3003 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3005 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3006 /* anudatta and abbreviation sign */
3007 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3010 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3012 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3014 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3016 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3018 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3020 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3022 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3024 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3026 0xB3, 0xE9, /* Ka + NUKTA */
3028 0xB4, 0xE9, /* Kha + NUKTA */
3030 0xB5, 0xE9, /* Ga + NUKTA */
3042 /* just consume unhandled codepoints */
3046 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,(sizeof(in
)/U_SIZEOF_UCHAR
),"x-iscii-de",NULL
,TRUE
);
3047 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
3054 static const uint16_t in
[]={
3055 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3056 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3057 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3058 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3059 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3060 0x201D, 0x3014, 0x000D, 0x000A,
3061 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3062 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3064 const UChar
* uSource
;
3065 const UChar
* uSourceLimit
;
3066 const char* cSource
;
3067 const char* cSourceLimit
;
3068 UChar
*uTargetLimit
=NULL
;
3071 const char *cTargetLimit
;
3074 int32_t uBufSize
= 120;
3075 UErrorCode errorCode
=U_ZERO_ERROR
;
3077 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3078 int32_t* myOff
= offsets
;
3079 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3080 if(U_FAILURE(errorCode
)) {
3081 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3085 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3086 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3087 uSource
= (const UChar
*)&in
[0];
3088 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3090 cTargetLimit
= cBuf
+uBufSize
*5;
3092 uTargetLimit
= uBuf
+ uBufSize
*5;
3093 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3094 if(U_FAILURE(errorCode
)){
3095 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3099 cSourceLimit
=cTarget
;
3102 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3103 if(U_FAILURE(errorCode
)){
3104 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3108 uSource
= (const UChar
*)&in
[0];
3109 while(uSource
<uSourceLimit
){
3110 if(*test
!=*uSource
){
3112 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3118 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3119 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3120 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3121 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3122 TestJitterbug930("csISO2022JP");
3129 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3130 const UChar
* uSource
;
3131 const UChar
* uSourceLimit
;
3132 const char* cSource
;
3133 const char* cSourceLimit
;
3134 UChar
*uTargetLimit
=NULL
;
3137 const char *cTargetLimit
;
3140 int32_t uBufSize
= 120*10;
3141 UErrorCode errorCode
=U_ZERO_ERROR
;
3143 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3144 int32_t* myOff
= offsets
;
3145 cnv
=my_ucnv_open(conv
, &errorCode
);
3146 if(U_FAILURE(errorCode
)) {
3147 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3151 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3152 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3153 uSource
= (const UChar
*)&in
[0];
3154 uSourceLimit
=uSource
+len
;
3156 cTargetLimit
= cBuf
+uBufSize
;
3158 uTargetLimit
= uBuf
+ uBufSize
;
3159 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3160 if(U_FAILURE(errorCode
)){
3161 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3164 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3166 cSourceLimit
=cTarget
;
3169 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3170 if(U_FAILURE(errorCode
)){
3171 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
3175 uSource
= (const UChar
*)&in
[0];
3176 while(uSource
<uSourceLimit
){
3177 if(*test
!=*uSource
){
3178 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
3183 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[len
],cnv
);
3184 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[len
],cnv
);
3185 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
3186 if(byteArr
&& byteArrLen
!=0){
3187 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3188 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[len
],cnv
);
3191 cSourceLimit
= cSource
+byteArrLen
;
3194 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3195 if(U_FAILURE(errorCode
)){
3196 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3200 uSource
= (const UChar
*)&in
[0];
3201 while(uSource
<uSourceLimit
){
3202 if(*test
!=*uSource
){
3203 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3216 static UChar U_CALLCONV
3217 _charAt(int32_t offset
, void *context
) {
3218 return ((char*)context
)[offset
];
3222 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3225 if(U_FAILURE(*status
)){
3228 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3229 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3233 srcLen
= uprv_strlen(src
);
3236 for (; srcIndex
<srcLen
; ) {
3237 UChar32 c
= src
[srcIndex
++];
3238 if (c
== 0x005C /*'\\'*/) {
3239 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3240 if (c
== (UChar32
)0xFFFFFFFF) {
3241 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3242 break; /* invalid escape sequence */
3245 if(dstIndex
< dstLen
){
3247 dst
[dstIndex
++] = UTF16_LEAD(c
);
3248 if(dstIndex
<dstLen
){
3249 dst
[dstIndex
]=UTF16_TRAIL(c
);
3251 *status
=U_BUFFER_OVERFLOW_ERROR
;
3254 dst
[dstIndex
]=(UChar
)c
;
3258 *status
= U_BUFFER_OVERFLOW_ERROR
;
3260 dstIndex
++; /* for preflighting */
3266 TestFullRoundtrip(const char* cp
){
3267 UChar usource
[10] ={0};
3268 UChar nsrc
[10] = {0};
3272 /* Test codepoint 0 */
3273 TestConv(usource
,1,cp
,"",NULL
,0);
3274 TestConv(usource
,2,cp
,"",NULL
,0);
3276 TestConv(nsrc
,3,cp
,"",NULL
,0);
3278 for(;i
<=0x10FFFF;i
++){
3284 usource
[0] =(UChar
) i
;
3287 usource
[0]=UTF16_LEAD(i
);
3288 usource
[1]=UTF16_TRAIL(i
);
3295 /* Test only single code points */
3296 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3297 /* Test codepoint repeated twice */
3298 usource
[ulen
]=usource
[0];
3299 usource
[ulen
+1]=usource
[1];
3301 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3302 /* Test codepoint repeated 3 times */
3303 usource
[ulen
]=usource
[0];
3304 usource
[ulen
+1]=usource
[1];
3306 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3307 /* Test codepoint in between 2 codepoints */
3311 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
3312 uprv_memset(usource
,0,sizeof(UChar
)*10);
/*
 * Calls TestFullRoundtrip() once for each converter name listed below and
 * writes a log_verbose() line naming the converter before each call.
 */
3317 TestRoundTrippingAllUTF(void){
3319 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3320 TestFullRoundtrip("BOCU-1");
3321 log_verbose("Running exhaustive round trip test for SCSU\n");
3322 TestFullRoundtrip("SCSU");
3323 log_verbose("Running exhaustive round trip test for UTF-8\n");
3324 TestFullRoundtrip("UTF-8");
3325 log_verbose("Running exhaustive round trip test for CESU-8\n");
3326 TestFullRoundtrip("CESU-8");
3327 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3328 TestFullRoundtrip("UTF-16BE");
3329 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3330 TestFullRoundtrip("UTF-16LE");
3331 log_verbose("Running exhaustive round trip test for UTF-16\n");
3332 TestFullRoundtrip("UTF-16");
3333 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3334 TestFullRoundtrip("UTF-32BE");
3335 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3336 TestFullRoundtrip("UTF-32LE");
3337 log_verbose("Running exhaustive round trip test for UTF-32\n");
3338 TestFullRoundtrip("UTF-32");
3339 log_verbose("Running exhaustive round trip test for UTF-7\n");
3340 TestFullRoundtrip("UTF-7");
/* NOTE(review): the message below repeats "UTF-7" although the converter
 * exercised next is "UTF-7,version=1"; the two runs are indistinguishable in
 * the verbose log. */
3341 log_verbose("Running exhaustive round trip test for UTF-7\n");
3342 TestFullRoundtrip("UTF-7,version=1");
3343 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3344 TestFullRoundtrip("IMAP-mailbox-name");
3345 log_verbose("Running exhaustive round trip test for GB18030\n");
3346 TestFullRoundtrip("GB18030");
3353 static const uint16_t germanUTF16
[]={
3354 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3357 static const uint8_t germanSCSU
[]={
3358 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3361 static const uint16_t russianUTF16
[]={
3362 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3365 static const uint8_t russianSCSU
[]={
3366 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3369 static const uint16_t japaneseUTF16
[]={
3370 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3371 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3372 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3373 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3374 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3375 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3376 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3377 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3378 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3379 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3380 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3381 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3382 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3383 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3384 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3387 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3388 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3389 static const uint8_t japaneseSCSU
[]={
3390 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3391 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3392 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3393 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3394 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3395 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3396 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3397 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3398 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3399 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3400 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3404 static const uint16_t allFeaturesUTF16
[]={
3405 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3406 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3407 0x01df, 0xf000, 0xdbff, 0xdfff
3410 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3411 * result here (34B vs. 35B)
3413 static const uint8_t allFeaturesSCSU
[]={
3414 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3415 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3416 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3417 0xdf, 0x14, 0x80, 0x15, 0xff
3419 static const uint16_t monkeyIn
[]={
3420 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3421 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3422 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3423 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3424 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3425 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3426 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3427 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3428 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3429 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3430 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3431 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3432 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3433 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3434 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3435 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3436 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3437 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3438 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3439 /* test non-BMP code points */
3440 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3441 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3442 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3443 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3444 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3445 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3446 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3447 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3448 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3449 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3450 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3453 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3454 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3455 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3456 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3457 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3459 static const char *fTestCases
[] = {
3460 "\\ud800\\udc00", /* smallest surrogate*/
3462 "\\udBff\\udFff", /* largest surrogate pair*/
3465 "Hello \\u9292 \\u9192 World!",
3466 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3467 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3469 "\\u0648\\u06c8", /* catch missing reset*/
3472 "\\u4444\\uE001", /* lowest quotable*/
3473 "\\u4444\\uf2FF", /* highest quotable*/
3474 "\\u4444\\uf188\\u4444",
3475 "\\u4444\\uf188\\uf288",
3476 "\\u4444\\uf188abc\\u0429\\uf288",
3478 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3479 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3480 "Hello World!123456",
3481 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3483 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3484 "abc\\u4411d", /* uses SQU*/
3485 "abc\\u4411\\u4412d",/* uses SCU*/
3486 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3487 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3489 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3490 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3491 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3493 "", /* empty input*/
3494 "\\u0000", /* smallest BMP character*/
3495 "\\uFFFF", /* largest BMP character*/
3497 /* regression tests*/
3498 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3499 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3500 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3501 "\\u0041\\u00df\\u0401\\u015f",
3502 "\\u9066\\u2123abc",
3503 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3504 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3507 for(;i
<sizeof(fTestCases
)/sizeof(*fTestCases
);i
++){
3508 const char* cSrc
= fTestCases
[i
];
3509 UErrorCode status
= U_ZERO_ERROR
;
3510 int32_t cSrcLen
,srcLen
;
3512 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3513 cSrcLen
= srcLen
= uprv_strlen(fTestCases
[i
]);
3514 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3515 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3516 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3517 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3520 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3521 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3522 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3523 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3524 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3525 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3526 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
/*
 * Regression test named for Jitterbug 2346. Decodes the bytes in `source` with
 * the "ISO_2022_JP" converter via ucnv_toUnicode(), compares the decoded units
 * with `expected`, and then encodes the decoded text again with
 * ucnv_fromUnicode(). Any conversion failure is reported through log_err().
 */
3528 static void TestJitterbug2346(){
/* Two identical 10-byte records: 1B 24 42 (ESC '$' 'B'), 3D 45,
 * 1B 28 4A (ESC '(' 'J'), then 0D 0A. */
3529 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3530 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3531 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3533 UChar uTarget
[500]={'\0'};
3534 UChar
* utarget
=uTarget
;
/* sizeof(uTarget)/2 is meant as the element count; this assumes
 * sizeof(UChar) == 2 -- TODO confirm. */
3535 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3537 char cTarget
[500]={'\0'};
3538 char* ctarget
=cTarget
;
3539 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3540 const char* csource
=source
;
/* temp walks the expected values during the comparison below. */
3541 UChar
* temp
= expected
;
3542 UErrorCode err
=U_ZERO_ERROR
;
3544 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3545 if(U_FAILURE(err
)) {
3546 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3549 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3550 if(U_FAILURE(err
)) {
3551 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
/* The end of the decoded text becomes the new limit.
 * NOTE(review): presumably utarget is rewound to uTarget on a line not
 * visible here; otherwise the loop below would not execute -- confirm. */
3554 utargetLimit
=utarget
;
3556 while(utarget
<utargetLimit
){
3557 if(*temp
!=*utarget
){
/* NOTE(review): the arguments look swapped relative to the format string:
 * *utarget (the converted output) is printed as "Expected" and *temp
 * (which points into expected[]) as "Got". */
3559 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3564 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3565 if(U_FAILURE(err
)) {
3566 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3569 ctargetLimit
=ctarget
;
/*
 * Round-trip test for the converter opened as "ISO_2022_JP_1". The UTF-16 text
 * in `in` is encoded with ucnv_fromUnicode() into cBuf, the bytes up to cTarget
 * are decoded with ucnv_toUnicode() into uBuf, and the decoded units are
 * compared with `in`. The same converter is then handed to
 * TestNextUCharError(), TestSmallTargetBuffer() and TestSmallSourceBuffer().
 */
3576 TestISO_2022_JP_1() {
3578 static const uint16_t in
[]={
3579 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3580 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3581 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3582 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3583 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3584 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3585 0x201D, 0x000D, 0x000A,
3586 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3587 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3588 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3589 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3590 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3591 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3593 const UChar
* uSource
;
3594 const UChar
* uSourceLimit
;
3595 const char* cSource
;
3596 const char* cSourceLimit
;
3597 UChar
*uTargetLimit
=NULL
;
3600 const char *cTargetLimit
;
3603 int32_t uBufSize
= 120;
3604 UErrorCode errorCode
=U_ZERO_ERROR
;
3607 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3608 if(U_FAILURE(errorCode
)) {
3609 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* Both work buffers hold uBufSize*5 elements; the target limits computed
 * below use the same factor.
 * NOTE(review): neither malloc() result is checked in the lines visible here. */
3613 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3614 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3615 uSource
= (const UChar
*)&in
[0];
/* sizeof(in)/2 is the element count because in[] holds uint16_t values. */
3616 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3618 cTargetLimit
= cBuf
+uBufSize
*5;
3620 uTargetLimit
= uBuf
+ uBufSize
*5;
3621 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3622 if(U_FAILURE(errorCode
)){
3623 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* The end of the encoded bytes becomes the source limit for decoding. */
3627 cSourceLimit
=cTarget
;
3629 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3630 if(U_FAILURE(errorCode
)){
3631 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Rewind uSource to the start of in[] and compare it with the decoded text. */
3634 uSource
= (const UChar
*)&in
[0];
3635 while(uSource
<uSourceLimit
){
3636 if(*test
!=*uSource
){
3638 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3644 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3645 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status passed below is U_ZERO_ERROR even though
 * the case is labelled "an invalid character" -- confirm this is intended. */
3648 static const uint8_t source2
[]={0x0e,0x24,0x053};
3649 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3651 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3652 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
/*
 * Round-trip test for the converter opened as "ISO_2022_JP_2". The UTF-16 text
 * in `in` is encoded with ucnv_fromUnicode() into cBuf, the bytes up to cTarget
 * are decoded with ucnv_toUnicode() into uBuf, and the decoded units are
 * compared with `in`. Both conversions are given the `offsets` buffer through
 * myOff. The converter is then handed to TestSmallTargetBuffer(),
 * TestSmallSourceBuffer(), TestToAndFromUChars() and TestNextUCharError().
 */
3659 TestISO_2022_JP_2() {
3661 static const uint16_t in
[]={
3662 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3663 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3664 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3665 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3666 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3667 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3668 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3669 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3670 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3671 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3672 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3673 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3674 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3675 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3676 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3677 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3678 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3679 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3680 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3682 const UChar
* uSource
;
3683 const UChar
* uSourceLimit
;
3684 const char* cSource
;
3685 const char* cSourceLimit
;
3686 UChar
*uTargetLimit
=NULL
;
3689 const char *cTargetLimit
;
3692 int32_t uBufSize
= 120;
3693 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): offsets is allocated before the converter is opened, and the
 * malloc() result is not checked here; confirm that the exit path taken when
 * ucnv_open() fails releases it. */
3695 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3696 int32_t* myOff
= offsets
;
3697 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3698 if(U_FAILURE(errorCode
)) {
3699 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* Both work buffers hold uBufSize*5 elements; the target limits computed
 * below use the same factor. */
3703 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3704 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3705 uSource
= (const UChar
*)&in
[0];
/* sizeof(in)/2 is the element count because in[] holds uint16_t values. */
3706 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3708 cTargetLimit
= cBuf
+uBufSize
*5;
3710 uTargetLimit
= uBuf
+ uBufSize
*5;
3711 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3712 if(U_FAILURE(errorCode
)){
3713 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* The end of the encoded bytes becomes the source limit for decoding. */
3717 cSourceLimit
=cTarget
;
3720 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3721 if(U_FAILURE(errorCode
)){
3722 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Rewind uSource to the start of in[] and compare it with the decoded text. */
3725 uSource
= (const UChar
*)&in
[0];
3726 while(uSource
<uSourceLimit
){
3727 if(*test
!=*uSource
){
3729 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3734 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3735 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3736 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3737 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status passed below is U_ZERO_ERROR even though
 * the case is labelled "an invalid character" -- confirm this is intended. */
3740 static const uint8_t source2
[]={0x0e,0x24,0x053};
3741 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
3752 static const uint16_t in
[]={
3753 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3754 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3755 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3756 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3757 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3758 ,0x53E3,0x53E4,0x000A,0x000D};
3759 const UChar
* uSource
;
3760 const UChar
* uSourceLimit
;
3761 const char* cSource
;
3762 const char* cSourceLimit
;
3763 UChar
*uTargetLimit
=NULL
;
3766 const char *cTargetLimit
;
3769 int32_t uBufSize
= 120;
3770 UErrorCode errorCode
=U_ZERO_ERROR
;
3772 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3773 int32_t* myOff
= offsets
;
3774 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
3775 if(U_FAILURE(errorCode
)) {
3776 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3780 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3781 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3782 uSource
= (const UChar
*)&in
[0];
3783 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3785 cTargetLimit
= cBuf
+uBufSize
*5;
3787 uTargetLimit
= uBuf
+ uBufSize
*5;
3788 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3789 if(U_FAILURE(errorCode
)){
3790 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3794 cSourceLimit
=cTarget
;
3797 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3798 if(U_FAILURE(errorCode
)){
3799 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3802 uSource
= (const UChar
*)&in
[0];
3803 while(uSource
<uSourceLimit
){
3804 if(*test
!=*uSource
){
3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
3810 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
3811 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3812 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3813 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3814 TestJitterbug930("csISO2022KR");
3815 /*Test for the condition where there is an invalid character*/
3818 static const uint8_t source2
[]={0x1b,0x24,0x053};
3819 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Round-trip test for the converter opened as "ibm-25546". The UTF-16 text in
 * `in` is encoded with ucnv_fromUnicode() into cBuf, the bytes up to cTarget
 * are decoded with ucnv_toUnicode() into uBuf, and the decoded units are
 * compared with `in`. Both conversions are given the `offsets` buffer through
 * myOff. The converter is then handed to TestGetNextUChar2022(),
 * TestSmallTargetBuffer(), TestSmallSourceBuffer(), TestToAndFromUChars() and
 * TestNextUCharError().
 */
3828 TestISO_2022_KR_1() {
3830 static const uint16_t in
[]={
3831 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3832 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3833 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3834 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3835 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3836 ,0x53E3,0x53E4,0x000A,0x000D};
3837 const UChar
* uSource
;
3838 const UChar
* uSourceLimit
;
3839 const char* cSource
;
3840 const char* cSourceLimit
;
3841 UChar
*uTargetLimit
=NULL
;
3844 const char *cTargetLimit
;
3847 int32_t uBufSize
= 120;
3848 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): offsets is allocated before the converter is opened, and the
 * malloc() result is not checked here; confirm that the exit path taken when
 * ucnv_open() fails releases it. */
3850 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3851 int32_t* myOff
= offsets
;
3852 cnv
=ucnv_open("ibm-25546", &errorCode
);
3853 if(U_FAILURE(errorCode
)) {
3854 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* Both work buffers hold uBufSize*5 elements; the target limits computed
 * below use the same factor. */
3858 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3859 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3860 uSource
= (const UChar
*)&in
[0];
/* sizeof(in)/2 is the element count because in[] holds uint16_t values. */
3861 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3863 cTargetLimit
= cBuf
+uBufSize
*5;
3865 uTargetLimit
= uBuf
+ uBufSize
*5;
3866 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3867 if(U_FAILURE(errorCode
)){
3868 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* The end of the encoded bytes becomes the source limit for decoding. */
3872 cSourceLimit
=cTarget
;
3875 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3876 if(U_FAILURE(errorCode
)){
3877 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Rewind uSource to the start of in[] and compare it with the decoded text. */
3880 uSource
= (const UChar
*)&in
[0];
3881 while(uSource
<uSourceLimit
){
3882 if(*test
!=*uSource
){
3883 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
/* cBuf..cTarget is the byte range produced by ucnv_fromUnicode() above. */
3889 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
3890 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3891 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3893 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3894 /*Test for the condition where there is an invalid character*/
/* The three bytes 1B 24 53 are expected to be reported as
 * U_ILLEGAL_ESCAPE_SEQUENCE. */
3897 static const uint8_t source2
[]={0x1b,0x24,0x053};
3898 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Regression test named for Jitterbug 2411. `source` contains the byte
 * sequence 1B 24 29 43 three times (at the start, right after the 0A byte, and
 * at the very end). The text is decoded with the "iso-2022-kr" converter and a
 * failure is reported through log_err(); afterwards "ibm-25546" is opened as
 * kr1 and a second ucnv_toUnicode() call is made.
 */
3906 static void TestJitterbug2411(){
3907 const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3908 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3909 UConverter
* kr
=NULL
, *kr1
=NULL
;
3910 UErrorCode errorCode
= U_ZERO_ERROR
;
3911 UChar tgt
[100]={'\0'};
3912 UChar
* target
= tgt
;
3913 UChar
* targetLimit
= target
+100;
3914 kr
=ucnv_open("iso-2022-kr", &errorCode
);
3915 if(U_FAILURE(errorCode
)) {
3916 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
/* source and target are passed by address, so this call may advance both. */
3919 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
3920 if(U_FAILURE(errorCode
)) {
3921 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
3924 kr1
= ucnv_open("ibm-25546", &errorCode
);
3925 if(U_FAILURE(errorCode
)) {
3926 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
/* NOTE(review): unless target is reset to tgt on a line not visible here,
 * target+100 is measured from the already-advanced pointer and would lie
 * beyond the end of tgt[100] -- confirm. */
3930 targetLimit
= target
+100;
/* NOTE(review): this second conversion still passes kr, so kr1 is never
 * exercised in the visible code although the failure message below refers to
 * iso-2022-kr_1. source was also handed to the first call by address; if that
 * call consumed the whole input, the range given here is empty -- confirm. */
3931 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
3933 if(U_FAILURE(errorCode
)) {
3934 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
3948 UChar sampleTextJIS
[] ={
3956 const uint8_t expectedISO2022JIS
[] ={
3958 0x25, 0x41, 0x25, 0x44,
3959 0x25, 0x6c, 0x25, 0x6d,
3960 0x25, 0x6e, 0x25, 0x6F,
3961 0x25, 0x62, 0x25, 0x64,
3962 0x25, 0x66, 0x25, 0x68,
3963 0x25, 0x69, 0x25, 0x6a
3966 int32_t fmISO2022JISOffs
[] ={
3978 const uint8_t expectedISO2022JIS7
[] ={
3982 0x25, 0x6c, 0x25, 0x6d,
3983 0x25, 0x6e, 0x25, 0x6F,
3990 int32_t fmISO2022JIS7Offs
[] ={
4004 const uint8_t expectedISO2022JIS8
[] ={
4008 0x25, 0x6c, 0x25, 0x6d,
4009 0x25, 0x6e, 0x25, 0x6F,
4016 int32_t fmISO2022JIS8Offs
[] ={
4028 testConvertFromU(sampleTextJIS
, sizeof(sampleTextJIS
)/sizeof(sampleTextJIS
[0]),
4029 expectedISO2022JIS
, sizeof(expectedISO2022JIS
), "JIS", fmISO2022JISOffs
,TRUE
);
4030 testConvertFromU(sampleTextJIS
, sizeof(sampleTextJIS
)/sizeof(sampleTextJIS
[0]),
4031 expectedISO2022JIS7
, sizeof(expectedISO2022JIS7
), "JIS7", fmISO2022JIS7Offs
,FALSE
);
4032 testConvertFromU(sampleTextJIS
, sizeof(sampleTextJIS
)/sizeof(sampleTextJIS
[0]),
4033 expectedISO2022JIS8
, sizeof(expectedISO2022JIS8
), "JIS8", fmISO2022JIS8Offs
,FALSE
);
4039 const uint8_t sampleTextJIS
[] = {
4040 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4041 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4042 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4044 const uint16_t expectedISO2022JIS
[] = {
4049 int32_t toISO2022JISOffs
[]={
4055 const uint8_t sampleTextJIS7
[] = {
4056 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4057 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4058 0x1b,0x24,0x42,0x21,0x21,
4059 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4061 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4063 const uint16_t expectedISO2022JIS7
[] = {
4071 int32_t toISO2022JIS7Offs
[]={
4078 const uint8_t sampleTextJIS8
[] = {
4079 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4080 0xa1,0xc8,0xd9,/*Katakana Set*/
4083 0xb1,0xc3, /*Katakana Set*/
4084 0x1b,0x24,0x42,0x21,0x21
4086 const uint16_t expectedISO2022JIS8
[] = {
4088 0xff61, 0xff88, 0xff99,
4093 int32_t toISO2022JIS8Offs
[]={
4099 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4100 sizeof(expectedISO2022JIS
)/sizeof(expectedISO2022JIS
[0]),"JIS", toISO2022JISOffs
,TRUE
);
4101 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4102 sizeof(expectedISO2022JIS7
)/sizeof(expectedISO2022JIS7
[0]),"JIS7", toISO2022JIS7Offs
,TRUE
);
4103 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4104 sizeof(expectedISO2022JIS8
)/sizeof(expectedISO2022JIS8
[0]),"JIS8", toISO2022JIS8Offs
,TRUE
);
/*
 * Regression test named for Jitterbug 915. Decodes the bytes in cSource with
 * the "ISO_2022_CN_EXT" converter via ucnv_toUnicode(), encodes the decoded
 * text again with ucnv_fromUnicode() using the same converter, and compares
 * the produced bytes with cSource one byte at a time. Conversion failures and
 * mismatches are reported through log_err().
 *
 * NOTE(review): the comparison `*(ctarget++) != *(tempSrc++)` advances both
 * pointers before log_err() dereferences them, so the message shows the bytes
 * following the mismatching pair. log_err() also passes the converted byte
 * (*ctarget) for "Expected" and the original byte (*tempSrc) for "Got".
 *
 * NOTE(review): utargetLimit is set to utarget before ucnv_fromUnicode() and
 * ctargetLimit is set to ctarget before the comparison loop; presumably each
 * pointer is rewound to the start of its buffer on a line not visible here --
 * confirm.
 */
4109 static void TestJitterbug915(){
4110 /* tests for roundtripping of the below sequence
4111 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4112 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4113 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4114 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4115 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4116 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4117 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4119 static char cSource
[]={
4120 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4121 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4122 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4123 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4124 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4125 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4126 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
4127 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4128 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4129 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4130 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4131 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4132 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4133 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4134 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4135 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4136 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4137 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4138 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4139 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4140 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4141 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4142 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4143 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4144 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4145 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4146 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4147 0x37, 0x20, 0x2A, 0x2F,
4149 UChar uTarget
[500]={'\0'};
4150 UChar
* utarget
=uTarget
;
4151 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4153 char cTarget
[500]={'\0'};
4154 char* ctarget
=cTarget
;
4155 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4156 const char* csource
=cSource
;
4157 char* tempSrc
= cSource
;
4158 UErrorCode err
=U_ZERO_ERROR
;
4160 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4161 if(U_FAILURE(err
)) {
4162 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4165 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4166 if(U_FAILURE(err
)) {
4167 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4170 utargetLimit
=utarget
;
4172 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4173 if(U_FAILURE(err
)) {
4174 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4177 ctargetLimit
=ctarget
;
4179 while(ctarget
<ctargetLimit
){
4180 if(*(ctarget
++) != *(tempSrc
++)){
4181 log_err("Expected : \\x%02X \t Got: \\x%02X\n",*ctarget
,(int)*tempSrc
) ;
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=1".
 * The UTF-16 sample in[] is encoded with ucnv_fromUnicode(), the produced
 * bytes are decoded again with ucnv_toUnicode(), and the units read through
 * `test` are compared with in[]. The same converter is then handed to
 * TestSmallTargetBuffer(), TestSmallSourceBuffer() and TestNextUCharError().
 * NOTE(review): several declarations/assignments (cnv, uBuf, cBuf, uTarget,
 * cTarget, test) and any free()/ucnv_close() calls are not visible in this
 * excerpt; confirm cleanup, including the path after a failed ucnv_open().
 */
4189 TestISO_2022_CN_EXT() {
4191 static const uint16_t in
[]={
4192 /* test Non-BMP code points: UTF-16 surrogate pairs, each led by 0xD869 */
4193 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4194 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4195 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4196 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4197 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4198 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4199 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4200 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4201 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4204 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4205 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4206 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4207 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4208 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4209 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4210 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4211 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4212 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4213 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4214 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4215 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4216 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4217 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4218 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4219 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4220 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4221 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4223 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4227 const UChar
* uSource
;
4228 const UChar
* uSourceLimit
;
4229 const char* cSource
;
4230 const char* cSourceLimit
;
4231 UChar
*uTargetLimit
=NULL
;
4234 const char *cTargetLimit
;
/* Every work buffer below is sized as a multiple of uBufSize. */
4237 int32_t uBufSize
= 180;
4238 UErrorCode errorCode
=U_ZERO_ERROR
;
/* offsets: uBufSize*5 int32_t slots, passed to both conversion calls through myOff. */
4240 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4241 int32_t* myOff
= offsets
;
4242 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4243 if(U_FAILURE(errorCode
)) {
4244 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4248 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4249 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4250 uSource
= (const UChar
*)&in
[0];
/* in[] holds 16-bit units, so sizeof(in)/2 is its element count. */
4251 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
/* Only the first uBufSize*5 bytes of the uBufSize*10-byte cBuf are offered as target space. */
4253 cTargetLimit
= cBuf
+uBufSize
*5;
4255 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Step 1: Unicode -> ISO-2022-CN-EXT bytes. */
4256 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4257 if(U_FAILURE(errorCode
)){
4258 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* The encoded bytes end where cTarget was left, so that becomes the decode limit. */
4262 cSourceLimit
=cTarget
;
/* Step 2: bytes -> Unicode. */
4265 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4266 if(U_FAILURE(errorCode
)){
4267 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Step 3: rewind uSource to the start of in[] and compare unit by unit. */
4270 uSource
= (const UChar
*)&in
[0];
4271 while(uSource
<uSourceLimit
){
4272 if(*test
!=*uSource
){
4273 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4276 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
/* Repeat the conversion of in[] through the shared small-buffer helpers. */
4281 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4282 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4283 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected code passed below is U_ZERO_ERROR even though the
   case is labelled "an invalid character"; confirm that is intended. */
4286 static const uint8_t source2
[]={0x0e,0x24,0x053};
4287 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=0"
 * (presumably the body of TestISO_2022_CN; its header line is not visible in
 * this excerpt). in[] is encoded with ucnv_fromUnicode(), decoded again with
 * ucnv_toUnicode(), and the units read through `test` are compared with in[].
 * The converter is then exercised by TestGetNextUChar2022(),
 * TestSmallTargetBuffer(), TestSmallSourceBuffer(), TestToAndFromUChars(),
 * TestJitterbug930() and TestNextUCharError().
 * NOTE(review): declarations of cnv/uBuf/cBuf/uTarget/cTarget/test and any
 * free()/ucnv_close() calls are not visible here; confirm cleanup.
 */
4298 static const uint16_t in
[]={
4300 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4301 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4302 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4303 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4304 0x0020, 0x0045, 0x004e, 0x0044,
4306 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4307 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4308 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4309 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4310 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4311 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4312 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4313 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4314 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4315 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4316 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4317 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4318 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4319 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4320 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4321 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4322 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4325 const UChar
* uSource
;
4326 const UChar
* uSourceLimit
;
4327 const char* cSource
;
4328 const char* cSourceLimit
;
4329 UChar
*uTargetLimit
=NULL
;
4332 const char *cTargetLimit
;
/* Every work buffer below is sized as a multiple of uBufSize. */
4335 int32_t uBufSize
= 180;
4336 UErrorCode errorCode
=U_ZERO_ERROR
;
/* offsets: uBufSize*5 int32_t slots, passed to both conversion calls through myOff. */
4338 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4339 int32_t* myOff
= offsets
;
4340 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4341 if(U_FAILURE(errorCode
)) {
4342 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4346 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4347 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4348 uSource
= (const UChar
*)&in
[0];
/* in[] holds 16-bit units, so sizeof(in)/2 is its element count. */
4349 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
/* Only the first uBufSize*5 bytes of the uBufSize*10-byte cBuf are offered as target space. */
4351 cTargetLimit
= cBuf
+uBufSize
*5;
4353 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Step 1: Unicode -> ISO-2022-CN bytes. */
4354 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4355 if(U_FAILURE(errorCode
)){
4356 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* The encoded bytes end where cTarget was left, so that becomes the decode limit. */
4360 cSourceLimit
=cTarget
;
/* Step 2: bytes -> Unicode. */
4363 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4364 if(U_FAILURE(errorCode
)){
4365 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Step 3: rewind uSource to the start of in[] and compare unit by unit. */
4368 uSource
= (const UChar
*)&in
[0];
4369 while(uSource
<uSourceLimit
){
4370 if(*test
!=*uSource
){
4371 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4374 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
/* Re-run the encoded bytes (cBuf..cTarget) and in[] through the shared helpers. */
4379 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4380 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4381 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4382 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4383 TestJitterbug930("csISO2022CN");
4384 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected code passed below is U_ZERO_ERROR even though the
   case is labelled "an invalid character"; confirm that is intended. */
4387 static const uint8_t source2
[]={0x0e,0x24,0x053};
4388 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
/*
 * ucnv_getNextUChar() checks for the "ibm-930" converter.
 * Runs TestNextUChar() over in[]/results[] and again over in2[]/results2[],
 * and in between checks three error situations through TestNextUCharError():
 * an empty source range, a lone 0x0f byte, and the bytes 0x0e 0x7F 0xFF.
 * NOTE(review): the contents of in[], results[], in2[] and results2[] and any
 * ucnv_close() call are not visible in this excerpt.
 */
4398 TestEBCDIC_STATEFUL() {
4400 static const uint8_t in
[]={
4409 /* expected test results */
4410 static const uint32_t results
[]={
4411 /* number of bytes read, code point */
4420 static const uint8_t in2
[]={
4426 /* expected test results */
4427 static const uint32_t results2
[]={
4428 /* number of bytes read, code point */
4433 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4434 UErrorCode errorCode
=U_ZERO_ERROR
;
4435 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4436 if(U_FAILURE(errorCode
)) {
4437 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4440 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4442 /* Test the condition when source >= sourceLimit */
4443 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4445 /* Test for the condition where source > sourceLimit after consuming the shift character */
4447 static const uint8_t source1
[]={0x0f};
4448 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4450 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected code passed below is U_ZERO_ERROR even though the
   case is labelled "an invalid character"; confirm that is intended. */
4453 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4454 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
/* Second sequence: reuse source/limit for in2[] and check against results2[]. */
4457 source
=(const char*)in2
;
4458 limit
=(const char*)in2
+sizeof(in2
);
4459 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
/*
 * ucnv_getNextUChar() check for the "gb18030" converter: the byte sequence
 * in[] is walked with TestNextUChar() and compared against results[].
 * (Presumably the body of the GB 18030 test; its header line, parts of the
 * tables, and any ucnv_close() call are not visible in this excerpt.)
 */
4467 static const uint8_t in
[]={
4470 0x81, 0x30, 0x81, 0x30,
4474 0x82, 0x35, 0x8f, 0x33,
4475 0x84, 0x31, 0xa4, 0x39,
4476 0x90, 0x30, 0x81, 0x30,
4477 0xe3, 0x32, 0x9a, 0x35
4480 * Feature removed markus 2000-oct-26
4481 * Only some codepages must match surrogate pairs into supplementary code points -
4482 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4483 * GB 18030 provides direct encodings for supplementary code points, therefore
4484 * it must not combine two single-encoded surrogates into one code point.
4486 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4490 /* expected test results */
4491 static const uint32_t results
[]={
4492 /* number of bytes read, code point */
4504 /* Feature removed. See comment above. */
4509 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4510 UErrorCode errorCode
=U_ZERO_ERROR
;
4511 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4512 if(U_FAILURE(errorCode
)) {
4513 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
/* The source range is passed inline: the whole of in[], from its first byte to one past its last. */
4516 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
/*
 * Shared fixtures for the LMBCS checks that follow: the LMBCS byte string,
 * its expected UTF-32 and UTF-16 forms, two offset tables, and the converter
 * names. Most table rows are not visible in this excerpt.
 */
4522 /* LMBCS-1 string */
4523 static const uint8_t pszLMBCS
[]={
4532 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4536 /* Unicode UChar32 equivalents */
4537 static const UChar32 pszUnicode32
[]={
4547 0x00023456, /* code point for surrogate pair */
4551 /* Unicode UChar equivalents */
4552 static const UChar pszUnicode
[]={
4562 0xD84D, /* high (lead) surrogate of U+23456 */
4563 0xDC56, /* low (trail) surrogate of U+23456 */
4567 /* expected test results */
4568 static const int offsets32
[]={
4569 /* number of bytes read, code point */
4583 /* expected test results */
4584 static const int offsets
[]={
4585 /* number of bytes read, code point */
/* Converter names used both for ucnv_open() and for checking ucnv_getName(). */
4603 #define NAME_LMBCS_1 "LMBCS-1"
4604 #define NAME_LMBCS_2 "LMBCS-2"
4607 /* Some basic open/close/property tests on some LMBCS converters */
/*
 * Opens NAME_LMBCS_1 and NAME_LMBCS_2, checks that ucnv_getName() returns the
 * same names, reads the substitution character of cnv1 (expected 0x3F), then
 * sets 0x7F on cnv2 and reads it back.
 * NOTE(review): declarations of cnv1/cnv2, the length checks guarding the
 * "Unexpected length" messages, and the converter closes are not visible here.
 */
4610 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4611 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4612 char get_subchars
[1];
4613 const char * get_name
;
/* len is passed by address to ucnv_getSubstChars(): capacity on input, length on output. */
4617 int8_t len
= sizeof(get_subchars
);
4619 UErrorCode errorCode
=U_ZERO_ERROR
;
4622 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4623 if(U_FAILURE(errorCode
)) {
4624 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4627 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4628 if(U_FAILURE(errorCode
)) {
4629 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
/* Name check: strcmp() returns non-zero when the reported name differs. */
4634 get_name
= ucnv_getName (cnv1
, &errorCode
);
4635 if (strcmp(NAME_LMBCS_1
,get_name
)){
4636 log_err("Unexpected converter name: %s\n", get_name
);
4638 get_name
= ucnv_getName (cnv2
, &errorCode
);
4639 if (strcmp(NAME_LMBCS_2
,get_name
)){
4640 log_err("Unexpected converter name: %s\n", get_name
);
4643 /* substitution chars */
4644 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4645 if(U_FAILURE(errorCode
)) {
4646 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4649 log_err("Unexpected length of sub chars\n");
4651 if (get_subchars
[0] != expected_subchars
[0]){
4652 log_err("Unexpected value of sub chars\n");
/* Replace the substitution character on cnv2 and verify it round-trips. */
4654 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4655 if(U_FAILURE(errorCode
)) {
4656 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4658 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4659 if(U_FAILURE(errorCode
)) {
4660 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4663 log_err("Unexpected length of sub chars\n");
4665 if (get_subchars
[0] != new_subchars
[0]){
4666 log_err("Unexpected value of sub chars\n");
4673 /* LMBCS to Unicode - offsets */
/*
 * Converts pszLMBCS with ucnv_toUnicode() using the generic "lmbcs" name and
 * compares both the produced UChars (Out vs pszUnicode) and the reported
 * offsets (off vs offsets) with memcmp().
 * NOTE(review): the remaining ucnv_toUnicode() arguments and the converter
 * declaration/close are not visible in this excerpt.
 */
4675 UErrorCode errorCode
=U_ZERO_ERROR
;
4677 const uint8_t * pSource
= pszLMBCS
;
4678 const uint8_t * sourceLimit
= pszLMBCS
+ sizeof(pszLMBCS
);
/* sizeof(pszUnicode) is a byte count, so Out has more elements than pszUnicode;
   OutLimit below uses the element count. */
4680 UChar Out
[sizeof(pszUnicode
) + 1];
4682 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
/* Likewise sized by sizeof(offsets) in bytes, i.e. more slots than offsets[] has entries. */
4684 int32_t off
[sizeof(offsets
)];
4686 /* last 'offset' in expected results is just the final size.
4687 (Makes other tests easier). Compensate here: */
4689 off
[(sizeof(offsets
)/sizeof(offsets
[0]))-1] = sizeof(pszLMBCS
);
4693 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4694 if(U_FAILURE(errorCode
)) {
4695 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4701 ucnv_toUnicode (cnv
,
4704 (const char **)&pSource
,
4705 (const char *)sourceLimit
,
/* memcmp() != 0 means the computed table differs from the expected one. */
4711 if (memcmp(off
,offsets
,sizeof(offsets
)))
4713 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4715 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4717 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4722 /* LMBCS to Unicode - getNextUChar */
/*
 * Walks pszLMBCS one code point at a time with ucnv_getNextUChar(). Each call
 * is limited to off[1]-off[0] bytes (taken from offsets32), and both the
 * number of bytes consumed and the returned code point (vs pszUnicode32) are
 * checked.
 * NOTE(review): the declarations of uniChar/cnv, the assignment of
 * sourceStart, and the advancing of results/off are not visible here.
 */
4723 const char * sourceStart
;
4724 const char *source
=(const char *)pszLMBCS
;
4725 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4726 const UChar32
*results
= pszUnicode32
;
4727 const int *off
= offsets32
;
4729 UErrorCode errorCode
=U_ZERO_ERROR
;
4732 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4733 if(U_FAILURE(errorCode
)) {
4734 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4740 while(source
<limit
) {
/* The source limit handed to the converter is exactly the expected length of this code point. */
4742 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4743 if(U_FAILURE(errorCode
)) {
4744 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4746 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
/* NOTE(review): %lx is paired with uniChar and *results (UChar32), %d with a
   pointer difference, and the last %d prints *off (an offset) where the text
   says "bytes"; confirm the specifiers and the intended value. */
4747 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4748 uniChar
, (source
-sourceStart
), *results
, *off
);
/*
 * Checks how the locale option and the optimization group influence LMBCS
 * output, using three converters: "LMBCS-16,locale=he",
 * "LMBCS-16,locale=ja_JP" and "LMBCS-1,locale=us_EN". uniString and
 * lmbcsString are reused as scratch buffers by every step.
 * NOTE(review): errorCode is not inspected after the three ucnv_open() calls
 * in the lines visible here.
 */
4757 { /* test locale & optimization group operations: Unicode to LMBCS */
4759 UErrorCode errorCode
=U_ZERO_ERROR
;
4760 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4761 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4762 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4763 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4764 const UChar
* pUniOut
= uniString
;
4765 UChar
* pUniIn
= uniString
;
4766 uint8_t lmbcsString
[4];
4767 const uint8_t * pLMBCSOut
= lmbcsString
;
4768 uint8_t * pLMBCSIn
= lmbcsString
;
4770 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4771 ucnv_fromUnicode (cnv16he
,
4772 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4773 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4774 NULL
, 1, &errorCode
);
/* With locale=he the output is expected to be the two bytes 0x03 0x83. */
4776 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
4778 log_err("LMBCS-16,locale=he gives unexpected translation\n");
/* Same character through LMBCS-1 (US locale): a single byte 0x9F is expected. */
4781 pLMBCSIn
=lmbcsString
;
4782 pUniOut
= uniString
;
4783 ucnv_fromUnicode (cnv01us
,
4784 (char **)&pLMBCSIn
, (const char *)(lmbcsString
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4785 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4786 NULL
, 1, &errorCode
);
4788 if (lmbcsString
[0] != 0x9F)
4790 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4793 /* single byte char from mbcs char set */
4794 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4795 pLMBCSOut
= lmbcsString
;
/* Decode the single byte 0xAE with the ja_JP converter; U+FF6E is expected in uniString[0]. */
4797 ucnv_toUnicode (cnv16jp
,
4798 &pUniIn
, pUniIn
+ 1,
4799 (const char **)&pLMBCSOut
, (const char *)(pLMBCSOut
+ 1),
4800 NULL
, 1, &errorCode
);
4801 if (U_FAILURE(errorCode
) || pLMBCSOut
!= lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4803 log_err("Unexpected results from LMBCS-16 single byte char\n");
4805 /* convert to group 1: should be 3 bytes */
4806 pLMBCSIn
= lmbcsString
;
4807 pUniOut
= uniString
;
4808 ucnv_fromUnicode (cnv01us
,
4809 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
4810 &pUniOut
, pUniOut
+ 1,
4811 NULL
, 1, &errorCode
);
4812 if (U_FAILURE(errorCode
) || pLMBCSIn
!= lmbcsString
+3 || pUniOut
!= uniString
+1
4813 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
4815 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
/* Decode those three bytes again with the same LMBCS-1 converter. */
4817 pLMBCSOut
= lmbcsString
;
4819 ucnv_toUnicode (cnv01us
,
4820 &pUniIn
, pUniIn
+ 1,
4821 (const char **)&pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
4822 NULL
, 1, &errorCode
);
4823 if (U_FAILURE(errorCode
) || pLMBCSOut
!= lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4825 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
/* Encode with the ja_JP converter: the single byte 0xAE is expected. */
4827 pLMBCSIn
= lmbcsString
;
4828 pUniOut
= uniString
;
4829 ucnv_fromUnicode (cnv16jp
,
4830 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
4831 &pUniOut
, pUniOut
+ 1,
4832 NULL
, 1, &errorCode
);
4833 if (U_FAILURE(errorCode
) || pLMBCSIn
!= lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
4835 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4837 ucnv_close(cnv16he
);
4838 ucnv_close(cnv16jp
);
4839 ucnv_close(cnv01us
);
4842 /* Small source buffer testing, LMBCS -> Unicode */
/*
 * Feeds pszLMBCS to ucnv_toUnicode() one byte per call (flush == FALSE) and,
 * whenever the consumed length reaches the next entry of offsets[], compares
 * the accumulated UChar in Out[0] with pszUnicode[codepointCount].
 * NOTE(review): the target arguments of ucnv_toUnicode(), the declaration of
 * pOut/cnv and the increment of codepointCount are not visible here.
 */
4844 UErrorCode errorCode
=U_ZERO_ERROR
;
4846 const uint8_t * pSource
= pszLMBCS
;
4847 const uint8_t * sourceLimit
= pszLMBCS
+ sizeof(pszLMBCS
);
4848 int codepointCount
= 0;
/* sizeof(pszUnicode) is a byte count, so Out has more elements than pszUnicode. */
4850 UChar Out
[sizeof(pszUnicode
) + 1];
4852 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
/* NOTE(review): this open failure is reported with log_err(), whereas the
   other LMBCS opens above use log_data_err(); confirm which is intended. */
4855 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
4856 if(U_FAILURE(errorCode
)) {
4857 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4862 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
4864 ucnv_toUnicode (cnv
,
4867 (const char **)&pSource
,
4868 (const char *)(pSource
+1), /* claim that this is a 1- byte buffer */
4870 FALSE
, /* FALSE means there might be more chars in the next buffer */
4873 if (U_SUCCESS (errorCode
))
4875 if ((pSource
- (const uint8_t *)pszLMBCS
) == offsets
[codepointCount
+1])
4877 /* we are on to the next code point: check value */
4879 if (Out
[0] != pszUnicode
[codepointCount
]){
/* NOTE(review): %lx is paired with UChar arguments here; confirm the specifier. */
4880 log_err("LMBCS->Uni result %lx should have been %lx \n",
4881 Out
[0], pszUnicode
[codepointCount
]);
4884 pOut
= Out
; /* reset for accumulating next code point */
4890 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
4894 /* limits & surrogate error testing */
/*
 * Boundary checks on the converter `cnv` from the preceding section:
 * negative-length and zero-length source ranges, running out of target room,
 * and several truncated or unpaired surrogate sequences. The converter is
 * closed at the end.
 * NOTE(review): the lines that fill LIn/UIn and reset pLIn/pUIn/pUOut/pLOut
 * before each case are not visible in this excerpt; some comparisons below
 * are against pszUnicode/pszLMBCS, which only makes sense after such a
 * reassignment.
 */
/* These arrays are sized with sizeof() of the fixture tables, i.e. in bytes,
   so the UChar and int32_t arrays have more elements than the tables have entries. */
4895 uint8_t LIn
[sizeof(pszLMBCS
)];
4896 const uint8_t * pLIn
= LIn
;
4898 char LOut
[sizeof(pszLMBCS
)];
4899 char * pLOut
= LOut
;
4901 UChar UOut
[sizeof(pszUnicode
)];
4902 UChar
* pUOut
= UOut
;
4904 UChar UIn
[sizeof(pszUnicode
)];
4905 const UChar
* pUIn
= UIn
;
4907 int32_t off
[sizeof(offsets
)];
4910 errorCode
=U_ZERO_ERROR
;
4912 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4913 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
-1,off
,FALSE
, &errorCode
);
4914 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4916 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
4918 errorCode
=U_ZERO_ERROR
;
4919 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
4920 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4922 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
4924 errorCode
=U_ZERO_ERROR
;
4926 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
4927 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4929 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
4931 errorCode
=U_ZERO_ERROR
;
4933 /* 0 byte source request - no error, no pointer movement */
4934 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
4935 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
4936 if(U_FAILURE(errorCode
)) {
4937 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
4939 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
4941 log_err("Unexpected pointer move in 0 byte source request \n");
4943 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4944 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
4945 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4947 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
/* Unsigned subtraction wraps for values below 0xfffe, so one comparison covers both bounds. */
4949 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4951 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4953 errorCode
= U_ZERO_ERROR
;
4955 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
/* The target is limited to offsets[4] bytes, so only the first four code points are expected to fit. */
4958 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+sizeof(pszUnicode
)/sizeof(UChar
),off
,FALSE
, &errorCode
);
4959 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
4961 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4964 errorCode
= U_ZERO_ERROR
;
/* Same idea in the other direction: room for four UChars only. */
4967 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,(const char **)&pLIn
,(const char *)(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
4968 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const uint8_t *)pszLMBCS
+offsets
[4])
4970 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4973 /* unpaired or chopped LMBCS surrogates */
4975 /* OK high surrogate, Low surrogate is chopped */
4982 errorCode
= U_ZERO_ERROR
;
/* Expect the high surrogate 0xD801 to be delivered and the 5-byte input to end in U_TRUNCATED_CHAR_FOUND. */
4985 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
4986 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
4988 log_err("Unexpected results on chopped low surrogate\n");
4991 /* chopped at surrogate boundary */
4996 errorCode
= U_ZERO_ERROR
;
/* Three input bytes: one UChar (0xD801) and no error expected. */
4999 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5000 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5002 log_err("Unexpected results on chopped at surrogate boundary \n");
5005 /* unpaired surrogate plus valid Unichar */
5013 errorCode
= U_ZERO_ERROR
;
/* Six input bytes: 0xD801 followed by 0xC9D0, no error expected. */
5016 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5017 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5019 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5022 /* unpaired surrogate plus chopped Unichar */
5030 errorCode
= U_ZERO_ERROR
;
/* Five input bytes: 0xD801 delivered, then U_TRUNCATED_CHAR_FOUND for the incomplete character. */
5033 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5034 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5036 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5039 /* unpaired surrogate plus valid non-Unichar */
5047 errorCode
= U_ZERO_ERROR
;
/* Five input bytes: 0xD801 followed by 0x1B, no error expected. */
5050 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5051 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5053 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5056 /* unpaired surrogate plus chopped non-Unichar */
5063 errorCode
= U_ZERO_ERROR
;
/* Four input bytes: 0xD801 delivered, then U_TRUNCATED_CHAR_FOUND. */
5066 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5068 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5070 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5074 ucnv_close(cnv
); /* final cleanup */
5078 static void TestJitterbug255()
5080 const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5081 const uint8_t *testBuffer
= testBytes
;
5082 const uint8_t *testEnd
= testBytes
+ sizeof(testBytes
);
5083 UErrorCode status
= U_ZERO_ERROR
;
5085 UConverter
*cnv
= 0;
5087 cnv
= ucnv_open("shift-jis", &status
);
5088 if (U_FAILURE(status
) || cnv
== 0) {
5089 log_data_err("Failed to open the converter for SJIS.\n");
5092 while (testBuffer
!= testEnd
)
5094 result
= ucnv_getNextUChar (cnv
, (const char **)&testBuffer
, (const char *)testEnd
, &status
);
5095 if (U_FAILURE(status
))
5097 log_err("Failed to convert the next UChar for SJIS.\n");
5104 static void TestEBCDICUS4XML()
5106 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
5107 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
5108 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
5109 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
5110 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
5111 UChar
*unicodes
= unicodes_x
;
5112 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5113 char *target
= target_x
;
5114 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5115 UErrorCode status
= U_ZERO_ERROR
;
5116 UConverter
*cnv
= 0;
5118 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5119 if (U_FAILURE(status
) || cnv
== 0) {
5120 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5123 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
5124 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5125 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5126 u_errorName(status
));
5127 printUSeqErr(unicodes_x
, 3);
5128 printUSeqErr(toUnicodeMaps
, 3);
5130 status
= U_ZERO_ERROR
;
5131 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
5132 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5133 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5134 u_errorName(status
));
5135 printSeqErr((const unsigned char*)target_x
, 3);
5136 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5141 #if !UCONFIG_NO_COLLATION
/*
 * Converts the collation rules of the "zh" collator to UTF-8 with
 * ucnv_fromUChars(), retrying with a larger target capacity for as long as
 * the call reports U_BUFFER_OVERFLOW_ERROR, and checks that the required
 * length reported by successive attempts stays the same.
 * NOTE(review): the declarations of rules, buff and numNeeded, the initial
 * value of target_cap and the start of the do-loop are not visible in this
 * excerpt; also confirm that utf8cnv is released when ucol_open() fails.
 */
5143 static void TestJitterbug981(){
5145 int32_t rules_length
, target_cap
, bytes_needed
;
5146 UErrorCode status
= U_ZERO_ERROR
;
5147 UConverter
*utf8cnv
;
5148 UCollator
* myCollator
;
5151 utf8cnv
= ucnv_open ("utf8", &status
);
5152 if(U_FAILURE(status
)){
5153 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status
));
5156 myCollator
= ucol_open("zh", &status
);
5157 if(U_FAILURE(status
)){
5158 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status
));
5162 rules
= ucol_getRules(myCollator
, &rules_length
);
/* Each attempt starts from a reset converter and a clean status. */
5166 ucnv_reset(utf8cnv
);
5167 status
= U_ZERO_ERROR
;
5168 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5169 rules
, rules_length
, &status
);
/* Next capacity: the reported requirement if it is larger, otherwise one more byte. */
5170 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* numNeeded remembers the previous report; a different value on a later pass is an error. */
5171 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5172 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5174 numNeeded
= bytes_needed
;
5175 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5176 ucol_close(myCollator
);
5177 ucnv_close(utf8cnv
);
5182 static void TestJitterbug1293(){
5183 UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5185 UErrorCode status
= U_ZERO_ERROR
;
5186 UConverter
* conv
=NULL
;
5187 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5188 conv
= ucnv_open("shift-jis",&status
);
5189 if(U_FAILURE(status
)){
5190 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5196 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
5197 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5198 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5199 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5201 numNeeded
= bytes_needed
;
5202 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5203 if(U_FAILURE(status
)){
5204 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));