1 /********************************************************************
3 * Copyright (c) 1997-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 * Modification History:
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 ********************************************************************************
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
26 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
27 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
48 #if !UCONFIG_NO_LEGACY_CONVERSION
49 static void TestSBCS(void);
50 static void TestDBCS(void);
51 static void TestMBCS(void);
53 #ifdef U_ENABLE_GENERIC_ISO_2022
54 static void TestISO_2022(void);
57 static void TestISO_2022_JP(void);
58 static void TestISO_2022_JP_1(void);
59 static void TestISO_2022_JP_2(void);
60 static void TestISO_2022_KR(void);
61 static void TestISO_2022_KR_1(void);
62 static void TestISO_2022_CN(void);
63 static void TestISO_2022_CN_EXT(void);
64 static void TestJIS(void);
65 static void TestHZ(void);
68 static void TestSCSU(void);
70 #if !UCONFIG_NO_LEGACY_CONVERSION
71 static void TestEBCDIC_STATEFUL(void);
72 static void TestGB18030(void);
73 static void TestLMBCS(void);
74 static void TestJitterbug255(void);
75 static void TestEBCDICUS4XML(void);
76 static void TestJitterbug915(void);
77 static void TestISCII(void);
79 static void TestCoverageMBCS(void);
80 static void TestJitterbug2346(void);
81 static void TestJitterbug2411(void);
84 static void TestRoundTrippingAllUTF(void);
85 static void TestConv(const uint16_t in
[],
91 void addTestNewConvert(TestNode
** root
);
93 /* open a converter, using test data if it begins with '@' */
94 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
/* Capacity of the scratch buffers used by the conversion helpers, and the
   default value of both chunk sizes below. */
#define NEW_MAX_BUFFER 999

/* Per-call input and output chunk sizes; TestNewConvertWithBufferSizes()
   overwrites them before running its conversions. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;

/* Description of the conversion under test, written by setNuConvTestName()
   and appended to log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every use of an argument and the whole expansion are parenthesized so that
 * arguments containing low-precedence operators (for example a & b) are
 * compared as a unit.  Either argument may still be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
/*
 * Opens a converter by name.
 * A name whose first character is '@' is opened with ucnv_openPackage() from
 * the package returned by loadTestData(err), using the name without the
 * leading marker; every other name (including NULL) is handed to ucnv_open().
 * err is forwarded unchanged to whichever open call is used.
 */
105 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
107 if(cnv
&& cnv
[0] == '@') {
/* cnv+1 skips the '@' marker */
108 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
110 return ucnv_open(cnv
, err
);
/*
 * Writes entries of the byte array a to the verbose log, each formatted
 * as 0x%02x followed by a space.
 * NOTE(review): presumably covers len entries starting at index 0 - confirm
 * against the loop bounds.
 */
114 static void printSeq(const unsigned char* a
, int len
)
119 log_verbose("0x%02x ", a
[i
++]);
/*
 * Writes UChars of a to the verbose log, each formatted as 0x%04x followed
 * by a space, advancing i until it reaches len.
 * NOTE(review): i presumably starts at 0 - confirm.
 */
123 static void printUSeq(const UChar
* a
, int len
)
127 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * Dumps entries of the byte array a to stderr as a brace-enclosed list:
 * an opening brace, the entries formatted as 0x%02x, then a closing brace
 * and newline.
 * NOTE(review): presumably covers len entries starting at index 0 - confirm
 * against the loop bounds.
 */
131 static void printSeqErr(const unsigned char* a
, int len
)
134 fprintf(stderr
, "{");
136 fprintf(stderr
, "0x%02x ", a
[i
++]);
137 fprintf(stderr
, "}\n");
/*
 * Dumps UChars of a to stderr: the prefix {U+ is written once, then the
 * entries formatted as 0x%04x, then a closing brace and newline.
 * NOTE(review): presumably covers len entries starting at index 0 - confirm
 * against the loop bounds.
 */
140 static void printUSeqErr(const UChar
* a
, int len
)
143 fprintf(stderr
, "{U+");
145 fprintf(stderr
, "0x%04x ", a
[i
++]);
146 fprintf(stderr
,"}\n");
/*
 * Reads characters from source..limit with ucnv_getNextUChar() and compares
 * each result against results[].
 * r points at a pair in results: *r is the expected number of consumed input
 * bytes (checked only when it is >= 0) and *(r+1) is the expected value.
 * U_INDEX_OUTOFBOUNDS_ERROR ends the loop; any other failure and any
 * mismatch is reported through log_err() with message as prefix.
 * NOTE(review): r presumably advances by two entries per character and s0
 * presumably holds s from before the call - confirm.
 */
150 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
153 const char* s
=(char*)source
;
154 const int32_t *r
=results
;
155 UErrorCode errorCode
=U_ZERO_ERROR
;
160 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
161 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
162 break; /* no more significant input */
163 } else if(U_FAILURE(errorCode
)) {
164 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
167 /* test the expected number of input bytes only if >=0 */
168 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
/* NOTE(review): %d is given the pointer difference s-s0 and %lx is given
   c and *(r+1); confirm these match what log_err() expects. */
171 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
172 message
, c
, (s
-s0
), *(r
+1), *r
);
/*
 * Calls ucnv_getNextUChar() once on source..limit and reports through
 * log_err() when the resulting error code differs from expected, and when
 * the returned value is neither 0xFFFD nor 0xffff.  message is embedded in
 * both reports.
 * NOTE(review): confirm whether the return-value check is nested inside the
 * error-code check or runs independently.
 */
180 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
182 const char* s
=(char*)source
;
183 UErrorCode errorCode
=U_ZERO_ERROR
;
185 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
186 if(errorCode
!= expected
){
187 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
189 if(c
!= 0xFFFD && c
!= 0xffff){
190 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
/*
 * Runs TestNewConvertWithBufferSizes() with varying input chunk sizes.
 * The first argument is the output size and the second the input size:
 * output NEW_MAX_BUFFER with inputs 1 through 6, then the small pairs
 * (1,1), (2,3) and (3,2).
 */
195 static void TestInBufSizes(void)
197 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
199 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
200 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
201 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
202 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
203 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
204 TestNewConvertWithBufferSizes(1,1);
205 TestNewConvertWithBufferSizes(2,3);
206 TestNewConvertWithBufferSizes(3,2);
/*
 * Runs TestNewConvertWithBufferSizes() with varying output chunk sizes:
 * output NEW_MAX_BUFFER and then 1 through 5, always with an input size of
 * NEW_MAX_BUFFER.
 */
210 static void TestOutBufSizes(void)
213 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
214 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
215 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
216 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
217 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
218 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
/*
 * Registers the converter tests of this file in the test tree at root,
 * each under a path of the form tsconv/nucnvtst/<TestName>.
 * Several registrations sit behind build-configuration guards
 * (UCONFIG_NO_LEGACY_CONVERSION, U_ENABLE_GENERIC_ISO_2022,
 * UCONFIG_NO_COLLATION), so the registered set depends on how ICU was
 * configured.
 */
224 void addTestNewConvert(TestNode
** root
)
226 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
227 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
228 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
229 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
230 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
231 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
232 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
233 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
235 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
236 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
237 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
238 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
239 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
240 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
241 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
242 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
244 #if !UCONFIG_NO_LEGACY_CONVERSION
245 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
248 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
250 #if !UCONFIG_NO_LEGACY_CONVERSION
251 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
252 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
253 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
255 #ifdef U_ENABLE_GENERIC_ISO_2022
256 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
259 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
260 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
261 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
262 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
263 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
264 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
265 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
266 addTest(root
, &TestISO_2022_CN_EXT
, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
267 addTest(root
, &TestJitterbug915
, "tsconv/nucnvtst/TestJitterbug915");
268 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
271 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
273 #if !UCONFIG_NO_LEGACY_CONVERSION
274 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
275 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
276 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
277 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
278 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
280 #if !UCONFIG_NO_COLLATION
281 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
284 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
288 #if !UCONFIG_NO_LEGACY_CONVERSION
289 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
292 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
294 #if !UCONFIG_NO_LEGACY_CONVERSION
295 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
296 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
302 /* Note that this test already makes use of statics, so it's not really thread-safe.
304 This convenience function lets us make the error messages actually useful.
/*
 * Formats a description of the conversion under test into the file-level
 * buffer gNuConvTestName with sprintf(); the format takes two strings and
 * the input and output buffer sizes, the last being gOutBufferSize.
 * NOTE(review): presumably the two strings are codepage and direction and
 * the first size is gInBufferSize - confirm.  sprintf() is unbounded, so
 * the result must fit the 1024-byte buffer.
 */
307 static void setNuConvTestName(const char *codepage
, const char *direction
)
309 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
313 (int)gOutBufferSize
);
/* Result codes for the conversion helpers; ETestConvertResult is the
   declared return type of testConvertFromU() and testConvertToU(). */
318 TC_OK
= 0, /* test was OK */
319 TC_MISMATCH
= 1, /* Match failed - err was printed */
320 TC_FAIL
= 2 /* Test failed, don't print an err because it was already printed. */
321 } ETestConvertResult
;
/*
 * Converts sourceLen UChars at source to codepage with ucnv_fromUnicode()
 * and checks the result against expect (expectLen bytes).
 *
 * The converter is opened with my_ucnv_open() and useFallback is applied
 * with ucnv_setFallback().  Input and output are fed in chunks limited by
 * gInBufferSize and gOutBufferSize; output accumulates in the local buffer
 * junkout and offsets in junokout.  The call is repeated while the status is
 * U_BUFFER_OVERFLOW_ERROR, or while it succeeded and input remains.
 * Afterwards the byte count, the offsets (only when checkOffsets is still
 * TRUE and expectOffsets is not null) and the bytes are compared, and
 * mismatches are reported with log_err() and the stderr dump helpers.
 *
 * NOTE(review): the return statements are not among the lines shown here;
 * the declared return type is ETestConvertResult - confirm which code each
 * path yields.
 */
323 /* Note: This function uses global variables and it will not do offset
324 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
325 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
326 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
328 UErrorCode status
= U_ZERO_ERROR
;
329 UConverter
*conv
= 0;
330 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
331 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
338 int32_t realBufferSize
;
340 const UChar
*realSourceEnd
;
341 const UChar
*sourceLimit
;
342 UBool checkOffsets
= TRUE
;
/* pre-fill the output buffer with 0xF0 */
345 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
346 junkout
[i
] = (char)0xF0;
347 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
350 setNuConvTestName(codepage
, "FROM");
352 log_verbose("\n========= %s\n", gNuConvTestName
);
354 conv
= my_ucnv_open(codepage
, &status
);
356 if(U_FAILURE(status
))
358 log_data_err("Couldn't open converter %s\n",codepage
);
362 ucnv_setFallback(conv
,useFallback
);
365 log_verbose("Converter opened..\n");
371 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
372 realBufferEnd
= junkout
+ realBufferSize
;
373 realSourceEnd
= source
+ sourceLen
;
/* offsets are requested only when both chunk sizes are at their maximum */
375 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
376 checkOffsets
= FALSE
;
/* clamp this chunk to the real ends of the output buffer and of the input */
380 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
381 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
383 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
385 if(targ
== realBufferEnd
) {
/* NOTE(review): %08lx is given pointer arguments here and in the verbose
   trace below; confirm this matches what log_err() and log_verbose() expect. */
386 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
389 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
392 status
= U_ZERO_ERROR
;
394 ucnv_fromUnicode (conv
,
399 checkOffsets
? offs
: NULL
,
400 doFlush
, /* flush if we're at the end of the input data */
/* repeat on output overflow, or while input remains after a success */
402 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
404 if(U_FAILURE(status
)) {
405 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
409 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
410 sourceLen
, targ
-junkout
);
415 char offset_str
[9999];
/* build hex dumps of the produced bytes and of their offsets */
420 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
421 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
422 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
426 printSeq((const uint8_t *)expect
, expectLen
);
427 if ( checkOffsets
) {
428 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string itself */
429 log_verbose(offset_str
);
/* first check: number of bytes produced */
435 if(expectLen
!= targ
-junkout
) {
436 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
437 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
439 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
/* NOTE(review): this label goes to stdout while the dumps go to stderr */
440 printf("\nExpected:");
441 printSeqErr((const unsigned char*)expect
, expectLen
);
/* second check: one offset per produced byte, compared as raw memory */
445 if (checkOffsets
&& (expectOffsets
!= 0) ) {
446 log_verbose("comparing %d offsets..\n", targ
-junkout
);
447 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
448 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
449 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
452 for(p
=junkout
;p
<targ
;p
++) {
453 log_err("%d,", junokout
[p
-junkout
]);
456 log_err("Expected: ");
457 for(i
=0; i
<(targ
-junkout
); i
++) {
458 log_err("%d,", expectOffsets
[i
]);
/* third check: the produced bytes themselves */
464 log_verbose("comparing..\n");
465 if(!memcmp(junkout
, expect
, expectLen
)) {
466 log_verbose("Matches!\n");
469 log_err("String does not match u->%s\n", gNuConvTestName
);
470 printUSeqErr(source
, sourceLen
);
472 printSeqErr((const unsigned char *)junkout
, expectLen
);
473 printf("\nExpected:");
474 printSeqErr((const unsigned char *)expect
, expectLen
);
/*
 * Converts sourcelen bytes at source from codepage to Unicode with
 * ucnv_toUnicode() and checks the result against expect (expectlen UChars).
 *
 * The converter is opened with my_ucnv_open() and useFallback is applied
 * with ucnv_setFallback().  Input and output are fed in chunks limited by
 * gInBufferSize and gOutBufferSize; output accumulates in the local buffer
 * junkout and offsets in junokout.  The call is repeated while the status is
 * U_BUFFER_OVERFLOW_ERROR, or while it succeeded and input remains.
 * Afterwards the offsets (only when checkOffsets is still TRUE and
 * expectOffsets is not null) and the UChars are compared with memcmp(), and
 * mismatches are reported with log_err() and the stderr dump helpers.
 *
 * NOTE(review): the return statements are not among the lines shown here;
 * the declared return type is ETestConvertResult - confirm which code each
 * path yields.
 */
480 /* Note: This function uses global variables and it will not do offset
481 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
482 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
483 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
485 UErrorCode status
= U_ZERO_ERROR
;
486 UConverter
*conv
= 0;
487 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
488 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
490 const char *realSourceEnd
;
491 const char *srcLimit
;
497 UBool checkOffsets
= TRUE
;
499 int32_t realBufferSize
;
500 UChar
*realBufferEnd
;
503 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
506 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
509 setNuConvTestName(codepage
, "TO");
511 log_verbose("\n========= %s\n", gNuConvTestName
);
513 conv
= my_ucnv_open(codepage
, &status
);
515 if(U_FAILURE(status
))
/* NOTE(review): this reports gNuConvTestName where testConvertFromU()
   reports codepage */
517 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
521 ucnv_setFallback(conv
,useFallback
);
523 log_verbose("Converter opened..\n");
525 src
= (const char *)source
;
529 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
530 realBufferEnd
= junkout
+ realBufferSize
;
531 realSourceEnd
= src
+ sourcelen
;
/* offsets are requested only when both chunk sizes are at their maximum */
533 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
534 checkOffsets
= FALSE
;
/* clamp this chunk to the real ends of the output buffer and of the input */
538 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
539 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
541 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given pointer arguments here and in the verbose
   trace below; confirm this matches what log_err() and log_verbose() expect. */
543 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
546 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
548 /* oldTarg = targ; */
550 status
= U_ZERO_ERROR
;
552 ucnv_toUnicode (conv
,
557 checkOffsets
? offs
: NULL
,
558 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
561 /* offs += (targ-oldTarg); */
563 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
565 if(U_FAILURE(status
))
567 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
571 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
572 sourcelen
, targ
-junkout
);
576 char offset_str
[9999];
/* build hex dumps of the produced UChars and of their offsets */
582 for(ptr
= junkout
;ptr
<targ
;ptr
++)
584 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
585 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
589 printUSeq(expect
, expectlen
);
592 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string itself */
593 log_verbose(offset_str
);
599 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* offsets: one entry per produced UChar, compared as raw memory */
601 if (checkOffsets
&& (expectOffsets
!= 0))
603 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
604 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
606 for(p
=junkout
;p
<targ
;p
++) {
607 log_err("%d,", junokout
[p
-junkout
]);
610 log_err("Expected: ");
611 for(i
=0; i
<(targ
-junkout
); i
++) {
612 log_err("%d,", expectOffsets
[i
]);
616 for(i
=0; i
<(targ
-junkout
); i
++) {
617 log_err("%X,", junkout
[i
]);
621 for(i
=0; i
<(src
-(const char *)source
); i
++) {
622 log_err("%X,", (unsigned char)source
[i
]);
/* content: expectlen UChars compared as expectlen*2 bytes */
628 if(!memcmp(junkout
, expect
, expectlen
*2))
630 log_verbose("Matches!\n");
635 log_err("String does not match. %s\n", gNuConvTestName
);
636 log_verbose("String does not match. %s\n", gNuConvTestName
);
638 printUSeqErr(junkout
, expectlen
);
/* NOTE(review): this label goes to stdout while the dumps go to stderr */
639 printf("\nExpected:");
640 printUSeqErr(expect
, expectlen
);
646 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
649 /* 1 2 3 1Han 2Han 3Han . */
650 static const UChar sampleText
[] =
651 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
654 static const uint8_t expectedUTF8
[] =
655 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
656 static const int32_t toUTF8Offs
[] =
657 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
658 static const int32_t fmUTF8Offs
[] =
659 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
661 #ifdef U_ENABLE_GENERIC_ISO_2022
662 /* Same as UTF8, but with ^[%B preceding */
663 static const const uint8_t expectedISO2022
[] =
664 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
665 static const int32_t toISO2022Offs
[] =
666 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
667 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
668 static const int32_t fmISO2022Offs
[] =
669 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
672 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
673 static const uint8_t expectedIBM930
[] =
674 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
675 static const int32_t toIBM930Offs
[] =
676 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
677 static const int32_t fmIBM930Offs
[] =
678 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
680 /* 1 2 3 0 h1 h2 h3 . MBCS*/
681 static const uint8_t expectedIBM943
[] =
682 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
683 static const int32_t toIBM943Offs
[] =
684 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
685 static const int32_t fmIBM943Offs
[] =
686 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
688 /* 1 2 3 0 h1 h2 h3 . DBCS*/
689 static const uint8_t expectedIBM9027
[] =
690 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
691 static const int32_t toIBM9027Offs
[] =
692 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
694 /* 1 2 3 0 <?> <?> <?> . SBCS*/
695 static const uint8_t expectedIBM920
[] =
696 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
697 static const int32_t toIBM920Offs
[] =
698 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
700 /* 1 2 3 0 <?> <?> <?> . SBCS*/
701 static const uint8_t expectedISO88593
[] =
702 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
703 static const int32_t toISO88593Offs
[] =
704 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
706 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
707 static const uint8_t expectedLATIN1
[] =
708 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
709 static const int32_t toLATIN1Offs
[] =
710 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
714 static const uint8_t expectedUTF16BE
[] =
715 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
716 static const int32_t toUTF16BEOffs
[]=
717 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
718 static const int32_t fmUTF16BEOffs
[] =
719 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
721 static const uint8_t expectedUTF16LE
[] =
722 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
723 static const int32_t toUTF16LEOffs
[]=
724 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
725 static const int32_t fmUTF16LEOffs
[] =
726 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
728 static const uint8_t expectedUTF32BE
[] =
729 { 0x00, 0x00, 0x00, 0x31,
730 0x00, 0x00, 0x00, 0x32,
731 0x00, 0x00, 0x00, 0x33,
732 0x00, 0x00, 0x00, 0x00,
733 0x00, 0x00, 0x4e, 0x00,
734 0x00, 0x00, 0x4e, 0x8c,
735 0x00, 0x00, 0x4e, 0x09,
736 0x00, 0x00, 0x00, 0x2e };
737 static const int32_t toUTF32BEOffs
[]=
738 { 0x00, 0x00, 0x00, 0x00,
739 0x01, 0x01, 0x01, 0x01,
740 0x02, 0x02, 0x02, 0x02,
741 0x03, 0x03, 0x03, 0x03,
742 0x04, 0x04, 0x04, 0x04,
743 0x05, 0x05, 0x05, 0x05,
744 0x06, 0x06, 0x06, 0x06,
745 0x07, 0x07, 0x07, 0x07,
746 0x08, 0x08, 0x08, 0x08 };
747 static const int32_t fmUTF32BEOffs
[] =
748 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
750 static const uint8_t expectedUTF32LE
[] =
751 { 0x31, 0x00, 0x00, 0x00,
752 0x32, 0x00, 0x00, 0x00,
753 0x33, 0x00, 0x00, 0x00,
754 0x00, 0x00, 0x00, 0x00,
755 0x00, 0x4e, 0x00, 0x00,
756 0x8c, 0x4e, 0x00, 0x00,
757 0x09, 0x4e, 0x00, 0x00,
758 0x2e, 0x00, 0x00, 0x00 };
759 static const int32_t toUTF32LEOffs
[]=
760 { 0x00, 0x00, 0x00, 0x00,
761 0x01, 0x01, 0x01, 0x01,
762 0x02, 0x02, 0x02, 0x02,
763 0x03, 0x03, 0x03, 0x03,
764 0x04, 0x04, 0x04, 0x04,
765 0x05, 0x05, 0x05, 0x05,
766 0x06, 0x06, 0x06, 0x06,
767 0x07, 0x07, 0x07, 0x07,
768 0x08, 0x08, 0x08, 0x08 };
769 static const int32_t fmUTF32LEOffs
[] =
770 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
775 /** Test chars #2 **/
777 /* Sahha [health], slashed h's */
778 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
779 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
782 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
783 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
784 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
785 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
786 /*********************************** START OF CODE finally *************/
788 gInBufferSize
= insize
;
789 gOutBufferSize
= outsize
;
791 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
795 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
796 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
798 log_verbose("Test surrogate behaviour for UTF8\n");
800 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
801 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
802 0xf0, 0x90, 0x90, 0x81,
805 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
806 testConvertFromU(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
807 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
812 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
814 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
815 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
819 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
820 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
822 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
823 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
825 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
826 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
828 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
829 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
832 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
833 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
835 #if !UCONFIG_NO_LEGACY_CONVERSION
837 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
838 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
840 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
841 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
845 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
846 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
848 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
849 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
851 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
852 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
854 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
855 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
862 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
863 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf8", fmUTF8Offs
,FALSE
);
864 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
866 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
867 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ISO_2022", fmISO2022Offs
,FALSE
);
871 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
872 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
874 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
875 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16be", fmUTF16BEOffs
,FALSE
);
877 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
878 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32le", fmUTF32LEOffs
,FALSE
);
880 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
881 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32be", fmUTF32BEOffs
,FALSE
);
883 #if !UCONFIG_NO_LEGACY_CONVERSION
885 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
),
886 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-930", fmIBM930Offs
,FALSE
);
888 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),
889 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-943", fmIBM943Offs
,FALSE
);
892 /* Try it again to make sure it still works */
893 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
894 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
896 #if !UCONFIG_NO_LEGACY_CONVERSION
897 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
898 malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]), "latin3", NULL
,FALSE
);
900 testConvertFromU(malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]),
901 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
904 testConvertFromU(LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]),
905 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
906 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
907 LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]), "LMBCS-1", fmLMBCSOffs
,FALSE
);
910 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
912 /* encode directly set D and set O */
913 static const uint8_t utf7
[] = {
920 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
921 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
923 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
925 static const UChar unicode
[] = {
927 Hi Mom -<WHITE SMILING FACE>-!
928 A<NOT IDENTICAL TO><ALPHA>.
930 [Japanese word "nihongo"]
932 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
933 0x41, 0x2262, 0x0391, 0x2e,
935 0x65e5, 0x672c, 0x8a9e
937 static const int32_t toUnicodeOffsets
[] = {
938 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
943 static const int32_t fromUnicodeOffsets
[] = {
944 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
945 11, 12, 12, 12, 13, 13, 13, 13, 14,
947 16, 16, 16, 17, 17, 17, 18, 18, 18
950 /* same but escaping set O (the exclamation mark) */
951 static const uint8_t utf7Restricted
[] = {
958 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
959 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
961 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
963 static const int32_t toUnicodeOffsetsR
[] = {
964 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
969 static const int32_t fromUnicodeOffsetsR
[] = {
970 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
971 11, 12, 12, 12, 13, 13, 13, 13, 14,
973 16, 16, 16, 17, 17, 17, 18, 18, 18
976 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
978 testConvertToU(utf7
, sizeof(utf7
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7", toUnicodeOffsets
,FALSE
);
980 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
982 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
986 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
987 * modified according to RFC 2060,
988 * and supplemented with the one example in RFC 2060 itself.
991 static const uint8_t imap
[] = {
1002 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1003 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1005 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1007 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1008 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1009 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1010 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1012 static const UChar unicode
[] = {
1013 /* Hi Mom -<WHITE SMILING FACE>-!
1014 A<NOT IDENTICAL TO><ALPHA>.
1016 [Japanese word "nihongo"]
1023 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1024 0x41, 0x2262, 0x0391, 0x2e,
1026 0x65e5, 0x672c, 0x8a9e,
1028 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1029 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1030 0x2f, 0x65e5, 0x672c, 0x8a9e,
1031 0x2f, 0x53f0, 0x5317
1033 static const int32_t toUnicodeOffsets
[] = {
1034 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1039 38, 39, 40, 41, 42, 43,
1044 static const int32_t fromUnicodeOffsets
[] = {
1045 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1046 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1048 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1050 20, 21, 22, 23, 24, 25,
1052 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1053 35, 36, 36, 36, 37, 37, 37, 37, 37
1056 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1058 testConvertToU(imap
, sizeof(imap
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1061 /* Test UTF-8 bad data handling*/
1063 static const uint8_t utf8
[]={
1065 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1068 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1069 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1070 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1071 0xdf, 0xbf, /* 7ff */
1072 0xbf, /* truncated tail */
1073 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1077 static const uint16_t utf8Expected
[]={
1091 static const int32_t utf8Offsets
[]={
1092 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1094 testConvertToU(utf8
, sizeof(utf8
),
1095 utf8Expected
, sizeof(utf8Expected
)/sizeof(utf8Expected
[0]), "utf-8", utf8Offsets
,FALSE
);
1099 /* Test UTF-32BE bad data handling*/
1101 static const uint8_t utf32
[]={
1102 0x00, 0x00, 0x00, 0x61,
1103 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1104 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1105 0x00, 0x00, 0x00, 0x62,
1106 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1107 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1108 0x00, 0x00, 0x01, 0x62,
1109 0x00, 0x00, 0x02, 0x62
1111 static const uint16_t utf32Expected
[]={
1113 0xfffd, /* 0x110000 out of range */
1114 0xDBFF, /* 0x10FFFF in range */
1117 0xfffd, /* 0xffffffff out of range */
1118 0xfffd, /* 0x7fffffff out of range */
1122 static const int32_t utf32Offsets
[]={
1123 0, 4, 8, 8, 12, 16, 20, 24, 28
1125 static const uint8_t utf32ExpectedBack
[]={
1126 0x00, 0x00, 0x00, 0x61,
1127 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1128 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1129 0x00, 0x00, 0x00, 0x62,
1130 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1131 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1132 0x00, 0x00, 0x01, 0x62,
1133 0x00, 0x00, 0x02, 0x62
1135 static const int32_t utf32OffsetsBack
[]={
1146 testConvertToU(utf32
, sizeof(utf32
),
1147 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets
,FALSE
);
1148 testConvertFromU(utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]),
1149 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1152 /* Test UTF-32LE bad data handling*/
1154 static const uint8_t utf32
[]={
1155 0x61, 0x00, 0x00, 0x00,
1156 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1157 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1158 0x62, 0x00, 0x00, 0x00,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1161 0x62, 0x01, 0x00, 0x00,
1162 0x62, 0x02, 0x00, 0x00,
1165 static const uint16_t utf32Expected
[]={
1167 0xfffd, /* 0x110000 out of range */
1168 0xDBFF, /* 0x10FFFF in range */
1171 0xfffd, /* 0xffffffff out of range */
1172 0xfffd, /* 0x7fffffff out of range */
1176 static const int32_t utf32Offsets
[]={
1177 0, 4, 8, 8, 12, 16, 20, 24, 28
1179 static const uint8_t utf32ExpectedBack
[]={
1180 0x61, 0x00, 0x00, 0x00,
1181 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1182 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1183 0x62, 0x00, 0x00, 0x00,
1184 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1185 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1186 0x62, 0x01, 0x00, 0x00,
1187 0x62, 0x02, 0x00, 0x00
1189 static const int32_t utf32OffsetsBack
[]={
1199 testConvertToU(utf32
, sizeof(utf32
),
1200 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32le", utf32Offsets
,FALSE
);
1201 testConvertFromU(utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]),
1202 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
/*
 * Exercises MBCS converter code paths through the test converters
 * "@test1", "@test3" and "@test4".
 *
 * The function copies the current ICU data directory into saveDirectory,
 * points the data directory at the path returned by loadTestData(), hands
 * fixed input / expected-output / offset tables to testConvertFromU() and
 * testConvertToU(), and finally restores the saved data directory.
 */
1206 static void TestCoverageMBCS(){
1208 UErrorCode status
= U_ZERO_ERROR
;
1209 const char *directory
= loadTestData(&status
);
1210 char* tdpath
= NULL
;
/* Buffer sized for a copy of the current data directory plus its terminator.
   NOTE(review): no NULL check on this malloc() result is visible in this view. */
1211 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
/* NOTE(review): directory is passed to strlen() without a visible check of status. */
1212 int len
= strlen(directory
);
/* tdpath gets twice the length of directory.
   NOTE(review): no NULL check on this malloc() result is visible in this view. */
1215 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1216 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1217 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1218 uprv_strcpy(tdpath
,directory
);
/* Find the last file separator in tdpath (the declaration of index is not
   visible in this view).
   NOTE(review): strrchr() returns NULL when there is no separator; no check is visible. */
1219 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
/* True when the last separator is not the final character of tdpath. */
1221 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1224 u_setDataDirectory(tdpath
);
1225 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1228 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1229 which is test file for MBCS conversion with single-byte codepage data.*/
1232 /* MBCS with single byte codepage data test1.ucm*/
1233 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1234 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1235 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
/* from Unicode: unicodeInput, the expected bytes and the offsets go to testConvertFromU() */
1238 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1239 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1242 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1243 which is test file for MBCS conversion with three-byte codepage data.*/
1246 /* MBCS with three byte codepage data test3.ucm*/
1247 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1248 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1249 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1251 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1252 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1253 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
/* from Unicode with "@test3" */
1256 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1257 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
/* to Unicode with "@test3" */
1260 testConvertToU(test3input
, sizeof(test3input
),
1261 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test3", fromtest3Offs
,FALSE
);
1265 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1266 which is test file for MBCS conversion with four-byte codepage data.*/
1269 /* MBCS with four byte codepage data test4.ucm*/
1270 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1271 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1272 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1274 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1275 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1276 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
/* from Unicode with "@test4" */
1279 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1280 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
/* to Unicode with "@test4" */
1283 testConvertToU(test4input
, sizeof(test4input
),
1284 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test4", fromtest4Offs
,FALSE
);
1289 /* restore the original data directory */
1290 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1291 u_setDataDirectory(saveDirectory
);
1292 free(saveDirectory
);
1297 static void TestConverterType(const char *convName
, UConverterType convType
) {
1298 UConverter
* myConverter
;
1299 UErrorCode err
= U_ZERO_ERROR
;
1301 myConverter
= my_ucnv_open(convName
, &err
);
1303 if (U_FAILURE(err
)) {
1304 log_data_err("Failed to create an %s converter\n", convName
);
1309 if (ucnv_getType(myConverter
)!=convType
) {
1310 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1311 convName
, convType
);
1314 log_verbose("ucnv_getType %s ok\n", convName
);
1317 ucnv_close(myConverter
);
/*
 * Checks converter types (and, for "ksc", the starter-byte table).
 *
 * With legacy conversion enabled, the "ksc" converter is opened, its type is
 * compared against UCNV_MBCS and ucnv_getStarters() is called on it. The
 * comparison of the starters against expectedKSCstarters is commented out.
 * Afterwards TestConverterType() is called for a list of converter names and
 * the UConverterType each one is expected to report.
 */
1320 static void TestConverterTypesAndStarters()
1322 #if !UCONFIG_NO_LEGACY_CONVERSION
1323 UConverter
* myConverter
;
1324 UErrorCode err
= U_ZERO_ERROR
;
1325 UBool mystarters
[256];
1327 /* const UBool expectedKSCstarters[256] = {
1328 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1329 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1330 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1331 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1332 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1333 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1334 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1335 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1336 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1337 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1338 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1339 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1340 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1341 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1342 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1343 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1344 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1345 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1346 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1347 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1348 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1349 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1350 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1351 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1352 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1353 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1356 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
/* The converter is opened under the name "ksc"; the log messages below refer
   to it as "ibm-ksc" and "ibm-949". */
1358 myConverter
= ucnv_open("ksc", &err
);
1359 if (U_FAILURE(err
)) {
1360 log_data_err("Failed to create an ibm-ksc converter\n");
1365 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1366 log_err("ucnv_getType Failed for ibm-949\n");
1368 log_verbose("ucnv_getType ibm-949 ok\n");
/* The starters are fetched into mystarters; the check of their contents is
   commented out below. */
1370 if(myConverter
!=NULL
)
1371 ucnv_getStarters(myConverter
, mystarters
, &err
);
1373 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1374 log_err("Failed ucnv_getStarters for ksc\n");
1376 log_verbose("ucnv_getStarters ok\n");*/
1379 ucnv_close(myConverter
);
/* Type checks by converter name; each call opens, checks and closes one converter. */
1381 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1382 TestConverterType("ibm-878", UCNV_SBCS
);
1385 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1387 TestConverterType("ibm-1208", UCNV_UTF8
);
1389 TestConverterType("utf-8", UCNV_UTF8
);
1390 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1391 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1392 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1393 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1395 #if !UCONFIG_NO_LEGACY_CONVERSION
1397 #if defined(U_ENABLE_GENERIC_ISO_2022)
1398 TestConverterType("iso-2022", UCNV_ISO_2022
);
1401 TestConverterType("hz", UCNV_HZ
);
1404 TestConverterType("scsu", UCNV_SCSU
);
1406 #if !UCONFIG_NO_LEGACY_CONVERSION
1407 TestConverterType("x-iscii-de", UCNV_ISCII
);
1410 TestConverterType("ascii", UCNV_US_ASCII
);
1411 TestConverterType("utf-7", UCNV_UTF7
);
1412 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1413 TestConverterType("bocu-1", UCNV_BOCU1
);
/*
 * Checks one converter for consistency between its actual handling of the
 * byte 0x5c and the answer of ucnv_isAmbiguous().
 *
 * cnv  an open converter; it is used for ucnv_toUnicode(), ucnv_isAmbiguous(),
 *      ucnv_fixFileSeparator() and ucnv_getName().
 *
 * The two bytes 0x61 0x5c are converted to Unicode. If the first result is
 * 0x61 and the second is not U+FFFD, then "second result != U+005C" must
 * equal ucnv_isAmbiguous(cnv); when the second result is not U+005C,
 * ucnv_fixFileSeparator() must turn it into U+005C.
 * The declarations of u, s and isAmbiguous are not visible in this view.
 */
1417 TestAmbiguousConverter(UConverter
*cnv
) {
1418 static const char inBytes
[2]={ 0x61, 0x5c };
1419 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1423 UErrorCode errorCode
;
1426 /* try to convert an 'a' and a US-ASCII backslash */
1427 errorCode
=U_ZERO_ERROR
;
/* output limit u+20 matches the 20 elements of outUnicode, input limit s+2 the 2 input bytes */
1430 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+2, NULL
, TRUE
, &errorCode
);
1431 if(U_FAILURE(errorCode
)) {
1432 /* we do not care about general failures in this test; the input may just not be mappable */
1436 if(outUnicode
[0]!=0x61 || outUnicode
[1]==0xfffd) {
1437 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1441 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1443 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1444 if((outUnicode
[1]!=0x5c)!=isAmbiguous
) {
1445 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1446 ucnv_getName(cnv
, &errorCode
), outUnicode
[1]!=0x5c, isAmbiguous
);
1450 if(outUnicode
[1]!=0x5c) {
1451 /* needs fixup, fix it */
/* the length passed is the number of UChars written so far (u-outUnicode) */
1452 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1453 if(outUnicode
[1]!=0x5c) {
1454 /* the fix failed */
1455 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
/*
 * Tests ucnv_isAmbiguous() and ucnv_fixFileSeparator().
 *
 * First every name returned by ucnv_getAvailableName() is opened and handed
 * to TestAmbiguousConverter(); a converter that cannot be opened is reported
 * with log_err(). Then, with legacy conversion enabled, the bytes in target
 * are converted to Unicode with both the "ibm-943" and the "LATIN-1"
 * converter. The test expects ucnv_isAmbiguous() to be true for the ibm-943
 * converter, the two results to differ before ucnv_fixFileSeparator() and to
 * be equal afterwards.
 * The declaration of name is not visible in this view.
 */
1461 static void TestAmbiguous()
1463 UErrorCode status
= U_ZERO_ERROR
;
1464 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1465 static const char target
[] = {
1466 /* "\\usr\\local\\share\\data\\icutest.txt" */
1467 0x5c, 0x75, 0x73, 0x72,
1468 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1469 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1470 0x5c, 0x64, 0x61, 0x74, 0x61,
1471 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1474 UChar asciiResult
[200], sjisResult
[200];
1475 int32_t asciiLength
= 0, sjisLength
= 0, i
;
1478 /* enumerate all converters */
1479 status
=U_ZERO_ERROR
;
1480 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1481 cnv
=ucnv_open(name
, &status
);
1482 if(U_SUCCESS(status
)) {
1483 TestAmbiguousConverter(cnv
);
1486 log_err("error: unable to open available converter \"%s\"\n", name
);
/* reset so that one failed open does not affect the following iterations */
1487 status
=U_ZERO_ERROR
;
1491 #if !UCONFIG_NO_LEGACY_CONVERSION
1492 sjis_cnv
= ucnv_open("ibm-943", &status
);
1493 if (U_FAILURE(status
))
1495 log_data_err("Failed to create a SJIS converter\n");
1498 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1499 if (U_FAILURE(status
))
1501 log_data_err("Failed to create a LATIN-1 converter\n");
1502 ucnv_close(sjis_cnv
);
1505 /* convert target from SJIS to Unicode */
/* NOTE(review): strlen(target) needs target to be NUL-terminated; the
   terminating element of the initializer is not visible in this view - confirm. */
1506 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, sizeof(sjisResult
)/U_SIZEOF_UCHAR
, target
, (int32_t)strlen(target
), &status
);
1507 if (U_FAILURE(status
))
1509 log_err("Failed to convert the SJIS string.\n");
1510 ucnv_close(sjis_cnv
);
1511 ucnv_close(ascii_cnv
);
1514 /* convert target from Latin-1 to Unicode */
1515 asciiLength
= ucnv_toUChars(ascii_cnv
, asciiResult
, sizeof(asciiResult
)/U_SIZEOF_UCHAR
, target
, (int32_t)strlen(target
), &status
);
1516 if (U_FAILURE(status
))
1518 log_err("Failed to convert the Latin-1 string.\n");
1520 ucnv_close(sjis_cnv
);
1521 ucnv_close(ascii_cnv
);
1524 if (!ucnv_isAmbiguous(sjis_cnv
))
1526 log_err("SJIS converter should contain ambiguous character mappings.\n");
1529 ucnv_close(sjis_cnv
);
1530 ucnv_close(ascii_cnv
);
/* before the fix the two conversion results are expected to differ */
1533 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1535 log_err("File separators for SJIS don't need to be fixed.\n");
/* after the fix the two conversion results are expected to be equal */
1537 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1538 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1540 log_err("Fixing file separator for SJIS failed.\n");
1542 ucnv_close(sjis_cnv
);
1543 ucnv_close(ascii_cnv
);
/*
 * Tests ucnv_detectUnicodeSignature() in two passes.
 *
 * Pass 1 calls it with a source length of -1 for the strings of the first
 * data[] table; pass 2 calls it with an explicit length taken from len[] for
 * the strings of the second data[] table. In both passes the returned
 * encoding name is compared with expected[i] and the reported signature
 * length with expectedLength[i].
 * The contents of expected[], expectedLength[] and len[], and the
 * declarations of i and err, are not visible in this view.
 */
1548 TestSignatureDetection(){
1549 /* with null terminated strings */
1551 static const char* data
[] = {
1552 "\xFE\xFF\x00\x00", /* UTF-16BE */
1553 "\xFF\xFE\x00\x00", /* UTF-16LE */
1554 "\xEF\xBB\xBF\x00", /* UTF-8 */
1555 "\x0E\xFE\xFF\x00", /* SCSU */
1557 "\xFE\xFF", /* UTF-16BE */
1558 "\xFF\xFE", /* UTF-16LE */
1559 "\xEF\xBB\xBF", /* UTF-8 */
1560 "\x0E\xFE\xFF", /* SCSU */
1562 "\xFE\xFF\x41\x42", /* UTF-16BE */
1563 "\xFF\xFE\x41\x41", /* UTF-16LE */
1564 "\xEF\xBB\xBF\x41", /* UTF-8 */
1565 "\x0E\xFE\xFF\x41", /* SCSU */
1567 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1568 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1569 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1570 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1571 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1573 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1575 static const char* expected
[] = {
1598 static const int32_t expectedLength
[] ={
1623 int32_t signatureLength
= -1;
1624 const char* source
= NULL
;
1625 const char* enc
= NULL
;
1626 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
/* length -1: the source length is not given explicitly in this pass */
1629 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1631 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
/* NOTE(review): when enc is NULL it is still passed to the %s conversion below. */
1634 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1635 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): the message reads "Expected Length ... Got length" but the
   arguments are passed as (signatureLength, expectedLength[i]), so the two
   printed numbers appear under the wrong labels. */
1638 if(signatureLength
!= expectedLength
[i
]){
1639 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/* second table: the same kind of signatures, used together with the explicit lengths in len[] */
1644 static const char* data
[] = {
1645 "\xFE\xFF\x00", /* UTF-16BE */
1646 "\xFF\xFE\x00", /* UTF-16LE */
1647 "\xEF\xBB\xBF\x00", /* UTF-8 */
1648 "\x0E\xFE\xFF\x00", /* SCSU */
1649 "\x00\x00\xFE\xFF", /* UTF-32BE */
1650 "\xFF\xFE\x00\x00", /* UTF-32LE */
1651 "\xFE\xFF", /* UTF-16BE */
1652 "\xFF\xFE", /* UTF-16LE */
1653 "\xEF\xBB\xBF", /* UTF-8 */
1654 "\x0E\xFE\xFF", /* SCSU */
1655 "\x00\x00\xFE\xFF", /* UTF-32BE */
1656 "\xFF\xFE\x00\x00", /* UTF-32LE */
1657 "\xFE\xFF\x41\x42", /* UTF-16BE */
1658 "\xFF\xFE\x41\x41", /* UTF-16LE */
1659 "\xEF\xBB\xBF\x41", /* UTF-8 */
1660 "\x0E\xFE\xFF\x41", /* SCSU */
1661 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1662 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1663 "\xFB\xEE\x28", /* BOCU-1 */
1664 "\xFF\x41\x42" /* NULL */
1666 static const int len
[] = {
1689 static const char* expected
[] = {
1711 static const int32_t expectedLength
[] ={
1735 int32_t signatureLength
= -1;
1736 int32_t sourceLength
=-1;
1737 const char* source
= NULL
;
1738 const char* enc
= NULL
;
1739 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
1742 sourceLength
= len
[i
];
1743 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1745 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
/* A mismatch is only reported when expected[i] is not NULL.
   NOTE(review): strcmp() is still evaluated with expected[i] when enc is
   non-NULL, and enc may be NULL when it reaches %s - confirm that the table
   entries make this safe. */
1748 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1749 if(expected
[i
] !=NULL
){
1750 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): same label/argument mix-up as in the first pass: the
   arguments are (signatureLength, expectedLength[i]) while the text says
   "Expected Length" first. */
1754 if(signatureLength
!= expectedLength
[i
]){
1755 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1764 static const uint8_t in
[]={
1765 /* H - +Jjo- - ! +- +2AHcAQ */
1768 0x2b, 0x4a, 0x6a, 0x6f,
1772 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1775 /* expected test results */
1776 static const int32_t results
[]={
1777 /* number of bytes read, code point */
1780 4, 0x263a, /* <WHITE SMILING FACE> */
1787 const char *cnvName
;
1788 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1789 UErrorCode errorCode
=U_ZERO_ERROR
;
1790 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1791 if(U_FAILURE(errorCode
)) {
1792 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
)); /* sholdn't be a data err */
1795 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1796 /* Test the condition when source >= sourceLimit */
1797 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1798 cnvName
= ucnv_getName(cnv
, &errorCode
);
1799 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1800 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1808 static const uint8_t in
[]={
1809 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1812 0x26, 0x4a, 0x6a, 0x6f,
1816 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1819 /* expected test results */
1820 static const int32_t results
[]={
1821 /* number of bytes read, code point */
1824 4, 0x263a, /* <WHITE SMILING FACE> */
1831 const char *cnvName
;
1832 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1833 UErrorCode errorCode
=U_ZERO_ERROR
;
1834 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1835 if(U_FAILURE(errorCode
)) {
1836 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
)); /* sholdn't be a data err */
1839 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1840 /* Test the condition when source >= sourceLimit */
1841 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1842 cnvName
= ucnv_getName(cnv
, &errorCode
);
1843 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1844 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1852 static const uint8_t in
[]={
1856 0xf0, 0x90, 0x80, 0x80,
1857 0xf4, 0x84, 0x8c, 0xa1,
1858 0xf0, 0x90, 0x90, 0x81
1861 /* expected test results */
1862 static const int32_t results
[]={
1863 /* number of bytes read, code point */
1872 /* error test input */
1873 static const uint8_t in2
[]={
1875 0xc0, 0x80, /* illegal non-shortest form */
1876 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1877 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1878 0xc0, 0xc0, /* illegal trail byte */
1879 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1880 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1881 0xfe, /* illegal byte altogether */
1885 /* expected error test results */
1886 static const int32_t results2
[]={
1887 /* number of bytes read, code point */
1892 UConverterToUCallback cb
;
1895 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1896 UErrorCode errorCode
=U_ZERO_ERROR
;
1897 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1898 if(U_FAILURE(errorCode
)) {
1899 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1902 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1903 /* Test the condition when source >= sourceLimit */
1904 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1906 /* test error behavior with a skip callback */
1907 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1908 source
=(const char *)in2
;
1909 limit
=(const char *)(in2
+sizeof(in2
));
1910 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
/*
 * Tests the "CESU-8" converter through the TestNextUChar() and
 * TestNextUCharError() helpers.
 *
 * The bytes of in are checked against the (bytes read, code point) pairs in
 * results, then the empty-source case is expected to produce
 * U_INDEX_OUTOFBOUNDS_ERROR. Finally the to-Unicode callback is replaced by
 * UCNV_TO_U_CALLBACK_SKIP and the malformed bytes of in2 are checked against
 * results2.
 * NOTE(review): no return type is visible on the definition line, and the
 * declaration of p is not visible in this view.
 */
1916 static TestCESU8() {
1918 static const uint8_t in
[]={
1922 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1923 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1924 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1928 /* expected test results */
1929 static const int32_t results
[]={
1930 /* number of bytes read, code point */
1936 -1,0xd802, /* may read 3 or 6 bytes */
1937 -1,0x10ffff,/* may read 0 or 3 bytes */
1941 /* error test input */
1942 static const uint8_t in2
[]={
1944 0xc0, 0x80, /* illegal non-shortest form */
1945 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1946 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1947 0xc0, 0xc0, /* illegal trail byte */
1948 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1949 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1950 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1951 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1952 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1953 0xfe, /* illegal byte altogether */
1957 /* expected error test results */
1958 static const int32_t results2
[]={
1959 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback */
1964 UConverterToUCallback cb
;
1967 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1968 UErrorCode errorCode
=U_ZERO_ERROR
;
1969 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
1970 if(U_FAILURE(errorCode
)) {
1971 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
1974 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
1975 /* Test the condition when source >= sourceLimit */
1976 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1978 /* test error behavior with a skip callback */
1979 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1980 source
=(const char *)in2
;
1981 limit
=(const char *)(in2
+sizeof(in2
));
1982 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
/*
 * Tests the "UTF-16" converter through the TestNextUChar() and
 * TestNextUCharError() helpers.
 *
 * Three inputs are used with one converter, which is reset with
 * ucnv_resetToUnicode() between inputs: in1 starts with the bytes FE FF,
 * in2 with FF FE and in3 with FE FE. Each input is checked against its own
 * (bytes read, code point) table. At the end the empty-source case is
 * expected to produce U_INDEX_OUTOFBOUNDS_ERROR.
 * NOTE(review): no return type is visible on the definition line, and the
 * contents of the result tables are not visible in this view.
 */
1988 static TestUTF16() {
1990 static const uint8_t in1
[]={
1991 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1993 static const uint8_t in2
[]={
1994 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1996 static const uint8_t in3
[]={
1997 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2000 /* expected test results */
2001 static const int32_t results1
[]={
2002 /* number of bytes read, code point */
2006 static const int32_t results2
[]={
2007 /* number of bytes read, code point */
2011 static const int32_t results3
[]={
2012 /* number of bytes read, code point */
2019 const char *source
, *limit
;
2021 UErrorCode errorCode
=U_ZERO_ERROR
;
2022 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2023 if(U_FAILURE(errorCode
)) {
2024 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
/* first input: in1 */
2028 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2029 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
/* second input: in2, after resetting the to-Unicode state */
2031 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2032 ucnv_resetToUnicode(cnv
);
2033 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
/* third input: in3, after resetting the to-Unicode state */
2035 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2036 ucnv_resetToUnicode(cnv
);
2037 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2039 /* Test the condition when source >= sourceLimit */
2040 ucnv_resetToUnicode(cnv
);
2041 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * Tests the "utf-16be" converter through the TestNextUChar() and
 * TestNextUCharError() helpers.
 *
 * The bytes of in are checked against the (bytes read, code point) pairs in
 * results and the empty-source case is expected to produce
 * U_INDEX_OUTOFBOUNDS_ERROR. Then the to-Unicode callback is set to
 * UCNV_TO_U_CALLBACK_STOP and the single byte 0x61 is expected to produce
 * U_TRUNCATED_CHAR_FOUND. The last call passes the two bytes D8 01 with the
 * same expected error; the comment text before it describes that test as
 * disabled.
 * NOTE(review): no return type is visible on the definition line, and most
 * of in and results is not visible in this view.
 */
2047 static TestUTF16BE() {
2049 static const uint8_t in
[]={
2055 0xd8, 0x01, 0xdc, 0x01
2058 /* expected test results */
2059 static const int32_t results
[]={
2060 /* number of bytes read, code point */
2069 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2070 UErrorCode errorCode
=U_ZERO_ERROR
;
2071 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2072 if(U_FAILURE(errorCode
)) {
2073 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2076 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2077 /* Test the condition when source >= sourceLimit */
2078 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2079 /*Test for the condition where there is an invalid character*/
2081 static const uint8_t source2
[]={0x61};
2082 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2083 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2087 * Test disabled because currently the UTF-16BE/LE converters are supposed
2088 * to not set errors for unpaired surrogates.
2089 * This may change with
2090 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2093 /*Test for the condition where there is a surrogate pair*/
2095 const uint8_t source2
[]={0xd8, 0x01};
2096 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2105 static const uint8_t in
[]={
2110 0x01, 0xd8, 0x01, 0xdc
2113 /* expected test results */
2114 static const int32_t results
[]={
2115 /* number of bytes read, code point */
2123 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2124 UErrorCode errorCode
=U_ZERO_ERROR
;
2125 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2126 if(U_FAILURE(errorCode
)) {
2127 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2130 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2131 /* Test the condition when source >= sourceLimit */
2132 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2133 /*Test for the condition where there is an invalid character*/
2135 static const uint8_t source2
[]={0x61};
2136 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2137 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2141 * Test disabled because currently the UTF-16BE/LE converters are supposed
2142 * to not set errors for unpaired surrogates.
2143 * This may change with
2144 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2147 /*Test for the condition where there is a surrogate character*/
2149 static const uint8_t source2
[]={0x01, 0xd8};
2150 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * Tests the "UTF-32" converter through the TestNextUChar() and
 * TestNextUCharError() helpers.
 *
 * Three inputs are used with one converter, which is reset with
 * ucnv_resetToUnicode() between inputs: in1 starts with the bytes
 * 00 00 FE FF, in2 with FF FE 00 00 and in3 with 00 00 FE FE. Each input is
 * checked against its own (bytes read, code point) table. At the end the
 * empty-source case is expected to produce U_INDEX_OUTOFBOUNDS_ERROR.
 * NOTE(review): no return type is visible on the definition line, and most
 * of the result tables is not visible in this view.
 */
2158 static TestUTF32() {
2160 static const uint8_t in1
[]={
2161 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2163 static const uint8_t in2
[]={
2164 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2166 static const uint8_t in3
[]={
2167 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2170 /* expected test results */
2171 static const int32_t results1
[]={
2172 /* number of bytes read, code point */
2176 static const int32_t results2
[]={
2177 /* number of bytes read, code point */
2181 static const int32_t results3
[]={
2182 /* number of bytes read, code point */
2185 4, 0xfffd, /* unmatched surrogate */
2186 4, 0xfffd /* unmatched surrogate */
2189 const char *source
, *limit
;
2191 UErrorCode errorCode
=U_ZERO_ERROR
;
2192 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2193 if(U_FAILURE(errorCode
)) {
2194 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
/* first input: in1 */
2198 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2199 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
/* second input: in2, after resetting the to-Unicode state */
2201 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2202 ucnv_resetToUnicode(cnv
);
2203 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
/* third input: in3, after resetting the to-Unicode state */
2205 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2206 ucnv_resetToUnicode(cnv
);
2207 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2209 /* Test the condition when source >= sourceLimit */
2210 ucnv_resetToUnicode(cnv
);
2211 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * UTF-32BE converter test fragment. The function header, the result tables'
 * contents and the closing braces are not present in this listing
 * (presumably this is TestUTF32BE - confirm against the full file).
 *
 * Visible flow:
 *   1. open a "UTF-32BE" converter and report a failure through log_err();
 *   2. run TestNextUChar() over `in`, checked against `results`;
 *   3. check that an empty range (source == limit) is reported as
 *      U_INDEX_OUTOFBOUNDS_ERROR;
 *   4. install UCNV_TO_U_CALLBACK_SKIP and run TestNextUChar() over the
 *      error input `in2`, checked against `results2`.
 */
2219 static const uint8_t in
[]={
2220 0x00, 0x00, 0x00, 0x61,
2221 0x00, 0x00, 0x30, 0x61,
2222 0x00, 0x00, 0xdc, 0x00,
2223 0x00, 0x00, 0xd8, 0x00,
2224 0x00, 0x00, 0xdf, 0xff,
2225 0x00, 0x00, 0xff, 0xfe,
2226 0x00, 0x10, 0xab, 0xcd,
2227 0x00, 0x10, 0xff, 0xff
2230 /* expected test results */
2231 static const int32_t results
[]={
2232 /* number of bytes read, code point */
2243 /* error test input */
2244 static const uint8_t in2
[]={
2245 0x00, 0x00, 0x00, 0x61,
2246 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2247 0x00, 0x00, 0x00, 0x62,
2248 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2249 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2250 0x00, 0x00, 0x01, 0x62,
2251 0x00, 0x00, 0x02, 0x62
2254 /* expected error test results */
2255 static const int32_t results2
[]={
2256 /* number of bytes read, code point */
2263 UConverterToUCallback cb
;
2266 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2267 UErrorCode errorCode
=U_ZERO_ERROR
;
2268 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2269 if(U_FAILURE(errorCode
)) {
2270 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2273 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2275 /* Test the condition when source >= sourceLimit */
2276 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2278 /* test error behavior with a skip callback */
/* The previous callback and context are received in `cb` and `p`; the
 * declaration of `p` is outside the visible lines.
 * NOTE(review): errorCode is not re-checked after this call in the lines
 * shown here. */
2279 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2280 source
=(const char *)in2
;
2281 limit
=(const char *)(in2
+sizeof(in2
));
2282 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
/*
 * UTF-32LE converter test fragment. The function header, the result tables'
 * contents and the closing braces are not present in this listing
 * (presumably this is TestUTF32LE - confirm against the full file).
 *
 * Visible flow mirrors the UTF-32BE fragment with byte-swapped input:
 *   1. open a "UTF-32LE" converter and report a failure through log_err();
 *   2. run TestNextUChar() over `in`, checked against `results`;
 *   3. check that an empty range (source == limit) is reported as
 *      U_INDEX_OUTOFBOUNDS_ERROR;
 *   4. install UCNV_TO_U_CALLBACK_SKIP and run TestNextUChar() over the
 *      error input `in2`, checked against `results2`.
 */
2290 static const uint8_t in
[]={
2291 0x61, 0x00, 0x00, 0x00,
2292 0x61, 0x30, 0x00, 0x00,
2293 0x00, 0xdc, 0x00, 0x00,
2294 0x00, 0xd8, 0x00, 0x00,
2295 0xff, 0xdf, 0x00, 0x00,
2296 0xfe, 0xff, 0x00, 0x00,
2297 0xcd, 0xab, 0x10, 0x00,
2298 0xff, 0xff, 0x10, 0x00
2301 /* expected test results */
2302 static const int32_t results
[]={
2303 /* number of bytes read, code point */
2314 /* error test input */
2315 static const uint8_t in2
[]={
2316 0x61, 0x00, 0x00, 0x00,
2317 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2318 0x62, 0x00, 0x00, 0x00,
2319 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2320 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2321 0x62, 0x01, 0x00, 0x00,
2322 0x62, 0x02, 0x00, 0x00,
2325 /* expected error test results */
2326 static const int32_t results2
[]={
2327 /* number of bytes read, code point */
2334 UConverterToUCallback cb
;
2337 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2338 UErrorCode errorCode
=U_ZERO_ERROR
;
2339 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2340 if(U_FAILURE(errorCode
)) {
2341 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2344 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2346 /* Test the condition when source >= sourceLimit */
2347 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2349 /* test error behavior with a skip callback */
/* The previous callback and context are received in `cb` and `p`; the
 * declaration of `p` is outside the visible lines.
 * NOTE(review): errorCode is not re-checked after this call in the lines
 * shown here. */
2350 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2351 source
=(const char *)in2
;
2352 limit
=(const char *)(in2
+sizeof(in2
));
2353 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
/*
 * LATIN_1 converter test fragment. The function header, the contents of
 * `in`/`results` and the closing braces are not present in this listing
 * (presumably this is TestLATIN1 - confirm against the full file).
 *
 * Visible flow:
 *   1. open a "LATIN_1" converter and report a failure through log_err();
 *   2. run TestNextUChar() over `in`, checked against `results`;
 *   3. check that an empty range (source == limit) is reported as
 *      U_INDEX_OUTOFBOUNDS_ERROR;
 *   4. run TestConv() over `in1` with "LATIN_1" and over `in2` with "ASCII",
 *      passing `out1` / `out2` as the byte arrays.
 *
 * `in1` and `in2` are uint16_t arrays whose elements are byte-sized values;
 * `out1` and `out2` hold the same values as 8-bit arrays.
 */
2361 static const uint8_t in
[]={
2370 /* expected test results */
2371 static const int32_t results
[]={
2372 /* number of bytes read, code point */
2380 static const uint16_t in1
[] = {
2381 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2382 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2383 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2384 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2385 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2386 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2387 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2388 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2389 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2390 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2391 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2394 static const uint8_t out1
[] = {
2395 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2396 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2397 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2398 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2399 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2400 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2401 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2402 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2403 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2404 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2405 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2408 static const uint16_t in2
[]={
2409 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2410 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2411 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2412 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2413 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2414 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2415 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2416 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2417 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2418 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2419 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2420 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2421 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2422 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2423 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2424 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2425 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2426 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2427 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2428 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2429 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2430 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2431 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2432 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2433 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2434 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2435 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2436 0x37, 0x20, 0x2A, 0x2F,
2438 static const unsigned char out2
[]={
2439 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2440 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2441 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2442 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2443 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2444 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2445 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2446 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2447 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2448 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2449 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2450 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2451 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2452 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2453 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2454 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2455 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2456 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2457 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2458 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2459 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2460 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2461 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2462 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2463 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2464 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2465 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2466 0x37, 0x20, 0x2A, 0x2F,
2468 const char *source
=(const char *)in
;
2469 const char *limit
=(const char *)in
+sizeof(in
);
2471 UErrorCode errorCode
=U_ZERO_ERROR
;
2472 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2473 if(U_FAILURE(errorCode
)) {
2474 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2477 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2478 /* Test the condition when source >= sourceLimit */
2479 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* sizeof(inN)/2 is the element count of the uint16_t input arrays; the casts
 * drop the const qualifier of the static tables before calling TestConv(). */
2480 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2481 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
/*
 * SBCS converter test fragment using the "x-mac-turkish" converter. The
 * function header, the contents of `results` and the closing braces are not
 * present in this listing (presumably this is TestSBCS - confirm).
 *
 * Visible flow: open the converter (failure is reported with log_data_err()),
 * run TestNextUChar() over `in` against `results`, then check that an empty
 * range (source == limit) is reported as U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * NOTE(review): the disabled illegal-character check at the end of this
 * fragment passes illegalsource+sizeof(illegalsource) as the limit. Since
 * illegalsource is a pointer, that is the size of a pointer, not
 * sizeof(input1); correct this before re-enabling the check.
 */
2489 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2490 /* expected test results */
2491 static const int32_t results
[]={
2492 /* number of bytes read, code point */
2501 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2502 UErrorCode errorCode
=U_ZERO_ERROR
;
2503 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
2504 if(U_FAILURE(errorCode
)) {
2505 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2508 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2509 /* Test the condition when source >= sourceLimit */
2510 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2511 /*Test for Illegal character */ /*
2513 static const uint8_t input1[]={ 0xA1 };
2514 const char* illegalsource=(const char*)input1;
2515 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
/*
 * DBCS converter test fragment using the "@ibm9027" converter opened through
 * my_ucnv_open(). The function header, the contents of `in`/`results` and
 * the closing braces are not present in this listing (presumably this is
 * TestDBCS - confirm).
 *
 * Visible flow:
 *   1. open the converter (failure is reported with log_data_err());
 *   2. run TestNextUChar() over `in` against `results`;
 *   3. check that an empty range yields U_INDEX_OUTOFBOUNDS_ERROR;
 *   4. feed the two bytes 0x1a 0x1b and expect U_ZERO_ERROR;
 *   5. switch to UCNV_TO_U_CALLBACK_STOP and expect U_TRUNCATED_CHAR_FOUND
 *      for the single byte 0xc4.
 */
2524 static const uint8_t in
[]={
2533 /* expected test results */
2534 static const int32_t results
[]={
2535 /* number of bytes read, code point */
2543 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2544 UErrorCode errorCode
=U_ZERO_ERROR
;
2546 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2547 if(U_FAILURE(errorCode
)) {
2548 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2551 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2552 /* Test the condition when source >= sourceLimit */
2553 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2554 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status for this "invalid character" case is
 * U_ZERO_ERROR, i.e. the test expects no error to be reported here. */
2556 static const uint8_t source2
[]={0x1a, 0x1b};
2557 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2559 /*Test for the condition where we have a truncated char*/
2561 static const uint8_t source1
[]={0xc4};
2562 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2563 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
/*
 * MBCS converter test fragment using the "ibm-1363" converter. The function
 * header, the contents of `in`/`results` and the closing braces are not
 * present in this listing (presumably this is TestMBCS - confirm).
 *
 * Visible flow:
 *   1. open the converter (failure is reported with log_data_err());
 *   2. run TestNextUChar() over `in` against `results`;
 *   3. check that an empty range yields U_INDEX_OUTOFBOUNDS_ERROR;
 *   4. feed the two bytes 0xa1 0x01 and expect U_ZERO_ERROR;
 *   5. switch to UCNV_TO_U_CALLBACK_STOP and expect U_TRUNCATED_CHAR_FOUND
 *      for the single byte 0xc4.
 */
2571 static const uint8_t in
[]={
2582 /* expected test results */
2583 static const int32_t results
[]={
2584 /* number of bytes read, code point */
2594 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2595 UErrorCode errorCode
=U_ZERO_ERROR
;
2597 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2598 if(U_FAILURE(errorCode
)) {
2599 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2602 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2603 /* Test the condition when source >= sourceLimit */
2604 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2605 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status for this "invalid character" case is
 * U_ZERO_ERROR, i.e. the test expects no error to be reported here. */
2607 static const uint8_t source2
[]={0xa1, 0x01};
2608 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2610 /*Test for the condition where we have a truncated char*/
2612 static const uint8_t source1
[]={0xc4};
2613 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2614 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2620 #ifdef U_ENABLE_GENERIC_ISO_2022
/*
 * Generic ISO_2022 converter test fragment (compiled only when
 * U_ENABLE_GENERIC_ISO_2022 is defined). The function header, most of the
 * table contents and the closing braces are not present in this listing
 * (presumably this is TestISO_2022 - confirm).
 *
 * Visible flow:
 *   1. open an "ISO_2022" converter (failure reported with log_data_err());
 *   2. run TestNextUChar() over `in` against `results`;
 *   3. expect U_ILLEGAL_ARGUMENT_ERROR when the limit is below the source and
 *      U_INDEX_OUTOFBOUNDS_ERROR when the range is empty;
 *   4. switch to UCNV_TO_U_CALLBACK_STOP and expect U_TRUNCATED_CHAR_FOUND
 *      for the single byte 0xc4;
 *   5. expect U_ILLEGAL_CHAR_FOUND for the bytes 0xa1 0x01.
 */
2625 static const uint8_t in
[]={
2632 0xf0, 0x90, 0x80, 0x80
2637 /* expected test results */
2638 static const int32_t results
[]={
2639 /* number of bytes read, code point */
2640 4, 0x0031, /* 4 bytes including the escape sequence */
2648 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2649 UErrorCode errorCode
=U_ZERO_ERROR
;
2652 cnv
=ucnv_open("ISO_2022", &errorCode
);
2653 if(U_FAILURE(errorCode
)) {
2654 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2657 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2659 /* Test the condition when source >= sourceLimit */
/* NOTE(review): at the point where `source` is initialised it points at the
 * first byte of `in`; if it still does here, source-1 forms a pointer before
 * the start of the array, which is undefined behaviour in ISO C even when the
 * pointer is never dereferenced. */
2660 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2661 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2662 /*Test for the condition where we have a truncated char*/
2664 static const uint8_t source1
[]={0xc4};
2665 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2666 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2668 /*Test for the condition where there is an invalid character*/
2670 static const uint8_t source2
[]={0xa1, 0x01};
2671 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
/*
 * TestSmallTargetBuffer: round-trips [source, sourceLimit) through `cnv`
 * while growing the target limit by `i` on every pass, so each
 * ucnv_fromUnicode() / ucnv_toUnicode() call sees a small output window.
 * U_BUFFER_OVERFLOW_ERROR is reset and the loop continues; any other
 * failure is reported with log_err().
 *
 * source      - first UTF-16 unit of the text to convert
 * sourceLimit - one past the last unit
 * cnv         - converter used for both directions
 *
 * Several original lines (declarations of cBuf/uBuf/cTarget/uTarget/i/len,
 * the do-loop heads and the closing braces) are not present in this listing.
 */
2679 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2680 const UChar
* uSource
;
2681 const UChar
* uSourceLimit
;
2682 const char* cSource
;
2683 const char* cSourceLimit
;
2684 UChar
*uTargetLimit
=NULL
;
2687 const char *cTargetLimit
;
2690 int32_t uBufSize
= 120;
2693 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): no NULL check of the malloc() results is visible here. */
2694 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2695 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2698 uSource
= (UChar
*) source
;
2699 uSourceLimit
=(const UChar
*)sourceLimit
;
/* Both target limits start at the beginning of their buffers and are moved
 * forward by `i` before each conversion call below. */
2703 cTargetLimit
= cBuf
;
2704 uTargetLimit
= uBuf
;
2708 cTargetLimit
= cTargetLimit
+ i
;
2709 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2710 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2711 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): this message follows a ucnv_fromUnicode() call but names
 * ucnv_toUnicode. */
2715 if(U_FAILURE(errorCode
)){
2716 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2720 }while (uSource
<uSourceLimit
);
2722 cSourceLimit
=cTarget
;
2724 uTargetLimit
=uTargetLimit
+i
;
2725 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2726 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2727 errorCode
=U_ZERO_ERROR
;
2730 if(U_FAILURE(errorCode
)){
2731 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2734 }while(cSource
<cSourceLimit
);
/* NOTE(review): the bound is (source - sourceLimit). The callers visible in
 * this file pass sourceLimit >= source, which makes the bound <= 0, so this
 * comparison loop would never execute. It looks like
 * (sourceLimit - source) was intended - confirm and fix. */
2738 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2739 if(uBuf
[len
]!=uSource
[len
]){
2740 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
/*
 * TestToAndFromUChars: round-trips [source, sourceLimit) through the
 * one-shot APIs ucnv_fromUChars() and ucnv_toUChars() and compares the
 * result with the input, reporting each mismatch with log_err().
 *
 * source      - first UTF-16 unit of the text to convert
 * sourceLimit - one past the last unit
 * cnv         - converter used for both directions
 *
 * Several original lines (declarations of cBuf/uBuf/cTarget/uTarget/test,
 * the assignments of uSource/cSource and the closing braces) are not
 * present in this listing.
 */
2747 /* Test for Jitterbug 778 */
2748 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2749 const UChar
* uSource
;
2750 const UChar
* uSourceLimit
;
2751 const char* cSource
;
2752 UChar
*uTargetLimit
=NULL
;
2755 const char *cTargetLimit
;
2758 int32_t uBufSize
= 120;
2759 int numCharsInTarget
=0;
2760 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Both buffers hold uBufSize*5 elements, matching the limits set below.
 * NOTE(review): no NULL check of the malloc() results is visible here. */
2761 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2762 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2764 uSourceLimit
=sourceLimit
;
2766 cTargetLimit
= cBuf
+uBufSize
*5;
2768 uTargetLimit
= uBuf
+ uBufSize
*5;
/* numCharsInTarget receives the ucnv_fromUChars() result and is then used as
 * the source length for ucnv_toUChars(). */
2770 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
/* NOTE(review): the message names ucnv_fromUnicode although the call above
 * is ucnv_fromUChars(). */
2771 if(U_FAILURE(errorCode
)){
2772 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2777 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2778 if(U_FAILURE(errorCode
)){
2779 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2783 while(uSource
<uSourceLimit
){
2784 if(*test
!=*uSource
){
2786 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * TestSmallSourceBuffer: round-trips [source, sourceLimit) through `cnv`
 * while feeding the input in small steps. The source limit is advanced by
 * one element per pass (uSourceLimit for ucnv_fromUnicode(), cSourceLimit
 * for ucnv_toUnicode()) while the target buffers are given their full size.
 * U_BUFFER_OVERFLOW_ERROR is reset and the loop continues; any other
 * failure is reported with log_err().
 *
 * source      - first UTF-16 unit of the text to convert
 * sourceLimit - one past the last unit
 * cnv         - converter used for both directions
 *
 * Several original lines (declarations of cBuf/uBuf/cTarget/uTarget/len,
 * the do-loop heads and the closing braces) are not present in this listing.
 */
2795 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2796 const UChar
* uSource
;
2797 const UChar
* uSourceLimit
;
2798 const char* cSource
;
2799 const char* cSourceLimit
;
2800 UChar
*uTargetLimit
=NULL
;
2803 const char *cTargetLimit
;
2806 int32_t uBufSize
= 120;
2809 const UChar
*temp
= sourceLimit
;
2810 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): no NULL check of the malloc() results is visible here. */
2811 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2812 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2816 uSource
= (UChar
*) source
;
/* Target limits cover the whole allocations (uBufSize*5 UChars and
 * uBufSize*10 chars); the source limit starts empty and grows below. */
2820 cTargetLimit
= cBuf
;
2821 uTargetLimit
= uBuf
+uBufSize
*5;
2822 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2823 uSourceLimit
=uSource
;
2826 if (uSourceLimit
< sourceLimit
) {
2827 uSourceLimit
= uSourceLimit
+1;
2829 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2830 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2831 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): this message follows a ucnv_fromUnicode() call but names
 * ucnv_toUnicode. */
2835 if(U_FAILURE(errorCode
)){
2836 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2840 }while (uSource
<temp
);
/* cBuf + (cTarget - cBuf) is simply cTarget, the end of the converted bytes. */
2844 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2845 cSourceLimit
= cSourceLimit
+1;
2847 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2848 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2849 errorCode
=U_ZERO_ERROR
;
2852 if(U_FAILURE(errorCode
)){
2853 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2856 }while(cSource
<cTarget
);
/* NOTE(review): the bound is (source - sourceLimit). The callers visible in
 * this file pass sourceLimit >= source, which makes the bound <= 0, so this
 * comparison loop would never execute. It looks like
 * (sourceLimit - source) was intended - confirm and fix. */
2860 for(;len
<(int)(source
- sourceLimit
);len
++){
2861 if(uBuf
[len
]!=uSource
[len
]){
2862 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
/*
 * TestGetNextUChar2022: reads code points from [source, limit) one at a time
 * with ucnv_getNextUChar() and compares each with the expected UTF-16 text
 * in `results`, logging mismatches with the `message` prefix.
 *
 * cnv     - converter to read with
 * source  - first byte of the encoded input
 * limit   - one past the last byte
 * results - expected text as UTF-16 code units; a lead surrogate is combined
 *           with the following unit through UTF_NEXT_CHAR_SAFE
 * message - label used in log output
 *
 * U_INDEX_OUTOFBOUNDS_ERROR ends the loop ("no more significant input");
 * any other failure is logged. The loop head, the declarations of
 * c/exC/i/len and the closing braces are not present in this listing.
 */
2870 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2871 const uint16_t results
[], const char* message
){
2873 const char* s
=(char*)source
;
2874 const uint16_t *r
=results
;
2875 UErrorCode errorCode
=U_ZERO_ERROR
;
2880 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2881 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2882 break; /* no more significant input */
2883 } else if(U_FAILURE(errorCode
)) {
2884 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
2887 if(UTF_IS_FIRST_SURROGATE(*r
)){
2889 UTF_NEXT_CHAR_SAFE(r
, i
, len
, exC
, FALSE
);
/* NOTE(review): the comparison uses exC but the failure message prints *r;
 * for a surrogate pair those differ, so the "Expected" value in the log may
 * not be the value that was compared - confirm against the hidden lines. */
2894 if(c
!=(uint32_t)(exC
))
2895 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * TestJitterbug930: regression test for the offsets array written by
 * ucnv_fromUnicode() for the converter named by `enc`.
 *
 * Visible flow: open the converter with my_ucnv_open(), fill the `off` array
 * with '*' bytes as a sentinel, convert, count the offsets that were written
 * and compare the count with the number of bytes produced (target-out),
 * logging a failure when they differ. The buffer is then refilled and a
 * second conversion is checked, where the logged expectation is -1.
 *
 * enc - converter name to test
 *
 * The declarations of in/out/off/target, the arguments of both
 * ucnv_fromUnicode() calls, the loop bodies, the return statement and the
 * closing braces are not present in this listing.
 */
2901 static int TestJitterbug930(const char* enc
){
2902 UErrorCode err
= U_ZERO_ERROR
;
2903 UConverter
*converter
;
2907 const UChar
*source
= in
;
2909 int32_t* offsets
= off
;
2910 int numOffWritten
=0;
/* NOTE(review): no U_FAILURE(err) check is visible after my_ucnv_open(). */
2912 converter
= my_ucnv_open(enc
, &err
);
2914 in
[0] = 0x41; /* 0x4E00;*/
/* Every byte of `off` becomes '*'; the loops below read off[10] as the
 * untouched reference value - presumably fewer than 11 offsets are ever
 * written; verify. */
2919 memset(off
, '*', sizeof(off
));
2921 ucnv_fromUnicode (converter
,
2930 /* writes three bytes into the output buffer: 41 1B 24
2931 * but offsets contains 0 1 1
2933 while(*offsets
< off
[10]){
2937 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
2938 if(numOffWritten
!= (int)(target
-out
)){
2939 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
2944 memset(off
,'*' , sizeof(off
));
2948 ucnv_fromUnicode (converter
,
2957 while(*offsets
< off
[10]){
2960 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
2965 /* writes 42 43 7A into output buffer,
2966 * offsets contains -1 -1 -1
2968 ucnv_close(converter
);
/*
 * HZ converter round-trip test fragment. The function header, several local
 * declarations and the closing braces are not present in this listing
 * (presumably this is TestHZ - confirm).
 *
 * Visible flow:
 *   1. open an "HZ" converter (failure reported with log_data_err());
 *   2. convert `in` to bytes with ucnv_fromUnicode() and back with
 *      ucnv_toUnicode(), both with flush=TRUE and the same offsets buffer;
 *   3. compare the round-tripped text with `in`, logging mismatches;
 *   4. re-run the data through TestGetNextUChar2022(),
 *      TestSmallTargetBuffer(), TestSmallSourceBuffer() and
 *      TestToAndFromUChars();
 *   5. call TestJitterbug930("csISO2022JP").
 */
2975 static const uint16_t in
[]={
2976 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2977 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2978 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2979 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2980 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2981 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2982 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2983 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2984 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2985 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2986 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2987 0x005A, 0x005B, 0x005C, 0x000A
2989 const UChar
* uSource
;
2990 const UChar
* uSourceLimit
;
2991 const char* cSource
;
2992 const char* cSourceLimit
;
2993 UChar
*uTargetLimit
=NULL
;
2996 const char *cTargetLimit
;
2999 int32_t uBufSize
= 120;
3000 UErrorCode errorCode
=U_ZERO_ERROR
;
/* The offsets buffer has uBufSize*5 entries, the same element count as cBuf
 * and uBuf below. NOTE(review): no NULL check of malloc() is visible. */
3002 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3003 int32_t* myOff
= offsets
;
3004 cnv
=ucnv_open("HZ", &errorCode
);
3005 if(U_FAILURE(errorCode
)) {
3006 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3010 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3011 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3012 uSource
= (const UChar
*)in
;
3013 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3015 cTargetLimit
= cBuf
+uBufSize
*5;
3017 uTargetLimit
= uBuf
+ uBufSize
*5;
3018 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3019 if(U_FAILURE(errorCode
)){
3020 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3024 cSourceLimit
=cTarget
;
3027 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3028 if(U_FAILURE(errorCode
)){
3029 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3032 uSource
= (const UChar
*)in
;
3033 while(uSource
<uSourceLimit
){
3034 if(*test
!=*uSource
){
3036 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3041 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3042 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3043 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3044 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
/* NOTE(review): the Jitterbug 930 check is run for "csISO2022JP" from this
 * HZ fragment rather than for the HZ converter - confirm this is intended. */
3045 TestJitterbug930("csISO2022JP");
/*
 * ISCII converter test fragment. The function header, parts of both tables
 * and the closing braces are not present in this listing (presumably this is
 * TestISCII - confirm).
 *
 * `in` holds the Unicode text and `byteArr` the ISCII byte sequence, with the
 * sections labelled by the comments inside the tables. The visible calls
 * convert `byteArr` to Unicode with testConvertToU() using "x-iscii-de" and
 * compare with `in`, then run TestConv() over `in` with "ISCII,version=0".
 */
3055 static const uint16_t in
[]={
3056 /* test full range of Devanagari */
3057 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3058 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3059 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3060 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3061 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3062 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3063 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3064 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3065 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3066 0x096D,0x096E,0x096F,
3067 /* test Soft halant*/
3068 0x0915,0x094d, 0x200D,
3069 /* test explicit halant */
3070 0x0915,0x094d, 0x200c,
3071 /* test double danda */
3074 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3075 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3076 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3077 /* tests from Lotus */
3078 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3079 0x0930,0x094D,0x200D,
3080 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3081 0x0915,0x0921,0x002B,0x095F,
3083 0x0B86, 0xB87, 0xB88,
3085 0x0C05, 0x0C02, 0x0C03,0x0c31,
3087 0x0C85, 0xC82, 0x0C83,
3088 /* test Abbr sign and Anudatta */
3098 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3099 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3102 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3103 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3104 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3105 0x093D /* Avagraha 0xEA, 0xE9*/,
3113 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3115 static const unsigned char byteArr
[]={
3117 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3118 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3119 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3120 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3121 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3122 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3123 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3124 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3125 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3127 /* test soft halant */
3129 /* test explicit halant */
3131 /* test double danda */
3134 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3135 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3136 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3139 /* tests from Lotus */
3140 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3141 0xEF,0x42,0xCF,0xE8,0xD9,
3142 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3143 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3145 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3147 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3149 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3150 /* anudatta and abbreviation sign */
3151 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3154 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3156 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3158 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3160 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3162 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3164 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3166 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3168 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3170 0xB3, 0xE9, /* Ka + NUKTA */
3172 0xB4, 0xE9, /* Kha + NUKTA */
3174 0xB5, 0xE9, /* Ga + NUKTA */
3186 /* just consume unhandled codepoints */
/* NOTE(review): the element count of `in` is computed as
 * sizeof(in)/U_SIZEOF_UCHAR in the first call and sizeof(in)/2 in the second;
 * the two agree only if U_SIZEOF_UCHAR is 2 - confirm. */
3190 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,(sizeof(in
)/U_SIZEOF_UCHAR
),"x-iscii-de",NULL
,TRUE
);
3191 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
/*
 * ISO-2022-JP round-trip test fragment. The function header, several local
 * declarations and the closing braces are not present in this listing
 * (presumably this is TestISO_2022_JP - confirm; note that the converter
 * opened below is "ISO_2022_JP_1").
 *
 * Visible flow:
 *   1. open an "ISO_2022_JP_1" converter (failure reported with
 *      log_data_err());
 *   2. convert `in` to bytes with ucnv_fromUnicode() and back with
 *      ucnv_toUnicode(), both with flush=TRUE and the same offsets buffer;
 *   3. compare the round-tripped text with `in`, logging mismatches;
 *   4. re-run the data through TestSmallTargetBuffer(),
 *      TestSmallSourceBuffer(), TestGetNextUChar2022() and
 *      TestToAndFromUChars();
 *   5. call TestJitterbug930("csISO2022JP").
 */
3198 static const uint16_t in
[]={
3199 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3200 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3201 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3202 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3203 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3204 0x201D, 0x3014, 0x000D, 0x000A,
3205 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3206 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3208 const UChar
* uSource
;
3209 const UChar
* uSourceLimit
;
3210 const char* cSource
;
3211 const char* cSourceLimit
;
3212 UChar
*uTargetLimit
=NULL
;
3215 const char *cTargetLimit
;
3218 int32_t uBufSize
= 120;
3219 UErrorCode errorCode
=U_ZERO_ERROR
;
/* The offsets buffer has uBufSize*5 entries, the same element count as cBuf
 * and uBuf below. NOTE(review): no NULL check of malloc() is visible. */
3221 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3222 int32_t* myOff
= offsets
;
3223 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3224 if(U_FAILURE(errorCode
)) {
3225 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3229 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3230 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3231 uSource
= (const UChar
*)in
;
3232 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3234 cTargetLimit
= cBuf
+uBufSize
*5;
3236 uTargetLimit
= uBuf
+ uBufSize
*5;
3237 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3238 if(U_FAILURE(errorCode
)){
3239 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3243 cSourceLimit
=cTarget
;
3246 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3247 if(U_FAILURE(errorCode
)){
3248 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3252 uSource
= (const UChar
*)in
;
3253 while(uSource
<uSourceLimit
){
3254 if(*test
!=*uSource
){
3256 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3262 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3263 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3264 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3265 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3266 TestJitterbug930("csISO2022JP");
/*
 * TestConv: generic round-trip driver for one converter.
 *
 * in         - UTF-16 input text
 * len        - number of code units in `in`
 * conv       - converter name passed to my_ucnv_open()
 * lang       - label used in log output for the byteArr check
 * byteArr    - optional expected byte sequence (may be NULL)
 * byteArrLen - number of bytes in byteArr (0 when byteArr is unused)
 *
 * Visible flow: open the converter, convert `in` to bytes with
 * ucnv_fromUnicode() and back with ucnv_toUnicode() (flush=TRUE), compare
 * with `in`, then run TestSmallTargetBuffer(), TestSmallSourceBuffer() and
 * TestGetNextUChar2022() on the same data. When byteArr is supplied, it is
 * additionally checked with TestGetNextUChar2022() and TestToAndFromUChars()
 * and decoded once more with ucnv_toUnicode() and compared with `in`.
 *
 * Several original lines (declarations of cBuf/uBuf/cTarget/uTarget/test/cnv,
 * pointer resets between the phases and the closing braces) are not present
 * in this listing.
 */
3273 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3274 const UChar
* uSource
;
3275 const UChar
* uSourceLimit
;
3276 const char* cSource
;
3277 const char* cSourceLimit
;
3278 UChar
*uTargetLimit
=NULL
;
3281 const char *cTargetLimit
;
/* All three work buffers (offsets, uBuf, cBuf) hold uBufSize elements.
 * NOTE(review): no NULL check of the malloc() results is visible, and inputs
 * whose converted form needs more than uBufSize elements would end in
 * U_BUFFER_OVERFLOW_ERROR. */
3284 int32_t uBufSize
= 120*10;
3285 UErrorCode errorCode
=U_ZERO_ERROR
;
3287 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3288 int32_t* myOff
= offsets
;
3289 cnv
=my_ucnv_open(conv
, &errorCode
);
3290 if(U_FAILURE(errorCode
)) {
3291 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3295 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3296 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3297 uSource
= (const UChar
*)in
;
3298 uSourceLimit
=uSource
+len
;
3300 cTargetLimit
= cBuf
+uBufSize
;
3302 uTargetLimit
= uBuf
+ uBufSize
;
3303 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3304 if(U_FAILURE(errorCode
)){
3305 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3308 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3310 cSourceLimit
=cTarget
;
3313 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3314 if(U_FAILURE(errorCode
)){
3315 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
3319 uSource
= (const UChar
*)in
;
3320 while(uSource
<uSourceLimit
){
3321 if(*test
!=*uSource
){
3322 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
3327 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3328 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3329 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
/* Optional second phase: validate the caller-supplied byte sequence. */
3330 if(byteArr
&& byteArrLen
!=0){
3331 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3332 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
3335 cSourceLimit
= cSource
+byteArrLen
;
3338 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3339 if(U_FAILURE(errorCode
)){
3340 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3344 uSource
= (const UChar
*)in
;
3345 while(uSource
<uSourceLimit
){
3346 if(*test
!=*uSource
){
3347 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3360 static UChar U_CALLCONV
3361 _charAt(int32_t offset
, void *context
) {
3362 return ((char*)context
)[offset
];
3366 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3369 if(U_FAILURE(*status
)){
3372 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3373 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3377 srcLen
= (int32_t)uprv_strlen(src
);
3380 for (; srcIndex
<srcLen
; ) {
3381 UChar32 c
= src
[srcIndex
++];
3382 if (c
== 0x005C /*'\\'*/) {
3383 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3384 if (c
== (UChar32
)0xFFFFFFFF) {
3385 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3386 break; /* invalid escape sequence */
3389 if(dstIndex
< dstLen
){
3391 dst
[dstIndex
++] = UTF16_LEAD(c
);
3392 if(dstIndex
<dstLen
){
3393 dst
[dstIndex
]=UTF16_TRAIL(c
);
3395 *status
=U_BUFFER_OVERFLOW_ERROR
;
3398 dst
[dstIndex
]=(UChar
)c
;
3402 *status
= U_BUFFER_OVERFLOW_ERROR
;
3404 dstIndex
++; /* for preflighting */
/*
 * Exhaustive round-trip driver for the converter named cp.
 * Calls TestConv() on the zero-filled usource/nsrc buffers first, then loops over i up to
 * 0x10FFFF and calls TestConv() on that code point alone, on the code point repeated, and on
 * nsrc with a length of len+2. No expected byte array is supplied (NULL,0), so only the
 * UTF-16 -> cp -> UTF-16 round trip is checked.
 */
3410 TestFullRoundtrip(const char* cp
){
3411 UChar usource
[10] ={0};
3412 UChar nsrc
[10] = {0};
3416 /* Test codepoint 0 */
3417 TestConv(usource
,1,cp
,"",NULL
,0);
3418 TestConv(usource
,2,cp
,"",NULL
,0);
3420 TestConv(nsrc
,3,cp
,"",NULL
,0);
/* usource[0..1] receives i either as one UChar or as a UTF16_LEAD/UTF16_TRAIL pair */
3422 for(;i
<=0x10FFFF;i
++){
3428 usource
[0] =(UChar
) i
;
3431 usource
[0]=UTF16_LEAD(i
);
3432 usource
[1]=UTF16_TRAIL(i
);
3439 /* Test only single code points */
3440 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3441 /* Test codepoint repeated twice */
3442 usource
[ulen
]=usource
[0];
3443 usource
[ulen
+1]=usource
[1];
3445 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3446 /* Test codepoint repeated 3 times */
3447 usource
[ulen
]=usource
[0];
3448 usource
[ulen
+1]=usource
[1];
3450 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3451 /* Test codepoint in between 2 codepoints */
3455 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* zero all 10 units of usource again after the last call */
3456 uprv_memset(usource
,0,sizeof(UChar
)*10);
/*
 * Runs TestFullRoundtrip() once for each converter name listed below, logging a verbose
 * banner before each run.
 */
3461 TestRoundTrippingAllUTF(void){
3463 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3464 TestFullRoundtrip("BOCU-1");
3465 log_verbose("Running exhaustive round trip test for SCSU\n");
3466 TestFullRoundtrip("SCSU");
3467 log_verbose("Running exhaustive round trip test for UTF-8\n");
3468 TestFullRoundtrip("UTF-8");
3469 log_verbose("Running exhaustive round trip test for CESU-8\n");
3470 TestFullRoundtrip("CESU-8");
3471 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3472 TestFullRoundtrip("UTF-16BE");
3473 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3474 TestFullRoundtrip("UTF-16LE");
3475 log_verbose("Running exhaustive round trip test for UTF-16\n");
3476 TestFullRoundtrip("UTF-16");
3477 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3478 TestFullRoundtrip("UTF-32BE");
3479 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3480 TestFullRoundtrip("UTF-32LE");
3481 log_verbose("Running exhaustive round trip test for UTF-32\n");
3482 TestFullRoundtrip("UTF-32");
3483 log_verbose("Running exhaustive round trip test for UTF-7\n");
3484 TestFullRoundtrip("UTF-7");
/* NOTE(review): this banner repeats "UTF-7" although the call below passes "UTF-7,version=1" */
3485 log_verbose("Running exhaustive round trip test for UTF-7\n");
3486 TestFullRoundtrip("UTF-7,version=1");
3487 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3488 TestFullRoundtrip("IMAP-mailbox-name");
3489 log_verbose("Running exhaustive round trip test for GB18030\n");
3490 TestFullRoundtrip("GB18030");
/*
 * Test vectors for the SCSU TestConv() calls further down: each xxxUTF16 array is the UTF-16
 * input and the matching xxxSCSU array is the byte sequence passed as the expected encoding.
 * monkeyIn is passed without an expected byte array.
 */
3497 static const uint16_t germanUTF16
[]={
3498 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3501 static const uint8_t germanSCSU
[]={
3502 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3505 static const uint16_t russianUTF16
[]={
3506 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3509 static const uint8_t russianSCSU
[]={
3510 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3513 static const uint16_t japaneseUTF16
[]={
3514 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3515 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3516 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3517 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3518 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3519 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3520 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3521 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3522 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3523 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3524 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3525 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3526 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3527 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3528 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3531 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3532 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3533 static const uint8_t japaneseSCSU
[]={
3534 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3535 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3536 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3537 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3538 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3539 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3540 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3541 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3542 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3543 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3544 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3548 static const uint16_t allFeaturesUTF16
[]={
3549 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3550 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3551 0x01df, 0xf000, 0xdbff, 0xdfff
3554 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3555 * result here (34B vs. 35B)
3557 static const uint8_t allFeaturesSCSU
[]={
3558 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3559 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3560 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3561 0xdf, 0x14, 0x80, 0x15, 0xff
3563 static const uint16_t monkeyIn
[]={
3564 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3565 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3566 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3567 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3568 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3569 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3570 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3571 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3572 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3573 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3574 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3575 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3576 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3577 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3578 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3579 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3580 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3581 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3582 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3583 /* test non-BMP code points */
3584 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3585 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3586 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3587 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3588 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3589 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3590 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3591 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3592 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3593 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3594 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3597 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3598 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3599 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3600 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3601 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
/*
 * Escaped-text patterns for SCSU coverage. Each entry is expanded with unescape() and passed
 * to TestConv() with the "SCSU" converter and no expected byte array; afterwards the fixed
 * UTF-16/SCSU sample pairs are checked through TestConv() as well.
 */
3603 static const char *fTestCases
[] = {
3604 "\\ud800\\udc00", /* smallest surrogate*/
3606 "\\udBff\\udFff", /* largest surrogate pair*/
3609 "Hello \\u9292 \\u9192 World!",
3610 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3611 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3613 "\\u0648\\u06c8", /* catch missing reset*/
3616 "\\u4444\\uE001", /* lowest quotable*/
3617 "\\u4444\\uf2FF", /* highest quotable*/
3618 "\\u4444\\uf188\\u4444",
3619 "\\u4444\\uf188\\uf288",
3620 "\\u4444\\uf188abc\\u0429\\uf288",
3622 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3623 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3624 "Hello World!123456",
3625 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3627 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3628 "abc\\u4411d", /* uses SQU*/
3629 "abc\\u4411\\u4412d",/* uses SCU*/
3630 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3631 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3633 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3634 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3635 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3637 "", /* empty input*/
3638 "\\u0000", /* smallest BMP character*/
3639 "\\uFFFF", /* largest BMP character*/
3641 /* regression tests*/
3642 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3643 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3644 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3645 "\\u0041\\u00df\\u0401\\u015f",
3646 "\\u9066\\u2123abc",
3647 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3648 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
/* expand each pattern into a freshly malloc'ed UChar buffer and round-trip it through SCSU */
3651 for(;i
<sizeof(fTestCases
)/sizeof(*fTestCases
);i
++){
3652 const char* cSrc
= fTestCases
[i
];
3653 UErrorCode status
= U_ZERO_ERROR
;
3654 int32_t cSrcLen
,srcLen
;
3656 /* UConverter* cnv = ucnv_open("SCSU",&status); */
/* the escaped form is never shorter than its expansion, so strlen+1 UChars suffice */
3657 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3658 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3659 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3660 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3661 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
/* NOTE(review): the next two "all features" calls pass identical arguments */
3664 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3665 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3666 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3667 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3668 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3669 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3670 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3673 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Regression test for Jitterbug 2346: decodes a fixed ISO-2022-JP byte sequence (two
 * escape-switched runs, each followed by CR LF) with the "ISO_2022_JP" converter, compares
 * the UTF-16 output with expected, and then converts the UTF-16 text back to bytes.
 */
3674 static void TestJitterbug2346(){
3675 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3676 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3677 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3679 UChar uTarget
[500]={'\0'};
3680 UChar
* utarget
=uTarget
;
/* sizeof(uTarget)/2 is the element count because a UChar is two bytes wide */
3681 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3683 char cTarget
[500]={'\0'};
3684 char* ctarget
=cTarget
;
3685 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3686 const char* csource
=source
;
3687 UChar
* temp
= expected
;
3688 UErrorCode err
=U_ZERO_ERROR
;
3690 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3691 if(U_FAILURE(err
)) {
3692 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3695 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3696 if(U_FAILURE(err
)) {
3697 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
/* the end of the decoded text becomes the limit for the comparison and the reverse pass */
3700 utargetLimit
=utarget
;
3702 while(utarget
<utargetLimit
){
3703 if(*temp
!=*utarget
){
/* NOTE(review): temp walks the expected array, yet *utarget is printed as "Expected" and *temp as "Got" */
3705 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3710 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3711 if(U_FAILURE(err
)) {
3712 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3715 ctargetLimit
=ctarget
;
/*
 * Round-trips the UTF-16 sample in[] through the "ISO_2022_JP_1" converter
 * (ucnv_fromUnicode() followed by ucnv_toUnicode(), no offsets requested), logs every unit
 * that does not survive the round trip, and then runs the TestNextUCharError() and
 * small-buffer helpers on the same converter.
 */
3723 TestISO_2022_JP_1() {
3725 static const uint16_t in
[]={
3726 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3727 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3728 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3729 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3730 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3731 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3732 0x201D, 0x000D, 0x000A,
3733 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3734 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3735 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3736 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3737 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3738 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3740 const UChar
* uSource
;
3741 const UChar
* uSourceLimit
;
3742 const char* cSource
;
3743 const char* cSourceLimit
;
3744 UChar
*uTargetLimit
=NULL
;
3747 const char *cTargetLimit
;
/* both working buffers are allocated with uBufSize*5 (600) elements */
3750 int32_t uBufSize
= 120;
3751 UErrorCode errorCode
=U_ZERO_ERROR
;
3754 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3755 if(U_FAILURE(errorCode
)) {
3756 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3760 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3761 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3762 uSource
= (const UChar
*)in
;
3763 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3765 cTargetLimit
= cBuf
+uBufSize
*5;
3767 uTargetLimit
= uBuf
+ uBufSize
*5;
/* UTF-16 -> ISO-2022-JP-1 bytes */
3768 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3769 if(U_FAILURE(errorCode
)){
3770 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* decode exactly the bytes that were just produced */
3774 cSourceLimit
=cTarget
;
3776 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3777 if(U_FAILURE(errorCode
)){
3778 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3781 uSource
= (const UChar
*)in
;
3782 while(uSource
<uSourceLimit
){
3783 if(*test
!=*uSource
){
3785 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3791 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3792 /*Test for the condition where there is an invalid character*/
/* the expected status passed for the bytes 0x0e 0x24 0x53 is U_ZERO_ERROR */
3795 static const uint8_t source2
[]={0x0e,0x24,0x053};
3796 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3798 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3799 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
/*
 * Round-trips the UTF-16 sample in[] through the "ISO_2022_JP_2" converter, this time
 * passing an offsets buffer to both conversion calls, logs every unit that does not survive
 * the round trip, and then runs the small-buffer, TestToAndFromUChars() and
 * TestNextUCharError() helpers on the same converter.
 */
3806 TestISO_2022_JP_2() {
3808 static const uint16_t in
[]={
3809 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3810 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3811 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3812 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3813 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3814 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3815 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3816 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3817 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3818 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3819 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3820 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3821 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3822 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3823 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3824 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3825 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3826 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3827 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3829 const UChar
* uSource
;
3830 const UChar
* uSourceLimit
;
3831 const char* cSource
;
3832 const char* cSourceLimit
;
3833 UChar
*uTargetLimit
=NULL
;
3836 const char *cTargetLimit
;
/* all three working buffers are allocated with uBufSize*5 (600) elements */
3839 int32_t uBufSize
= 120;
3840 UErrorCode errorCode
=U_ZERO_ERROR
;
3842 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3843 int32_t* myOff
= offsets
;
3844 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3845 if(U_FAILURE(errorCode
)) {
3846 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3850 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3851 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3852 uSource
= (const UChar
*)in
;
3853 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3855 cTargetLimit
= cBuf
+uBufSize
*5;
3857 uTargetLimit
= uBuf
+ uBufSize
*5;
/* UTF-16 -> ISO-2022-JP-2 bytes */
3858 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3859 if(U_FAILURE(errorCode
)){
3860 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* decode exactly the bytes that were just produced */
3864 cSourceLimit
=cTarget
;
3867 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3868 if(U_FAILURE(errorCode
)){
3869 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3872 uSource
= (const UChar
*)in
;
3873 while(uSource
<uSourceLimit
){
3874 if(*test
!=*uSource
){
3876 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3881 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3882 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3883 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3884 /*Test for the condition where there is an invalid character*/
/* the expected status passed for the bytes 0x0e 0x24 0x53 is U_ZERO_ERROR */
3887 static const uint8_t source2
[]={0x0e,0x24,0x053};
3888 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
/*
 * ISO-2022-KR round trip (the enclosing function header is not part of this excerpt;
 * presumably TestISO_2022_KR -- confirm against the full file).
 * Converts in[] to bytes with the "ISO_2022,locale=kr" converter and back, logs mismatching
 * units, runs the shared helper checks and TestJitterbug930("csISO2022KR"), and finally
 * expects U_ILLEGAL_ESCAPE_SEQUENCE for the bytes 0x1b 0x24 0x53 once the to-Unicode
 * callback has been set to UCNV_TO_U_CALLBACK_STOP.
 */
3899 static const uint16_t in
[]={
3900 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3901 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3902 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3903 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3904 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3905 ,0x53E3,0x53E4,0x000A,0x000D};
3906 const UChar
* uSource
;
3907 const UChar
* uSourceLimit
;
3908 const char* cSource
;
3909 const char* cSourceLimit
;
3910 UChar
*uTargetLimit
=NULL
;
3913 const char *cTargetLimit
;
/* all three working buffers are allocated with uBufSize*5 (600) elements */
3916 int32_t uBufSize
= 120;
3917 UErrorCode errorCode
=U_ZERO_ERROR
;
3919 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3920 int32_t* myOff
= offsets
;
3921 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
3922 if(U_FAILURE(errorCode
)) {
3923 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3927 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3928 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3929 uSource
= (const UChar
*)in
;
3930 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3932 cTargetLimit
= cBuf
+uBufSize
*5;
3934 uTargetLimit
= uBuf
+ uBufSize
*5;
/* UTF-16 -> ISO-2022-KR bytes */
3935 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3936 if(U_FAILURE(errorCode
)){
3937 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* decode exactly the bytes that were just produced */
3941 cSourceLimit
=cTarget
;
3944 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3945 if(U_FAILURE(errorCode
)){
3946 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3949 uSource
= (const UChar
*)in
;
3950 while(uSource
<uSourceLimit
){
3951 if(*test
!=*uSource
){
3952 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
3957 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
3958 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3959 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3960 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3961 TestJitterbug930("csISO2022KR");
3962 /*Test for the condition where there is an invalid character*/
3965 static const uint8_t source2
[]={0x1b,0x24,0x053};
/* with the STOP callback installed the bad escape is reported as an error status */
3966 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
3967 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Same round-trip scheme as the ISO-2022-KR test, but with the converter opened as
 * "ibm-25546": converts in[] to bytes and back, logs mismatching units, runs the shared
 * helper checks, and expects U_ILLEGAL_ESCAPE_SEQUENCE for the bytes 0x1b 0x24 0x53 once the
 * to-Unicode callback has been set to UCNV_TO_U_CALLBACK_STOP.
 */
3976 TestISO_2022_KR_1() {
3978 static const uint16_t in
[]={
3979 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3980 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3981 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3982 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3983 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3984 ,0x53E3,0x53E4,0x000A,0x000D};
3985 const UChar
* uSource
;
3986 const UChar
* uSourceLimit
;
3987 const char* cSource
;
3988 const char* cSourceLimit
;
3989 UChar
*uTargetLimit
=NULL
;
3992 const char *cTargetLimit
;
/* all three working buffers are allocated with uBufSize*5 (600) elements */
3995 int32_t uBufSize
= 120;
3996 UErrorCode errorCode
=U_ZERO_ERROR
;
3998 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3999 int32_t* myOff
= offsets
;
4000 cnv
=ucnv_open("ibm-25546", &errorCode
);
4001 if(U_FAILURE(errorCode
)) {
4002 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4006 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4007 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4008 uSource
= (const UChar
*)in
;
4009 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4011 cTargetLimit
= cBuf
+uBufSize
*5;
4013 uTargetLimit
= uBuf
+ uBufSize
*5;
/* UTF-16 -> ibm-25546 bytes */
4014 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4015 if(U_FAILURE(errorCode
)){
4016 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* decode exactly the bytes that were just produced */
4020 cSourceLimit
=cTarget
;
4023 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4024 if(U_FAILURE(errorCode
)){
4025 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4028 uSource
= (const UChar
*)in
;
4029 while(uSource
<uSourceLimit
){
4030 if(*test
!=*uSource
){
4031 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4037 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4038 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4039 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4041 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4042 /*Test for the condition where there is an invalid character*/
4045 static const uint8_t source2
[]={0x1b,0x24,0x053};
/* with the STOP callback installed the bad escape is reported as an error status */
4046 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4047 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4055 static void TestJitterbug2411(){
4056 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4057 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4058 UConverter
* kr
=NULL
, *kr1
=NULL
;
4059 UErrorCode errorCode
= U_ZERO_ERROR
;
4060 UChar tgt
[100]={'\0'};
4061 UChar
* target
= tgt
;
4062 UChar
* targetLimit
= target
+100;
4063 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4064 if(U_FAILURE(errorCode
)) {
4065 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
4068 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4069 if(U_FAILURE(errorCode
)) {
4070 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4073 kr1
= ucnv_open("ibm-25546", &errorCode
);
4074 if(U_FAILURE(errorCode
)) {
4075 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4079 targetLimit
= target
+100;
4080 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4082 if(U_FAILURE(errorCode
)) {
4083 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4094 /* From Unicode moved to testdata/conversion.txt */
/*
 * To-Unicode samples for the "JIS", "JIS7" and "JIS8" converter names. Each sampleTextXxx
 * byte array is checked with testConvertToU() against the matching expectedISO2022Xxx UTF-16
 * array and toISO2022XxxOffs offset table. The contents of the expected and offset arrays
 * are mostly not shown in this excerpt.
 */
4097 static const uint8_t sampleTextJIS
[] = {
4098 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4099 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4100 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4102 static const uint16_t expectedISO2022JIS
[] = {
4107 static const int32_t toISO2022JISOffs
[]={
4113 static const uint8_t sampleTextJIS7
[] = {
4114 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4115 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4116 0x1b,0x24,0x42,0x21,0x21,
4117 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4119 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4121 static const uint16_t expectedISO2022JIS7
[] = {
4129 static const int32_t toISO2022JIS7Offs
[]={
4136 static const uint8_t sampleTextJIS8
[] = {
4137 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4138 0xa1,0xc8,0xd9,/*Katakana Set*/
4141 0xb1,0xc3, /*Katakana Set*/
4142 0x1b,0x24,0x42,0x21,0x21
4144 static const uint16_t expectedISO2022JIS8
[] = {
4146 0xff61, 0xff88, 0xff99,
4151 static const int32_t toISO2022JIS8Offs
[]={
/* arguments: source bytes, byte count, expected UTF-16, expected unit count, converter name, offsets, TRUE */
4157 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4158 sizeof(expectedISO2022JIS
)/sizeof(expectedISO2022JIS
[0]),"JIS", toISO2022JISOffs
,TRUE
);
4159 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4160 sizeof(expectedISO2022JIS7
)/sizeof(expectedISO2022JIS7
[0]),"JIS7", toISO2022JIS7Offs
,TRUE
);
4161 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4162 sizeof(expectedISO2022JIS8
)/sizeof(expectedISO2022JIS8
[0]),"JIS8", toISO2022JIS8Offs
,TRUE
);
/*
 * Regression test for Jitterbug 915: decodes the fixed byte sequence cSource with the
 * "ISO_2022_CN_EXT" converter, converts the resulting UTF-16 text back to bytes with the
 * same converter, and compares the regenerated bytes with cSource one by one.
 * NOTE(review): in the mismatch message the regenerated byte (*ctarget) is printed as
 * "Expected" and the original source byte (*tempSrc) as "Got"; the labels look swapped.
 */
4167 static void TestJitterbug915(){
4168 /* tests for roundtripping of the below sequence
4169 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4170 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4171 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4172 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4173 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4174 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4175 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4177 static const char cSource
[]={
4178 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4179 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4180 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4181 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4182 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4183 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4184 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4185 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4186 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4187 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4188 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4189 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4190 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4191 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4192 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4193 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4194 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4195 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4196 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4197 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4198 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4199 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4200 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4201 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4202 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4203 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4204 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4205 0x37, 0x20, 0x2A, 0x2F
4207 UChar uTarget
[500]={'\0'};
4208 UChar
* utarget
=uTarget
;
4209 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4211 char cTarget
[500]={'\0'};
4212 char* ctarget
=cTarget
;
4213 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4214 const char* csource
=cSource
;
4215 const char* tempSrc
= cSource
;
4216 UErrorCode err
=U_ZERO_ERROR
;
4218 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4219 if(U_FAILURE(err
)) {
4220 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4223 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4224 if(U_FAILURE(err
)) {
4225 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4228 utargetLimit
=utarget
;
4230 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4231 if(U_FAILURE(err
)) {
4232 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4235 ctargetLimit
=ctarget
;
4237 while(ctarget
<ctargetLimit
){
4238 if(*ctarget
!= *tempSrc
){
4239 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
4249 TestISO_2022_CN_EXT() {
4251 static const uint16_t in
[]={
4252 /* test Non-BMP code points */
4253 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4254 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4255 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4256 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4257 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4258 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4259 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4260 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4261 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4264 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4265 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4266 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4267 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4272 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4273 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4274 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4275 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4276 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4277 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4278 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4279 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4280 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4281 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4283 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4287 const UChar
* uSource
;
4288 const UChar
* uSourceLimit
;
4289 const char* cSource
;
4290 const char* cSourceLimit
;
4291 UChar
*uTargetLimit
=NULL
;
4294 const char *cTargetLimit
;
4297 int32_t uBufSize
= 180;
4298 UErrorCode errorCode
=U_ZERO_ERROR
;
4300 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4301 int32_t* myOff
= offsets
;
4302 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4303 if(U_FAILURE(errorCode
)) {
4304 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4308 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4309 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4310 uSource
= (const UChar
*)in
;
4311 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4313 cTargetLimit
= cBuf
+uBufSize
*5;
4315 uTargetLimit
= uBuf
+ uBufSize
*5;
4316 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4317 if(U_FAILURE(errorCode
)){
4318 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4322 cSourceLimit
=cTarget
;
4325 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4326 if(U_FAILURE(errorCode
)){
4327 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4330 uSource
= (const UChar
*)in
;
4331 while(uSource
<uSourceLimit
){
4332 if(*test
!=*uSource
){
4333 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4336 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4341 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4342 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4343 /*Test for the condition where there is an invalid character*/
4346 static const uint8_t source2
[]={0x0e,0x24,0x053};
4347 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4358 static const uint16_t in
[]={
4360 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4361 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4362 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4363 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4364 0x0020, 0x0045, 0x004e, 0x0044,
4366 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4367 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4368 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4369 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4370 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4371 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4372 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4373 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4374 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4375 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4376 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4377 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4378 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4379 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4380 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4381 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4382 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4385 const UChar
* uSource
;
4386 const UChar
* uSourceLimit
;
4387 const char* cSource
;
4388 const char* cSourceLimit
;
4389 UChar
*uTargetLimit
=NULL
;
4392 const char *cTargetLimit
;
4395 int32_t uBufSize
= 180;
4396 UErrorCode errorCode
=U_ZERO_ERROR
;
4398 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4399 int32_t* myOff
= offsets
;
4400 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4401 if(U_FAILURE(errorCode
)) {
4402 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4406 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4407 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4408 uSource
= (const UChar
*)in
;
4409 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4411 cTargetLimit
= cBuf
+uBufSize
*5;
4413 uTargetLimit
= uBuf
+ uBufSize
*5;
4414 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4415 if(U_FAILURE(errorCode
)){
4416 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4420 cSourceLimit
=cTarget
;
4423 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4424 if(U_FAILURE(errorCode
)){
4425 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4428 uSource
= (const UChar
*)in
;
4429 while(uSource
<uSourceLimit
){
4430 if(*test
!=*uSource
){
4431 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4434 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4439 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4440 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4441 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4442 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4443 TestJitterbug930("csISO2022CN");
4444 /*Test for the condition where there is an invalid character*/
4447 static const uint8_t source2
[]={0x0e,0x24,0x053};
4448 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4458 TestEBCDIC_STATEFUL() {
4460 static const uint8_t in
[]={
4469 /* expected test results */
4470 static const int32_t results
[]={
4471 /* number of bytes read, code point */
4480 static const uint8_t in2
[]={
4486 /* expected test results */
4487 static const int32_t results2
[]={
4488 /* number of bytes read, code point */
4493 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4494 UErrorCode errorCode
=U_ZERO_ERROR
;
4495 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4496 if(U_FAILURE(errorCode
)) {
4497 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4500 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4502 /* Test the condition when source >= sourceLimit */
4503 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4505 /*Test for the condition where source > sourcelimit after consuming the shift character */
4507 static const uint8_t source1
[]={0x0f};
4508 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4510 /*Test for the condition where there is an invalid character*/
4513 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4514 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
4517 source
=(const char*)in2
;
4518 limit
=(const char*)in2
+sizeof(in2
);
4519 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4527 static const uint8_t in
[]={
4530 0x81, 0x30, 0x81, 0x30,
4534 0x82, 0x35, 0x8f, 0x33,
4535 0x84, 0x31, 0xa4, 0x39,
4536 0x90, 0x30, 0x81, 0x30,
4537 0xe3, 0x32, 0x9a, 0x35
4540 * Feature removed markus 2000-oct-26
4541 * Only some codepages must match surrogate pairs into supplementary code points -
4542 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4543 * GB 18030 provides direct encodings for supplementary code points, therefore
4544 * it must not combine two single-encoded surrogates into one code point.
4546 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4550 /* expected test results */
4551 static const int32_t results
[]={
4552 /* number of bytes read, code point */
4564 /* Feature removed. See comment above. */
4569 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4570 UErrorCode errorCode
=U_ZERO_ERROR
;
4571 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4572 if(U_FAILURE(errorCode
)) {
4573 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4576 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4582 /* LMBCS-1 string */
4583 static const uint8_t pszLMBCS
[]={
4592 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4596 /* Unicode UChar32 equivalents */
4597 static const UChar32 pszUnicode32
[]={
4607 0x00023456, /* code point for surrogate pair */
4611 /* Unicode UChar equivalents */
4612 static const UChar pszUnicode
[]={
4622 0xD84D, /* high (lead) surrogate: D800..DBFF */
4623 0xDC56, /* low (trail) surrogate: DC00..DFFF; the pair encodes U+23456 */
4627 /* expected test results */
4628 static const int offsets32
[]={
4629 /* number of bytes read, code point */
4643 /* expected test results */
4644 static const int offsets
[]={
4645 /* number of bytes read, code point */
4663 #define NAME_LMBCS_1 "LMBCS-1"
4664 #define NAME_LMBCS_2 "LMBCS-2"
4667 /* Some basic open/close/property tests on some LMBCS converters */
4670 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4671 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4672 char get_subchars
[1];
4673 const char * get_name
;
4677 int8_t len
= sizeof(get_subchars
);
4679 UErrorCode errorCode
=U_ZERO_ERROR
;
4682 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4683 if(U_FAILURE(errorCode
)) {
4684 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4687 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4688 if(U_FAILURE(errorCode
)) {
4689 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4694 get_name
= ucnv_getName (cnv1
, &errorCode
);
4695 if (strcmp(NAME_LMBCS_1
,get_name
)){
4696 log_err("Unexpected converter name: %s\n", get_name
);
4698 get_name
= ucnv_getName (cnv2
, &errorCode
);
4699 if (strcmp(NAME_LMBCS_2
,get_name
)){
4700 log_err("Unexpected converter name: %s\n", get_name
);
4703 /* substitution chars */
4704 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4705 if(U_FAILURE(errorCode
)) {
4706 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4709 log_err("Unexpected length of sub chars\n");
4711 if (get_subchars
[0] != expected_subchars
[0]){
4712 log_err("Unexpected value of sub chars\n");
4714 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4715 if(U_FAILURE(errorCode
)) {
4716 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4718 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4719 if(U_FAILURE(errorCode
)) {
4720 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4723 log_err("Unexpected length of sub chars\n");
4725 if (get_subchars
[0] != new_subchars
[0]){
4726 log_err("Unexpected value of sub chars\n");
4733 /* LMBCS to Unicode - offsets */
4735 UErrorCode errorCode
=U_ZERO_ERROR
;
4737 const char * pSource
= (const char *)pszLMBCS
;
4738 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4740 UChar Out
[sizeof(pszUnicode
) + 1];
4742 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
4744 int32_t off
[sizeof(offsets
)];
4746 /* last 'offset' in expected results is just the final size.
4747 (Makes other tests easier). Compensate here: */
4749 off
[(sizeof(offsets
)/sizeof(offsets
[0]))-1] = sizeof(pszLMBCS
);
4753 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4754 if(U_FAILURE(errorCode
)) {
4755 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4761 ucnv_toUnicode (cnv
,
4771 if (memcmp(off
,offsets
,sizeof(offsets
)))
4773 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4775 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4777 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4782 /* LMBCS to Unicode - getNextUChar */
4783 const char * sourceStart
;
4784 const char *source
=(const char *)pszLMBCS
;
4785 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4786 const UChar32
*results
= pszUnicode32
;
4787 const int *off
= offsets32
;
4789 UErrorCode errorCode
=U_ZERO_ERROR
;
4792 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4793 if(U_FAILURE(errorCode
)) {
4794 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4800 while(source
<limit
) {
4802 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4803 if(U_FAILURE(errorCode
)) {
4804 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4806 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
4807 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4808 uniChar
, (source
-sourceStart
), *results
, *off
);
4817 { /* test locale & optimization group operations: Unicode to LMBCS */
4819 UErrorCode errorCode
=U_ZERO_ERROR
;
4820 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4821 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4822 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4823 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4824 const UChar
* pUniOut
= uniString
;
4825 UChar
* pUniIn
= uniString
;
4826 uint8_t lmbcsString
[4];
4827 const char * pLMBCSOut
= (const char *)lmbcsString
;
4828 char * pLMBCSIn
= (char *)lmbcsString
;
4830 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4831 ucnv_fromUnicode (cnv16he
,
4832 &pLMBCSIn
, (pLMBCSIn
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4833 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4834 NULL
, 1, &errorCode
);
4836 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
4838 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4841 pLMBCSIn
= (char *)lmbcsString
;
4842 pUniOut
= uniString
;
4843 ucnv_fromUnicode (cnv01us
,
4844 &pLMBCSIn
, (const char *)(lmbcsString
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4845 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4846 NULL
, 1, &errorCode
);
4848 if (lmbcsString
[0] != 0x9F)
4850 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4853 /* single byte char from mbcs char set */
4854 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4855 pLMBCSOut
= (const char *)lmbcsString
;
4857 ucnv_toUnicode (cnv16jp
,
4858 &pUniIn
, pUniIn
+ 1,
4859 &pLMBCSOut
, (pLMBCSOut
+ 1),
4860 NULL
, 1, &errorCode
);
4861 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4863 log_err("Unexpected results from LMBCS-16 single byte char\n");
4865 /* convert to group 1: should be 3 bytes */
4866 pLMBCSIn
= (char *)lmbcsString
;
4867 pUniOut
= uniString
;
4868 ucnv_fromUnicode (cnv01us
,
4869 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
4870 &pUniOut
, pUniOut
+ 1,
4871 NULL
, 1, &errorCode
);
4872 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
4873 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
4875 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4877 pLMBCSOut
= (const char *)lmbcsString
;
4879 ucnv_toUnicode (cnv01us
,
4880 &pUniIn
, pUniIn
+ 1,
4881 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
4882 NULL
, 1, &errorCode
);
4883 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4885 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4887 pLMBCSIn
= (char *)lmbcsString
;
4888 pUniOut
= uniString
;
4889 ucnv_fromUnicode (cnv16jp
,
4890 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
4891 &pUniOut
, pUniOut
+ 1,
4892 NULL
, 1, &errorCode
);
4893 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
4895 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4897 ucnv_close(cnv16he
);
4898 ucnv_close(cnv16jp
);
4899 ucnv_close(cnv01us
);
4902 /* Small source buffer testing, LMBCS -> Unicode */
4904 UErrorCode errorCode
=U_ZERO_ERROR
;
4906 const char * pSource
= (const char *)pszLMBCS
;
4907 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4908 int codepointCount
= 0;
4910 UChar Out
[sizeof(pszUnicode
) + 1];
4912 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
4915 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
4916 if(U_FAILURE(errorCode
)) {
4917 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4922 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
4924 ucnv_toUnicode (cnv
,
4928 (pSource
+1), /* claim that this is a 1- byte buffer */
4930 FALSE
, /* FALSE means there might be more chars in the next buffer */
4933 if (U_SUCCESS (errorCode
))
4935 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
4937 /* we are on to the next code point: check value */
4939 if (Out
[0] != pszUnicode
[codepointCount
]){
4940 log_err("LMBCS->Uni result %lx should have been %lx \n",
4941 Out
[0], pszUnicode
[codepointCount
]);
4944 pOut
= Out
; /* reset for accumulating next code point */
4950 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
4954 /* limits & surrogate error testing */
4955 char LIn
[sizeof(pszLMBCS
)];
4956 const char * pLIn
= LIn
;
4958 char LOut
[sizeof(pszLMBCS
)];
4959 char * pLOut
= LOut
;
4961 UChar UOut
[sizeof(pszUnicode
)];
4962 UChar
* pUOut
= UOut
;
4964 UChar UIn
[sizeof(pszUnicode
)];
4965 const UChar
* pUIn
= UIn
;
4967 int32_t off
[sizeof(offsets
)];
4970 errorCode
=U_ZERO_ERROR
;
4972 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4973 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
-1,off
,FALSE
, &errorCode
);
4974 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4976 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
4978 errorCode
=U_ZERO_ERROR
;
4979 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
4980 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4982 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
4984 errorCode
=U_ZERO_ERROR
;
4986 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
4987 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4989 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
4991 errorCode
=U_ZERO_ERROR
;
4993 /* 0 byte source request - no error, no pointer movement */
4994 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
4995 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
4996 if(U_FAILURE(errorCode
)) {
4997 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
4999 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5001 log_err("Unexpected pointer move in 0 byte source request \n");
5003 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5004 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5005 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5007 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5009 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5011 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5013 errorCode
= U_ZERO_ERROR
;
5015 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5018 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+sizeof(pszUnicode
)/sizeof(UChar
),off
,FALSE
, &errorCode
);
5019 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5021 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5024 errorCode
= U_ZERO_ERROR
;
5026 pLIn
= (const char *)pszLMBCS
;
5027 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5028 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5030 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5033 /* unpaired or chopped LMBCS surrogates */
5035 /* OK high surrogate, Low surrogate is chopped */
5036 LIn
[0] = (char)0x14;
5037 LIn
[1] = (char)0xD8;
5038 LIn
[2] = (char)0x01;
5039 LIn
[3] = (char)0x14;
5040 LIn
[4] = (char)0xDC;
5042 errorCode
= U_ZERO_ERROR
;
5045 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5046 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5047 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5049 log_err("Unexpected results on chopped low surrogate\n");
5052 /* chopped at surrogate boundary */
5053 LIn
[0] = (char)0x14;
5054 LIn
[1] = (char)0xD8;
5055 LIn
[2] = (char)0x01;
5057 errorCode
= U_ZERO_ERROR
;
5060 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5061 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5063 log_err("Unexpected results on chopped at surrogate boundary \n");
5066 /* unpaired surrogate plus valid Unichar */
5067 LIn
[0] = (char)0x14;
5068 LIn
[1] = (char)0xD8;
5069 LIn
[2] = (char)0x01;
5070 LIn
[3] = (char)0x14;
5071 LIn
[4] = (char)0xC9;
5072 LIn
[5] = (char)0xD0;
5074 errorCode
= U_ZERO_ERROR
;
5077 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5078 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5080 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5083 /* unpaired surrogate plus chopped Unichar */
5084 LIn
[0] = (char)0x14;
5085 LIn
[1] = (char)0xD8;
5086 LIn
[2] = (char)0x01;
5087 LIn
[3] = (char)0x14;
5088 LIn
[4] = (char)0xC9;
5091 errorCode
= U_ZERO_ERROR
;
5094 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5095 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5097 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5100 /* unpaired surrogate plus valid non-Unichar */
5101 LIn
[0] = (char)0x14;
5102 LIn
[1] = (char)0xD8;
5103 LIn
[2] = (char)0x01;
5104 LIn
[3] = (char)0x0F;
5105 LIn
[4] = (char)0x3B;
5108 errorCode
= U_ZERO_ERROR
;
5111 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5112 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5114 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5117 /* unpaired surrogate plus chopped non-Unichar */
5118 LIn
[0] = (char)0x14;
5119 LIn
[1] = (char)0xD8;
5120 LIn
[2] = (char)0x01;
5121 LIn
[3] = (char)0x0F;
5124 errorCode
= U_ZERO_ERROR
;
5127 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5129 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5131 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5135 ucnv_close(cnv
); /* final cleanup */
/*
 * Jitterbug 255 regression test.
 * Opens a "shift-jis" converter and walks testBytes one code point at a time
 * with ucnv_getNextUChar(), logging an error if a step reports a failure.
 * If the converter cannot be opened, a data error is logged instead.
 */
5139 static void TestJitterbug255()
/* Input bytes to decode, including CR, LF and a trailing 0x00 byte. */
5141 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
/* Read cursor; passed by address to ucnv_getNextUChar() below. */
5142 const char *testBuffer
= (const char *)testBytes
;
/* One past the last input byte (sizeof counts the trailing 0x00 too). */
5143 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5144 UErrorCode status
= U_ZERO_ERROR
;
5146 UConverter
*cnv
= 0;
5148 cnv
= ucnv_open("shift-jis", &status
);
/* Treat both an error status and a null converter as "data not available". */
5149 if (U_FAILURE(status
) || cnv
== 0) {
5150 log_data_err("Failed to open the converter for SJIS.\n");
/* Keep converting while the cursor has not reached the end of the input. */
5153 while (testBuffer
!= testEnd
)
5155 result
= ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5156 if (U_FAILURE(status
))
5158 log_err("Failed to convert the next UChar for SJIS.\n");
/*
 * Checks newline handling of the "ebcdic-xml-us" converter in both directions.
 *   to Unicode:   bytes 0x25, 0x15, 0x0D are expected to give U+000A, U+000A, U+000D
 *   from Unicode: U+000A, U+000A, U+000D are expected to give bytes 0x25, 0x25, 0x0D
 * Only the first three elements of each table are converted and compared.
 * On a mismatch or error, the actual and expected sequences are printed.
 */
5165 static void TestEBCDICUS4XML()
/* Output buffer for the to-Unicode direction. */
5167 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
/* Expected to-Unicode result; also the input for the from-Unicode direction. */
5168 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
/* Expected from-Unicode result. */
5169 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
/* Input bytes for the to-Unicode direction. */
5170 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
/* Output buffer for the from-Unicode direction. */
5171 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
/* Cursors over the arrays above; these are what get passed to the converter. */
5172 UChar
*unicodes
= unicodes_x
;
5173 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5174 char *target
= target_x
;
5175 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5176 UErrorCode status
= U_ZERO_ERROR
;
5177 UConverter
*cnv
= 0;
5179 cnv
= ucnv_open("ebcdic-xml-us", &status
);
/* Treat both an error status and a null converter as "data not available". */
5180 if (U_FAILURE(status
) || cnv
== 0) {
5181 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
/* Direction 1: convert the three newline bytes to Unicode. */
5184 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
/* The target cursor was handed over by address, so compare from the array itself. */
5185 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5186 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5187 u_errorName(status
));
5188 printUSeqErr(unicodes_x
, 3);
5189 printUSeqErr(toUnicodeMaps
, 3);
/* Clear the status so the second direction is judged on its own. */
5191 status
= U_ZERO_ERROR
;
/* Direction 2: convert the expected Unicode sequence back to bytes. */
5192 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
/* As above, compare from the start of the output array rather than the cursor. */
5193 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5194 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5195 u_errorName(status
));
5196 printSeqErr((const unsigned char*)target_x
, 3);
5197 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5201 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5203 #if !UCONFIG_NO_COLLATION
5205 static void TestJitterbug981(){
5207 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5208 UErrorCode status
= U_ZERO_ERROR
;
5209 UConverter
*utf8cnv
;
5210 UCollator
* myCollator
;
5213 utf8cnv
= ucnv_open ("utf8", &status
);
5214 if(U_FAILURE(status
)){
5215 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status
));
5218 myCollator
= ucol_open("zh", &status
);
5219 if(U_FAILURE(status
)){
5220 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status
));
5224 rules
= ucol_getRules(myCollator
, &rules_length
);
5225 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
5226 buff
= malloc(buff_size
);
5230 ucnv_reset(utf8cnv
);
5231 status
= U_ZERO_ERROR
;
5232 if(target_cap
>= buff_size
) {
5233 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5236 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5237 rules
, rules_length
, &status
);
5238 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5239 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5240 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5242 numNeeded
= bytes_needed
;
5243 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5244 ucol_close(myCollator
);
5245 ucnv_close(utf8cnv
);
5251 static void TestJitterbug1293(){
5252 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5254 UErrorCode status
= U_ZERO_ERROR
;
5255 UConverter
* conv
=NULL
;
5256 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5257 conv
= ucnv_open("shift-jis",&status
);
5258 if(U_FAILURE(status
)){
5259 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5265 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
5266 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5267 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5268 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5270 numNeeded
= bytes_needed
;
5271 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5272 if(U_FAILURE(status
)){
5273 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));