1 /********************************************************************
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 *********************************************************************************
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
26 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
27 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
47 static void TestSBCS(void);
48 static void TestDBCS(void);
49 static void TestMBCS(void);
50 #ifdef U_ENABLE_GENERIC_ISO_2022
51 static void TestISO_2022(void);
53 static void TestISO_2022_JP(void);
54 static void TestISO_2022_JP_1(void);
55 static void TestISO_2022_JP_2(void);
56 static void TestISO_2022_KR(void);
57 static void TestISO_2022_KR_1(void);
58 static void TestISO_2022_CN(void);
59 static void TestISO_2022_CN_EXT(void);
60 static void TestJIS(void);
61 static void TestHZ(void);
62 static void TestSCSU(void);
63 static void TestEBCDIC_STATEFUL(void);
64 static void TestGB18030(void);
65 static void TestLMBCS(void);
66 static void TestJitterbug255(void);
67 static void TestEBCDICUS4XML(void);
68 static void TestJitterbug915(void);
69 static void TestISCII(void);
70 static void TestConv(const uint16_t in
[],
76 static void TestRoundTrippingAllUTF(void);
77 static void TestCoverageMBCS(void);
78 static void TestJitterbug2346(void);
79 static void TestJitterbug2411(void);
80 void addTestNewConvert(TestNode
** root
);
82 /* open a converter, using test data if it begins with '@' */
83 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
86 #define NEW_MAX_BUFFER 999
88 static int32_t gInBufferSize
= NEW_MAX_BUFFER
;
89 static int32_t gOutBufferSize
= NEW_MAX_BUFFER
;
90 static char gNuConvTestName
[1024];
/*
 * nct_min(x, y): yields the smaller of x and y.
 *
 * Each use of an argument, and the comparison itself, is parenthesized so
 * that operands containing lower-precedence operators (for example `a & b`
 * or `c ? d : e`) are compared and selected as whole expressions.
 *
 * Each argument may still be evaluated twice; do not pass expressions with
 * side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
94 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
96 if(cnv
&& cnv
[0] == '@') {
97 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
99 return ucnv_open(cnv
, err
);
103 static void printSeq(const unsigned char* a
, int len
)
108 log_verbose("0x%02x ", a
[i
++]);
112 static void printUSeq(const UChar
* a
, int len
)
116 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
120 static void printSeqErr(const unsigned char* a
, int len
)
123 fprintf(stderr
, "{");
125 fprintf(stderr
, "0x%02x ", a
[i
++]);
126 fprintf(stderr
, "}\n");
129 static void printUSeqErr(const UChar
* a
, int len
)
132 fprintf(stderr
, "{U+");
134 fprintf(stderr
, "0x%04x ", a
[i
++]);
135 fprintf(stderr
,"}\n");
139 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
142 const char* s
=(char*)source
;
143 const int32_t *r
=results
;
144 UErrorCode errorCode
=U_ZERO_ERROR
;
149 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
150 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
151 break; /* no more significant input */
152 } else if(U_FAILURE(errorCode
)) {
153 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
156 /* test the expected number of input bytes only if >=0 */
157 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
160 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
161 message
, c
, (s
-s0
), *(r
+1), *r
);
169 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
171 const char* s
=(char*)source
;
172 UErrorCode errorCode
=U_ZERO_ERROR
;
174 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
175 if(errorCode
!= expected
){
176 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
178 if(c
!= 0xFFFD && c
!= 0xffff){
179 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
184 static void TestInBufSizes(void)
186 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
188 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
189 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
190 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
191 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
192 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
193 TestNewConvertWithBufferSizes(1,1);
194 TestNewConvertWithBufferSizes(2,3);
195 TestNewConvertWithBufferSizes(3,2);
199 static void TestOutBufSizes(void)
202 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
203 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
204 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
205 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
206 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
207 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
213 void addTestNewConvert(TestNode
** root
)
215 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
216 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
217 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
218 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
219 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
220 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
221 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
222 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
224 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
225 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
226 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
227 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
228 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
229 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
230 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
231 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
232 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
234 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
235 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
236 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
237 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
238 #ifdef U_ENABLE_GENERIC_ISO_2022
239 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
241 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
242 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
243 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
244 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
245 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
246 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
247 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
248 addTest(root
, &TestISO_2022_CN_EXT
, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
249 addTest(root
, &TestJitterbug915
, "tsconv/nucnvtst/TestJitterbug915");
250 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
251 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
252 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
253 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
254 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
255 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
256 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
257 #if !UCONFIG_NO_COLLATION
258 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
260 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
261 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
262 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
263 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
264 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
269 /* Note that this test already makes use of statics, so it's not really
271 This convenience function lets us make the error messages actually useful.
274 static void setNuConvTestName(const char *codepage
, const char *direction
)
276 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
280 (int)gOutBufferSize
);
/* Result code returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    TC_OK       = 0, /* test was OK */
    TC_MISMATCH = 1, /* Match failed - err was printed */
    TC_FAIL     = 2  /* Test failed, don't print an err because it was already printed. */
} ETestConvertResult;
290 /* Note: This function uses global variables and it will not do offset
291 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
292 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
293 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
295 UErrorCode status
= U_ZERO_ERROR
;
296 UConverter
*conv
= 0;
297 uint8_t junkout
[NEW_MAX_BUFFER
]; /* FIX */
298 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
305 int32_t realBufferSize
;
306 uint8_t *realBufferEnd
;
307 const UChar
*realSourceEnd
;
308 const UChar
*sourceLimit
;
309 UBool checkOffsets
= TRUE
;
312 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
314 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
317 setNuConvTestName(codepage
, "FROM");
319 log_verbose("\n========= %s\n", gNuConvTestName
);
321 conv
= my_ucnv_open(codepage
, &status
);
323 if(U_FAILURE(status
))
325 log_data_err("Couldn't open converter %s\n",codepage
);
329 ucnv_setFallback(conv
,useFallback
);
332 log_verbose("Converter opened..\n");
338 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
339 realBufferEnd
= junkout
+ realBufferSize
;
340 realSourceEnd
= source
+ sourceLen
;
342 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
343 checkOffsets
= FALSE
;
347 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
348 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
350 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
352 if(targ
== realBufferEnd
) {
353 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
356 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
359 status
= U_ZERO_ERROR
;
361 ucnv_fromUnicode (conv
,
366 checkOffsets
? offs
: NULL
,
367 doFlush
, /* flush if we're at the end of the input data */
369 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
371 if(U_FAILURE(status
)) {
372 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
376 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
377 sourceLen
, targ
-junkout
);
382 char offset_str
[9999];
387 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
388 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
389 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
393 printSeq((const uint8_t *)expect
, expectLen
);
394 if ( checkOffsets
) {
395 log_verbose("\nOffsets:");
396 log_verbose(offset_str
);
402 if(expectLen
!= targ
-junkout
) {
403 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
404 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
406 printSeqErr((const unsigned char*)junkout
, targ
-junkout
);
407 printf("\nExpected:");
408 printSeqErr((const unsigned char*)expect
, expectLen
);
412 if (checkOffsets
&& (expectOffsets
!= 0) ) {
413 log_verbose("comparing %d offsets..\n", targ
-junkout
);
414 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
415 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
416 printSeqErr((const unsigned char*)junkout
, targ
-junkout
);
419 for(p
=junkout
;p
<targ
;p
++) {
420 log_err("%d,", junokout
[p
-junkout
]);
423 log_err("Expected: ");
424 for(i
=0; i
<(targ
-junkout
); i
++) {
425 log_err("%d,", expectOffsets
[i
]);
431 log_verbose("comparing..\n");
432 if(!memcmp(junkout
, expect
, expectLen
)) {
433 log_verbose("Matches!\n");
436 log_err("String does not match u->%s\n", gNuConvTestName
);
437 printUSeqErr(source
, sourceLen
);
439 printSeqErr((const unsigned char *)junkout
, expectLen
);
440 printf("\nExpected:");
441 printSeqErr((const unsigned char *)expect
, expectLen
);
447 /* Note: This function uses global variables and it will not do offset
448 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
449 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
450 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
452 UErrorCode status
= U_ZERO_ERROR
;
453 UConverter
*conv
= 0;
454 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
455 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
457 const uint8_t *realSourceEnd
;
458 const uint8_t *srcLimit
;
464 UBool checkOffsets
= TRUE
;
466 int32_t realBufferSize
;
467 UChar
*realBufferEnd
;
470 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
473 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
476 setNuConvTestName(codepage
, "TO");
478 log_verbose("\n========= %s\n", gNuConvTestName
);
480 conv
= my_ucnv_open(codepage
, &status
);
482 if(U_FAILURE(status
))
484 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
488 ucnv_setFallback(conv
,useFallback
);
490 log_verbose("Converter opened..\n");
496 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
497 realBufferEnd
= junkout
+ realBufferSize
;
498 realSourceEnd
= src
+ sourcelen
;
500 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
501 checkOffsets
= FALSE
;
505 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
506 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
508 if(targ
== realBufferEnd
)
510 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
513 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
515 /* oldTarg = targ; */
517 status
= U_ZERO_ERROR
;
519 ucnv_toUnicode (conv
,
523 (const char *)srcLimit
,
524 checkOffsets
? offs
: NULL
,
525 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of hte source data */
528 /* offs += (targ-oldTarg); */
530 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
532 if(U_FAILURE(status
))
534 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
538 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
539 sourcelen
, targ
-junkout
);
543 char offset_str
[9999];
549 for(ptr
= junkout
;ptr
<targ
;ptr
++)
551 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
552 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
556 printUSeq(expect
, expectlen
);
559 log_verbose("\nOffsets:");
560 log_verbose(offset_str
);
566 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
568 if (checkOffsets
&& (expectOffsets
!= 0))
570 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
571 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
573 for(p
=junkout
;p
<targ
;p
++) {
574 log_err("%d,", junokout
[p
-junkout
]);
577 log_err("Expected: ");
578 for(i
=0; i
<(targ
-junkout
); i
++) {
579 log_err("%d,", expectOffsets
[i
]);
583 for(i
=0; i
<(targ
-junkout
); i
++) {
584 log_err("%X,", junkout
[i
]);
588 for(i
=0; i
<(src
-source
); i
++) {
589 log_err("%X,", (unsigned char)source
[i
]);
595 if(!memcmp(junkout
, expect
, expectlen
*2))
597 log_verbose("Matches!\n");
602 log_err("String does not match. %s\n", gNuConvTestName
);
603 log_verbose("String does not match. %s\n", gNuConvTestName
);
605 printUSeqErr(junkout
, expectlen
);
606 printf("\nExpected:");
607 printUSeqErr(expect
, expectlen
);
613 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
616 /* 1 2 3 1Han 2Han 3Han . */
618 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
621 const uint8_t expectedUTF8
[] =
622 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
623 int32_t toUTF8Offs
[] =
624 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
625 int32_t fmUTF8Offs
[] =
626 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
628 #ifdef U_ENABLE_GENERIC_ISO_2022
629 /* Same as UTF8, but with ^[%B preceding */
630 const uint8_t expectedISO2022
[] =
631 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
632 int32_t toISO2022Offs
[] =
633 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
634 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
635 int32_t fmISO2022Offs
[] =
636 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
639 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
640 const uint8_t expectedIBM930
[] =
641 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
642 int32_t toIBM930Offs
[] =
643 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
644 int32_t fmIBM930Offs
[] =
645 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
647 /* 1 2 3 0 h1 h2 h3 . MBCS*/
648 const uint8_t expectedIBM943
[] =
649 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
650 int32_t toIBM943Offs
[] =
651 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
652 int32_t fmIBM943Offs
[] =
653 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
655 /* 1 2 3 0 h1 h2 h3 . DBCS*/
656 const uint8_t expectedIBM9027
[] =
657 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
658 int32_t toIBM9027Offs
[] =
659 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
661 /* 1 2 3 0 <?> <?> <?> . SBCS*/
662 const uint8_t expectedIBM920
[] =
663 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
664 int32_t toIBM920Offs
[] =
665 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
667 /* 1 2 3 0 <?> <?> <?> . SBCS*/
668 const uint8_t expectedISO88593
[] =
669 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
670 int32_t toISO88593Offs
[] =
671 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
673 /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
674 const uint8_t expectedLATIN1
[] =
675 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
676 int32_t toLATIN1Offs
[] =
677 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
681 const uint8_t expectedUTF16BE
[] =
682 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
683 int32_t toUTF16BEOffs
[]=
684 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
685 int32_t fmUTF16BEOffs
[] =
686 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
688 const uint8_t expectedUTF16LE
[] =
689 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
690 int32_t toUTF16LEOffs
[]=
691 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
692 int32_t fmUTF16LEOffs
[] =
693 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
695 const uint8_t expectedUTF32BE
[] =
696 { 0x00, 0x00, 0x00, 0x31,
697 0x00, 0x00, 0x00, 0x32,
698 0x00, 0x00, 0x00, 0x33,
699 0x00, 0x00, 0x00, 0x00,
700 0x00, 0x00, 0x4e, 0x00,
701 0x00, 0x00, 0x4e, 0x8c,
702 0x00, 0x00, 0x4e, 0x09,
703 0x00, 0x00, 0x00, 0x2e };
704 int32_t toUTF32BEOffs
[]=
705 { 0x00, 0x00, 0x00, 0x00,
706 0x01, 0x01, 0x01, 0x01,
707 0x02, 0x02, 0x02, 0x02,
708 0x03, 0x03, 0x03, 0x03,
709 0x04, 0x04, 0x04, 0x04,
710 0x05, 0x05, 0x05, 0x05,
711 0x06, 0x06, 0x06, 0x06,
712 0x07, 0x07, 0x07, 0x07,
713 0x08, 0x08, 0x08, 0x08 };
714 int32_t fmUTF32BEOffs
[] =
715 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
717 const uint8_t expectedUTF32LE
[] =
718 { 0x31, 0x00, 0x00, 0x00,
719 0x32, 0x00, 0x00, 0x00,
720 0x33, 0x00, 0x00, 0x00,
721 0x00, 0x00, 0x00, 0x00,
722 0x00, 0x4e, 0x00, 0x00,
723 0x8c, 0x4e, 0x00, 0x00,
724 0x09, 0x4e, 0x00, 0x00,
725 0x2e, 0x00, 0x00, 0x00 };
726 int32_t toUTF32LEOffs
[]=
727 { 0x00, 0x00, 0x00, 0x00,
728 0x01, 0x01, 0x01, 0x01,
729 0x02, 0x02, 0x02, 0x02,
730 0x03, 0x03, 0x03, 0x03,
731 0x04, 0x04, 0x04, 0x04,
732 0x05, 0x05, 0x05, 0x05,
733 0x06, 0x06, 0x06, 0x06,
734 0x07, 0x07, 0x07, 0x07,
735 0x08, 0x08, 0x08, 0x08 };
736 int32_t fmUTF32LEOffs
[] =
737 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
742 /** Test chars #2 **/
744 /* Sahha [health], slashed h's */
745 const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
746 const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
749 const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
750 const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
751 int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
752 int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
753 /*********************************** START OF CODE finally *************/
755 gInBufferSize
= insize
;
756 gOutBufferSize
= outsize
;
758 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
763 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
764 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
766 log_verbose("Test surrogate behaviour for UTF8\n");
768 const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
769 const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
770 0xf0, 0x90, 0x90, 0x81,
773 int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
774 testConvertFromU(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
775 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
779 #ifdef U_ENABLE_GENERIC_ISO_2022
781 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
782 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
785 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
786 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
788 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
789 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
791 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
792 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
794 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
795 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
797 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
798 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
800 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
801 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
803 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
804 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
808 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
809 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
811 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
812 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
814 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
815 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
817 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
818 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
826 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
827 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf8", fmUTF8Offs
,FALSE
);
828 #ifdef U_ENABLE_GENERIC_ISO_2022
830 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
831 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ISO_2022", fmISO2022Offs
,FALSE
);
834 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
835 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
837 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
838 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16be", fmUTF16BEOffs
,FALSE
);
840 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
841 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32le", fmUTF32LEOffs
,FALSE
);
843 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
844 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32be", fmUTF32BEOffs
,FALSE
);
846 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
),
847 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-930", fmIBM930Offs
,FALSE
);
849 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),
850 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ibm-943", fmIBM943Offs
,FALSE
);
852 /* Try it again to make sure it still works */
853 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
854 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
856 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
857 malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]), "latin3", NULL
,FALSE
);
859 testConvertFromU(malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]),
860 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
863 testConvertFromU(LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]),
864 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
865 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
866 LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]), "LMBCS-1", fmLMBCSOffs
,FALSE
);
868 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
870 /* encode directly set D and set O */
871 static const uint8_t utf7
[] = {
878 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
879 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
881 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
883 static const UChar unicode
[] = {
885 Hi Mom -<WHITE SMILING FACE>-!
886 A<NOT IDENTICAL TO><ALPHA>.
888 [Japanese word "nihongo"]
890 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
891 0x41, 0x2262, 0x0391, 0x2e,
893 0x65e5, 0x672c, 0x8a9e
895 static const int32_t toUnicodeOffsets
[] = {
896 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
901 static const int32_t fromUnicodeOffsets
[] = {
902 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
903 11, 12, 12, 12, 13, 13, 13, 13, 14,
905 16, 16, 16, 17, 17, 17, 18, 18, 18
908 /* same but escaping set O (the exclamation mark) */
909 static const uint8_t utf7Restricted
[] = {
916 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
917 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
919 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
921 static const int32_t toUnicodeOffsetsR
[] = {
922 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
927 static const int32_t fromUnicodeOffsetsR
[] = {
928 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
929 11, 12, 12, 12, 13, 13, 13, 13, 14,
931 16, 16, 16, 17, 17, 17, 18, 18, 18
934 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
936 testConvertToU(utf7
, sizeof(utf7
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7", toUnicodeOffsets
,FALSE
);
938 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
940 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
944 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
945 * modified according to RFC 2060,
946 * and supplemented with the one example in RFC 2060 itself.
949 static const uint8_t imap
[] = {
960 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
961 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
963 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
965 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
966 0x2f, 0x6d, 0x61, 0x69, 0x6c,
967 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
968 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
970 static const UChar unicode
[] = {
971 /* Hi Mom -<WHITE SMILING FACE>-!
972 A<NOT IDENTICAL TO><ALPHA>.
974 [Japanese word "nihongo"]
981 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
982 0x41, 0x2262, 0x0391, 0x2e,
984 0x65e5, 0x672c, 0x8a9e,
986 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
987 0x2f, 0x6d, 0x61, 0x69, 0x6c,
988 0x2f, 0x65e5, 0x672c, 0x8a9e,
991 static const int32_t toUnicodeOffsets
[] = {
992 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
997 38, 39, 40, 41, 42, 43,
1002 static const int32_t fromUnicodeOffsets
[] = {
1003 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1004 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1006 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1008 20, 21, 22, 23, 24, 25,
1010 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1011 35, 36, 36, 36, 37, 37, 37, 37, 37
1014 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1016 testConvertToU(imap
, sizeof(imap
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1019 /* Test UTF-8 bad data handling*/
1021 static const uint8_t utf8
[]={
1023 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1026 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1027 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1028 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1029 0xdf, 0xbf, /* 7ff */
1030 0xbf, /* truncated tail */
1031 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1035 static const uint16_t utf8Expected
[]={
1049 static const int32_t utf8Offsets
[]={
1050 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1052 testConvertToU(utf8
, sizeof(utf8
),
1053 utf8Expected
, sizeof(utf8Expected
)/sizeof(utf8Expected
[0]), "utf-8", utf8Offsets
,FALSE
);
1057 /* Test UTF-32BE bad data handling*/
1059 static const uint8_t utf32
[]={
1060 0x00, 0x00, 0x00, 0x61,
1061 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1062 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1063 0x00, 0x00, 0x00, 0x62,
1064 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1065 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1066 0x00, 0x00, 0x01, 0x62,
1067 0x00, 0x00, 0x02, 0x62
1070 static const uint16_t utf32Expected
[]={
1072 0xfffd, /* 0x110000 out of range */
1073 0xDBFF, /* 0x10FFFF in range */
1076 0xfffd, /* 0xffffffff out of range */
1077 0xfffd, /* 0x7fffffff out of range */
1082 static const int32_t utf32Offsets
[]={
1083 0, 4, 8, 8, 12, 16, 20, 24, 28
1085 testConvertToU(utf32
, sizeof(utf32
),
1086 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets
,FALSE
);
1090 /* Test UTF-32LE bad data handling*/
1092 static const uint8_t utf32
[]={
1093 0x61, 0x00, 0x00, 0x00,
1094 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1095 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1096 0x62, 0x00, 0x00, 0x00,
1097 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1098 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1099 0x62, 0x01, 0x00, 0x00,
1100 0x62, 0x02, 0x00, 0x00,
1103 static const uint16_t utf32Expected
[]={
1105 0xfffd, /* 0x110000 out of range */
1106 0xDBFF, /* 0x10FFFF in range */
1109 0xfffd, /* 0xffffffff out of range */
1110 0xfffd, /* 0x7fffffff out of range */
1115 static const int32_t utf32Offsets
[]={
1116 0, 4, 8, 8, 12, 16, 20, 24, 28
1118 testConvertToU(utf32
, sizeof(utf32
),
1119 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32le", utf32Offsets
,FALSE
);
1124 static void TestCoverageMBCS(){
1126 UErrorCode status
= U_ZERO_ERROR
;
1127 const char *directory
= loadTestData(&status
);
1128 char* tdpath
= NULL
;
1129 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1130 int len
= strlen(directory
);
1133 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1134 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1135 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1136 uprv_strcpy(tdpath
,directory
);
1137 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
1139 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1142 u_setDataDirectory(tdpath
);
1143 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1146 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test1.ucm,
1147 which is a test file for MBCS conversion with single-byte codepage data. */
1150 /* MBCS with single byte codepage data test1.ucm*/
1151 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1152 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1153 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1156 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1157 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1160 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test3.ucm,
1161 which is a test file for MBCS conversion with three-byte codepage data. */
1164 /* MBCS with three byte codepage data test3.ucm*/
1165 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1166 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1167 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1169 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1170 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1171 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1174 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1175 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
1178 testConvertToU(test3input
, sizeof(test3input
),
1179 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test3", fromtest3Offs
,FALSE
);
1183 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test4.ucm,
1184 which is a test file for MBCS conversion with four-byte codepage data. */
1187 /* MBCS with four byte codepage data test4.ucm*/
1188 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1189 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1190 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1192 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1193 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1194 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1197 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1198 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
1201 testConvertToU(test4input
, sizeof(test4input
),
1202 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test4", fromtest4Offs
,FALSE
);
1207 /* restore the original data directory */
1208 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1209 u_setDataDirectory(saveDirectory
);
1210 free(saveDirectory
);
1215 static void TestConverterType(const char *convName
, UConverterType convType
) {
1216 UConverter
* myConverter
;
1217 UErrorCode err
= U_ZERO_ERROR
;
1219 myConverter
= my_ucnv_open(convName
, &err
);
1221 if (U_FAILURE(err
)) {
1222 log_data_err("Failed to create an %s converter\n", convName
);
1227 if (ucnv_getType(myConverter
)!=convType
) {
1228 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1229 convName
, convType
);
1232 log_verbose("ucnv_getType %s ok\n", convName
);
1235 ucnv_close(myConverter
);
1238 static void TestConverterTypesAndStarters()
1240 UConverter
* myConverter
;
1241 UErrorCode err
= U_ZERO_ERROR
;
1242 UBool mystarters
[256];
1244 /* const UBool expectedKSCstarters[256] = {
1245 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1252 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1253 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1254 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1255 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1256 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1257 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1258 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1259 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1263 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1264 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1265 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1266 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1267 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1268 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1269 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1270 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1273 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1275 myConverter
= ucnv_open("ksc", &err
);
1276 if (U_FAILURE(err
)) {
1277 log_data_err("Failed to create an ibm-ksc converter\n");
1282 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1283 log_err("ucnv_getType Failed for ibm-949\n");
1285 log_verbose("ucnv_getType ibm-949 ok\n");
1287 if(myConverter
!=NULL
)
1288 ucnv_getStarters(myConverter
, mystarters
, &err
);
1290 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1291 log_err("Failed ucnv_getStarters for ksc\n");
1293 log_verbose("ucnv_getStarters ok\n");*/
1296 ucnv_close(myConverter
);
1298 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1299 TestConverterType("ibm-878", UCNV_SBCS
);
1300 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1301 TestConverterType("ibm-1208", UCNV_UTF8
);
1302 TestConverterType("utf-8", UCNV_UTF8
);
1303 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1304 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1305 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1306 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1307 #ifdef U_ENABLE_GENERIC_ISO_2022
1308 TestConverterType("iso-2022", UCNV_ISO_2022
);
1310 TestConverterType("hz", UCNV_HZ
);
1311 TestConverterType("scsu", UCNV_SCSU
);
1312 TestConverterType("x-iscii-de", UCNV_ISCII
);
1313 TestConverterType("ascii", UCNV_US_ASCII
);
1314 TestConverterType("utf-7", UCNV_UTF7
);
1315 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1316 TestConverterType("bocu-1", UCNV_BOCU1
);
1320 TestAmbiguousConverter(UConverter
*cnv
) {
1321 static const char inBytes
[2]={ 0x61, 0x5c };
1322 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1326 UErrorCode errorCode
;
1329 /* try to convert an 'a' and a US-ASCII backslash */
1330 errorCode
=U_ZERO_ERROR
;
1333 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+2, NULL
, TRUE
, &errorCode
);
1334 if(U_FAILURE(errorCode
)) {
1335 /* we do not care about general failures in this test; the input may just not be mappable */
1339 if(outUnicode
[0]!=0x61 || outUnicode
[1]==0xfffd) {
1340 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1344 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1346 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1347 if((outUnicode
[1]!=0x5c)!=isAmbiguous
) {
1348 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1349 ucnv_getName(cnv
, &errorCode
), outUnicode
[1]!=0x5c, isAmbiguous
);
1353 if(outUnicode
[1]!=0x5c) {
1354 /* needs fixup, fix it */
1355 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1356 if(outUnicode
[1]!=0x5c) {
1357 /* the fix failed */
1358 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
1364 static void TestAmbiguous()
1366 UErrorCode status
= U_ZERO_ERROR
;
1367 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1368 const char target
[] = {
1369 /* "\\usr\\local\\share\\data\\icutest.txt" */
1370 0x5c, 0x75, 0x73, 0x72,
1371 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1372 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1373 0x5c, 0x64, 0x61, 0x74, 0x61,
1374 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1377 UChar asciiResult
[200], sjisResult
[200];
1378 int32_t asciiLength
= 0, sjisLength
= 0, i
;
1381 /* enumerate all converters */
1382 status
=U_ZERO_ERROR
;
1383 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1384 cnv
=ucnv_open(name
, &status
);
1385 if(U_SUCCESS(status
)) {
1386 TestAmbiguousConverter(cnv
);
1389 log_err("error: unable to open available converter \"%s\"\n", name
);
1390 status
=U_ZERO_ERROR
;
1394 sjis_cnv
= ucnv_open("ibm-943", &status
);
1395 if (U_FAILURE(status
))
1397 log_data_err("Failed to create a SJIS converter\n");
1400 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1401 if (U_FAILURE(status
))
1403 log_data_err("Failed to create a LATIN-1 converter\n");
1404 ucnv_close(sjis_cnv
);
1407 /* convert target from SJIS to Unicode */
1408 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, sizeof(sjisResult
)/U_SIZEOF_UCHAR
, target
, strlen(target
), &status
);
1409 if (U_FAILURE(status
))
1411 log_err("Failed to convert the SJIS string.\n");
1412 ucnv_close(sjis_cnv
);
1413 ucnv_close(ascii_cnv
);
1416 /* convert target from Latin-1 to Unicode */
1417 asciiLength
= ucnv_toUChars(ascii_cnv
, asciiResult
, sizeof(asciiResult
)/U_SIZEOF_UCHAR
, target
, strlen(target
), &status
);
1418 if (U_FAILURE(status
))
1420 log_err("Failed to convert the Latin-1 string.\n");
1422 ucnv_close(sjis_cnv
);
1423 ucnv_close(ascii_cnv
);
1426 if (!ucnv_isAmbiguous(sjis_cnv
))
1428 log_err("SJIS converter should contain ambiguous character mappings.\n");
1431 ucnv_close(sjis_cnv
);
1432 ucnv_close(ascii_cnv
);
1435 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1437 log_err("File separators for SJIS don't need to be fixed.\n");
1439 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1440 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1442 log_err("Fixing file separator for SJIS failed.\n");
1444 ucnv_close(sjis_cnv
);
1445 ucnv_close(ascii_cnv
);
1449 TestSignatureDetection(){
1450 /* with null terminated strings */
1452 static const char* data
[] = {
1453 "\xFE\xFF\x00\x00", /* UTF-16BE */
1454 "\xFF\xFE\x00\x00", /* UTF-16LE */
1455 "\xEF\xBB\xBF\x00", /* UTF-8 */
1456 "\x0E\xFE\xFF\x00", /* SCSU */
1458 "\xFE\xFF", /* UTF-16BE */
1459 "\xFF\xFE", /* UTF-16LE */
1460 "\xEF\xBB\xBF", /* UTF-8 */
1461 "\x0E\xFE\xFF", /* SCSU */
1463 "\xFE\xFF\x41\x42", /* UTF-16BE */
1464 "\xFF\xFE\x41\x41", /* UTF-16LE */
1465 "\xEF\xBB\xBF\x41", /* UTF-8 */
1466 "\x0E\xFE\xFF\x41", /* SCSU */
1468 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1469 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1470 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1471 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1472 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1474 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1476 static const char* expected
[] = {
1499 static const int32_t expectedLength
[] ={
1524 int32_t signatureLength
= -1;
1525 const char* source
= NULL
;
1526 const char* enc
= NULL
;
1527 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
1530 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1532 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1535 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1536 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1539 if(signatureLength
!= expectedLength
[i
]){
1540 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1545 static const char* data
[] = {
1546 "\xFE\xFF\x00", /* UTF-16BE */
1547 "\xFF\xFE\x00", /* UTF-16LE */
1548 "\xEF\xBB\xBF\x00", /* UTF-8 */
1549 "\x0E\xFE\xFF\x00", /* SCSU */
1550 "\x00\x00\xFE\xFF", /* UTF-32BE */
1551 "\xFF\xFE\x00\x00", /* UTF-32LE */
1552 "\xFE\xFF", /* UTF-16BE */
1553 "\xFF\xFE", /* UTF-16LE */
1554 "\xEF\xBB\xBF", /* UTF-8 */
1555 "\x0E\xFE\xFF", /* SCSU */
1556 "\x00\x00\xFE\xFF", /* UTF-32BE */
1557 "\xFF\xFE\x00\x00", /* UTF-32LE */
1558 "\xFE\xFF\x41\x42", /* UTF-16BE */
1559 "\xFF\xFE\x41\x41", /* UTF-16LE */
1560 "\xEF\xBB\xBF\x41", /* UTF-8 */
1561 "\x0E\xFE\xFF\x41", /* SCSU */
1562 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1563 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1564 "\xFB\xEE\x28", /* BOCU-1 */
1565 "\xFF\x41\x42" /* NULL */
1567 static const int len
[] = {
1590 static const char* expected
[] = {
1612 static const int32_t expectedLength
[] ={
1636 int32_t signatureLength
= -1;
1637 int32_t sourceLength
=-1;
1638 const char* source
= NULL
;
1639 const char* enc
= NULL
;
1640 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
1643 sourceLength
= len
[i
];
1644 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1646 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1649 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1650 if(expected
[i
] !=NULL
){
1651 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1655 if(signatureLength
!= expectedLength
[i
]){
1656 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1665 static const uint8_t in
[]={
1666 /* H - +Jjo- - ! +- +2AHcAQ */
1669 0x2b, 0x4a, 0x6a, 0x6f,
1673 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1676 /* expected test results */
1677 static const int32_t results
[]={
1678 /* number of bytes read, code point */
1681 4, 0x263a, /* <WHITE SMILING FACE> */
1688 const char *cnvName
;
1689 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1690 UErrorCode errorCode
=U_ZERO_ERROR
;
1691 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1692 if(U_FAILURE(errorCode
)) {
1693 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1696 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1697 /* Test the condition when source >= sourceLimit */
1698 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1699 cnvName
= ucnv_getName(cnv
, &errorCode
);
1700 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1701 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1709 static const uint8_t in
[]={
1710 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1713 0x26, 0x4a, 0x6a, 0x6f,
1717 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1720 /* expected test results */
1721 static const int32_t results
[]={
1722 /* number of bytes read, code point */
1725 4, 0x263a, /* <WHITE SMILING FACE> */
1732 const char *cnvName
;
1733 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1734 UErrorCode errorCode
=U_ZERO_ERROR
;
1735 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1736 if(U_FAILURE(errorCode
)) {
1737 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1740 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1741 /* Test the condition when source >= sourceLimit */
1742 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1743 cnvName
= ucnv_getName(cnv
, &errorCode
);
1744 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1745 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1753 static const uint8_t in
[]={
1757 0xf0, 0x90, 0x80, 0x80,
1758 0xf4, 0x84, 0x8c, 0xa1,
1759 0xf0, 0x90, 0x90, 0x81
1762 /* expected test results */
1763 static const int32_t results
[]={
1764 /* number of bytes read, code point */
1773 /* error test input */
1774 static const uint8_t in2
[]={
1776 0xc0, 0x80, /* illegal non-shortest form */
1777 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1778 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1779 0xc0, 0xc0, /* illegal trail byte */
1780 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1781 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1782 0xfe, /* illegal byte altogether */
1786 /* expected error test results */
1787 static const int32_t results2
[]={
1788 /* number of bytes read, code point */
1793 UConverterToUCallback cb
;
1796 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1797 UErrorCode errorCode
=U_ZERO_ERROR
;
1798 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1799 if(U_FAILURE(errorCode
)) {
1800 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1803 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1804 /* Test the condition when source >= sourceLimit */
1805 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1807 /* test error behavior with a skip callback */
1808 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1809 source
=(const char *)in2
;
1810 limit
=(const char *)(in2
+sizeof(in2
));
1811 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
1817 static TestCESU8() {
1819 static const uint8_t in
[]={
1823 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1824 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1825 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1829 /* expected test results */
1830 static const int32_t results
[]={
1831 /* number of bytes read, code point */
1837 -1,0xd802, /* may read 3 or 6 bytes */
1838 -1,0x10ffff,/* may read 0 or 3 bytes */
1842 /* error test input */
1843 static const uint8_t in2
[]={
1845 0xc0, 0x80, /* illegal non-shortest form */
1846 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1847 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1848 0xc0, 0xc0, /* illegal trail byte */
1849 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1850 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1851 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1852 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1853 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1854 0xfe, /* illegal byte altogether */
1858 /* expected error test results */
1859 static const int32_t results2
[]={
1860 /* number of bytes read, code point */
1865 UConverterToUCallback cb
;
1868 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1869 UErrorCode errorCode
=U_ZERO_ERROR
;
1870 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
1871 if(U_FAILURE(errorCode
)) {
1872 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
1875 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
1876 /* Test the condition when source >= sourceLimit */
1877 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1879 /* test error behavior with a skip callback */
1880 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1881 source
=(const char *)in2
;
1882 limit
=(const char *)(in2
+sizeof(in2
));
1883 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
1889 static TestUTF16() {
1891 static const uint8_t in1
[]={
1892 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1894 static const uint8_t in2
[]={
1895 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1897 static const uint8_t in3
[]={
1898 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1901 /* expected test results */
1902 static const int32_t results1
[]={
1903 /* number of bytes read, code point */
1907 static const int32_t results2
[]={
1908 /* number of bytes read, code point */
1912 static const int32_t results3
[]={
1913 /* number of bytes read, code point */
1920 const char *source
, *limit
;
1922 UErrorCode errorCode
=U_ZERO_ERROR
;
1923 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
1924 if(U_FAILURE(errorCode
)) {
1925 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
1929 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
1930 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
1932 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
1933 ucnv_resetToUnicode(cnv
);
1934 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
1936 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
1937 ucnv_resetToUnicode(cnv
);
1938 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
1940 /* Test the condition when source >= sourceLimit */
1941 ucnv_resetToUnicode(cnv
);
1942 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1948 static TestUTF16BE() {
1950 static const uint8_t in
[]={
1956 0xd8, 0x01, 0xdc, 0x01
1959 /* expected test results */
1960 static const int32_t results
[]={
1961 /* number of bytes read, code point */
1970 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1971 UErrorCode errorCode
=U_ZERO_ERROR
;
1972 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
1973 if(U_FAILURE(errorCode
)) {
1974 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
1977 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
1978 /* Test the condition when source >= sourceLimit */
1979 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1980 /*Test for the condition where there is an invalid character*/
1982 static const uint8_t source2
[]={0x61};
1983 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
1984 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
1988 * Test disabled because currently the UTF-16BE/LE converters are supposed
1989 * to not set errors for unpaired surrogates.
1990 * This may change with
1991 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
1994 /*Test for the condition where there is a surrogate pair*/
1996 const uint8_t source2
[]={0xd8, 0x01};
1997 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2006 static const uint8_t in
[]={
2011 0x01, 0xd8, 0x01, 0xdc
2014 /* expected test results */
2015 static const int32_t results
[]={
2016 /* number of bytes read, code point */
2024 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2025 UErrorCode errorCode
=U_ZERO_ERROR
;
2026 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2027 if(U_FAILURE(errorCode
)) {
2028 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2031 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2032 /* Test the condition when source >= sourceLimit */
2033 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2034 /*Test for the condition where there is an invalid character*/
2036 static const uint8_t source2
[]={0x61};
2037 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2038 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2042 * Test disabled because currently the UTF-16BE/LE converters are supposed
2043 * to not set errors for unpaired surrogates.
2044 * This may change with
2045 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2048 /*Test for the condition where there is a surrogate character*/
2050 static const uint8_t source2
[]={0x01, 0xd8};
2051 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2059 static TestUTF32() {
2061 static const uint8_t in1
[]={
2062 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2064 static const uint8_t in2
[]={
2065 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2067 static const uint8_t in3
[]={
2068 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2071 /* expected test results */
2072 static const int32_t results1
[]={
2073 /* number of bytes read, code point */
2077 static const int32_t results2
[]={
2078 /* number of bytes read, code point */
2082 static const int32_t results3
[]={
2083 /* number of bytes read, code point */
2086 4, 0xfffd, /* unmatched surrogate */
2087 4, 0xfffd /* unmatched surrogate */
2090 const char *source
, *limit
;
2092 UErrorCode errorCode
=U_ZERO_ERROR
;
2093 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2094 if(U_FAILURE(errorCode
)) {
2095 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
2099 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2100 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
2102 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2103 ucnv_resetToUnicode(cnv
);
2104 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
2106 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2107 ucnv_resetToUnicode(cnv
);
2108 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2110 /* Test the condition when source >= sourceLimit */
2111 ucnv_resetToUnicode(cnv
);
2112 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2120 static const uint8_t in
[]={
2121 0x00, 0x00, 0x00, 0x61,
2122 0x00, 0x00, 0x30, 0x61,
2123 0x00, 0x00, 0xdc, 0x00,
2124 0x00, 0x00, 0xd8, 0x00,
2125 0x00, 0x00, 0xdf, 0xff,
2126 0x00, 0x00, 0xff, 0xfe,
2127 0x00, 0x10, 0xab, 0xcd,
2128 0x00, 0x10, 0xff, 0xff
2131 /* expected test results */
2132 static const int32_t results
[]={
2133 /* number of bytes read, code point */
2144 /* error test input */
2145 static const uint8_t in2
[]={
2146 0x00, 0x00, 0x00, 0x61,
2147 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2148 0x00, 0x00, 0x00, 0x62,
2149 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2150 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2151 0x00, 0x00, 0x01, 0x62,
2152 0x00, 0x00, 0x02, 0x62
2155 /* expected error test results */
2156 static const int32_t results2
[]={
2157 /* number of bytes read, code point */
2164 UConverterToUCallback cb
;
2167 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2168 UErrorCode errorCode
=U_ZERO_ERROR
;
2169 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2170 if(U_FAILURE(errorCode
)) {
2171 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2174 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2176 /* Test the condition when source >= sourceLimit */
2177 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2179 /* test error behavior with a skip callback */
2180 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2181 source
=(const char *)in2
;
2182 limit
=(const char *)(in2
+sizeof(in2
));
2183 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
/*
 * UTF-32LE test data and driver. The enclosing function header is not part of
 * this excerpt (presumably TestUTF32LE, which is prototyped near the top of the
 * file). Each input unit is four bytes stored low byte first, as the
 * "0x110000 out of range" row of in2[] below shows.
 */
2191 static const uint8_t in
[]={
2192 0x61, 0x00, 0x00, 0x00,
2193 0x61, 0x30, 0x00, 0x00,
2194 0x00, 0xdc, 0x00, 0x00,
2195 0x00, 0xd8, 0x00, 0x00,
2196 0xff, 0xdf, 0x00, 0x00,
2197 0xfe, 0xff, 0x00, 0x00,
2198 0xcd, 0xab, 0x10, 0x00,
2199 0xff, 0xff, 0x10, 0x00
2202 /* expected test results */
2203 static const int32_t results
[]={
2204 /* number of bytes read, code point */
2215 /* error test input */
2216 static const uint8_t in2
[]={
2217 0x61, 0x00, 0x00, 0x00,
2218 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2219 0x62, 0x00, 0x00, 0x00,
2220 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2221 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2222 0x62, 0x01, 0x00, 0x00,
2223 0x62, 0x02, 0x00, 0x00,
2226 /* expected error test results */
2227 static const int32_t results2
[]={
2228 /* number of bytes read, code point */
/* cb is passed by address to ucnv_setToUCallBack() below; it is not read in the visible lines. */
2235 UConverterToUCallback cb
;
/* source..limit spans the whole of in[] (limit is one past its last byte). */
2238 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2239 UErrorCode errorCode
=U_ZERO_ERROR
;
2240 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
/* Report a failed open; the remainder of this branch is not visible in this excerpt. */
2241 if(U_FAILURE(errorCode
)) {
2242 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
/* Hand in[] and the expected results[] to the TestNextUChar helper (defined elsewhere). */
2245 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2247 /* Test the condition when source >= sourceLimit */
2248 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2250 /* test error behavior with a skip callback */
/* NOTE(review): errorCode is not inspected after this call in the visible lines. */
2251 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
/* Re-point source/limit at the error input in2[] and check it against results2[]. */
2252 source
=(const char *)in2
;
2253 limit
=(const char *)(in2
+sizeof(in2
));
2254 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
/*
 * LATIN_1 test data and driver. The enclosing function header is not part of
 * this excerpt (presumably TestLATIN1, which is prototyped near the top of the
 * file). The contents of in[] and results[] are on lines not shown here.
 */
2262 static const uint8_t in
[]={
2271 /* expected test results */
2272 static const int32_t results
[]={
2273 /* number of bytes read, code point */
/*
 * in1[] (16-bit units) and out1[] (bytes) list the same values, all <= 0xff.
 * They are passed together to TestConv() for the "LATIN_1" converter below.
 */
2281 static const uint16_t in1
[] = {
2282 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2283 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2284 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2285 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2286 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2287 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2288 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2289 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2290 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2291 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2292 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2295 static const uint8_t out1
[] = {
2296 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2297 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2298 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2299 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2300 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2301 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2302 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2303 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2304 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2305 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2306 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
/*
 * in2[] (16-bit units) and out2[] (bytes) again list the same values, this
 * time all below 0x80. They are passed together to TestConv() for "ASCII".
 */
2309 static const uint16_t in2
[]={
2310 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2311 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2312 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2313 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2314 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2315 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2316 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2317 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2318 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2319 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2320 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2321 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2322 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2323 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2324 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2325 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2326 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2327 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2328 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2329 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2330 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2331 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2332 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2333 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2334 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2335 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2336 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2337 0x37, 0x20, 0x2A, 0x2F,
2339 static const unsigned char out2
[]={
2340 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2341 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2342 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2343 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2344 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2345 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2346 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2347 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2348 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2349 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2350 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2351 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2352 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2353 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2354 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2355 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2356 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2357 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2358 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2359 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2360 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2361 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2362 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2363 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2364 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2365 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2366 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2367 0x37, 0x20, 0x2A, 0x2F,
/* source..limit spans the whole of in[]. */
2369 const char *source
=(const char *)in
;
2370 const char *limit
=(const char *)in
+sizeof(in
);
2372 UErrorCode errorCode
=U_ZERO_ERROR
;
2373 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
/* Report a failed open; the remainder of this branch is not visible in this excerpt. */
2374 if(U_FAILURE(errorCode
)) {
2375 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2378 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2379 /* Test the condition when source >= sourceLimit */
2380 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * sizeof(table)/2 turns the byte size of a uint16_t table into its element
 * count. NOTE(review): the casts below discard the const qualifier of the
 * static tables.
 */
2381 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2382 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
/*
 * Single-byte (x-mac-turkish) test data and driver. The enclosing function
 * header is not part of this excerpt (presumably TestSBCS, which is prototyped
 * near the top of the file).
 */
2390 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2391 /* expected test results */
2392 static const int32_t results
[]={
2393 /* number of bytes read, code point */
/* source..limit spans the six bytes of in[]. */
2402 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2403 UErrorCode errorCode
=U_ZERO_ERROR
;
2404 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
/*
 * A failed open is reported through log_data_err here, whereas the UTF-32 and
 * LATIN_1 drivers use log_err (presumably because this converter needs
 * converter data to be available - confirm).
 */
2405 if(U_FAILURE(errorCode
)) {
2406 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2409 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2410 /* Test the condition when source >= sourceLimit */
2411 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * NOTE(review): the trailing comment opener on the next line suggests the
 * illegal-character case below is disabled; its closing delimiter is not
 * visible in this excerpt. If it is ever re-enabled, sizeof(illegalsource)
 * measures the pointer rather than the one-byte input1 array.
 */
2412 /*Test for Illegal character */ /*
2414 static const uint8_t input1[]={ 0xA1 };
2415 const char* illegalsource=(const char*)input1;
2416 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
/*
 * Double-byte (@ibm9027) test data and driver. The enclosing function header
 * is not part of this excerpt (presumably TestDBCS, which is prototyped near
 * the top of the file). The contents of in[] and results[] are on lines not
 * shown here.
 */
2425 static const uint8_t in
[]={
2434 /* expected test results */
2435 static const int32_t results
[]={
2436 /* number of bytes read, code point */
/* source..limit spans the whole of in[]. */
2444 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2445 UErrorCode errorCode
=U_ZERO_ERROR
;
/* The converter is opened through the my_ucnv_open helper (defined elsewhere). */
2447 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2448 if(U_FAILURE(errorCode
)) {
2449 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2452 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2453 /* Test the condition when source >= sourceLimit */
2454 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2455 /*Test for the condition where there is an invalid character*/
/*
 * NOTE(review): the expected code passed for this "invalid character" input is
 * U_ZERO_ERROR, i.e. no error is expected - confirm that this is intended.
 */
2457 static const uint8_t source2
[]={0x1a, 0x1b};
2458 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2460 /*Test for the condition where we have a truncated char*/
/*
 * Install the STOP callback (the previous callback and context are not
 * retrieved: both out-parameters are NULL), then feed a single byte 0xc4 and
 * expect U_TRUNCATED_CHAR_FOUND.
 */
2462 static const uint8_t source1
[]={0xc4};
2463 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2464 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
/*
 * Multi-byte (ibm-1363) test data and driver. The enclosing function header is
 * not part of this excerpt (presumably TestMBCS, which is prototyped near the
 * top of the file). The contents of in[] and results[] are on lines not shown
 * here.
 */
2472 static const uint8_t in
[]={
2483 /* expected test results */
2484 static const int32_t results
[]={
2485 /* number of bytes read, code point */
/* source..limit spans the whole of in[]. */
2495 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2496 UErrorCode errorCode
=U_ZERO_ERROR
;
2498 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2499 if(U_FAILURE(errorCode
)) {
2500 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2503 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2504 /* Test the condition when source >= sourceLimit */
2505 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2506 /*Test for the condition where there is an invalid character*/
/*
 * NOTE(review): the expected code passed for this "invalid character" input is
 * U_ZERO_ERROR, i.e. no error is expected - confirm that this is intended.
 */
2508 static const uint8_t source2
[]={0xa1, 0x01};
2509 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2511 /*Test for the condition where we have a truncated char*/
/*
 * Install the STOP callback (previous callback and context are not retrieved),
 * then feed a single byte 0xc4 and expect U_TRUNCATED_CHAR_FOUND.
 */
2513 static const uint8_t source1
[]={0xc4};
2514 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2515 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
/*
 * Generic ISO_2022 test data and driver, compiled only when
 * U_ENABLE_GENERIC_ISO_2022 is defined. The function header, most of in[] and
 * results[], and the declaration of cnv are on lines not shown in this excerpt
 * (the function is presumably TestISO_2022, prototyped near the top of the file).
 */
2521 #ifdef U_ENABLE_GENERIC_ISO_2022
2526 static const uint8_t in
[]={
2533 0xf0, 0x90, 0x80, 0x80
2538 /* expected test results */
2539 static const int32_t results
[]={
2540 /* number of bytes read, code point */
2541 4, 0x0031, /* 4 bytes including the escape sequence */
/* source..limit spans the whole of in[]. */
2549 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2550 UErrorCode errorCode
=U_ZERO_ERROR
;
2553 cnv
=ucnv_open("ISO_2022", &errorCode
);
2554 if(U_FAILURE(errorCode
)) {
2555 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2558 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2560 /* Test the condition when source >= sourceLimit */
/*
 * Two boundary cases: a limit one byte before source is expected to yield
 * U_ILLEGAL_ARGUMENT_ERROR, a limit equal to source U_INDEX_OUTOFBOUNDS_ERROR.
 */
2561 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2562 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2563 /*Test for the condition where we have a truncated char*/
/* Install the STOP callback, then feed a single byte 0xc4. */
2565 static const uint8_t source1
[]={0xc4};
2566 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2567 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2569 /*Test for the condition where there is an invalid character*/
/* Unlike the DBCS and MBCS drivers, this case expects U_ILLEGAL_CHAR_FOUND. */
2571 static const uint8_t source2
[]={0xa1, 0x01};
2572 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
/*
 * TestSmallTargetBuffer: converts source..sourceLimit from Unicode with cnv and
 * then back to Unicode while the target limit is advanced by only i units per
 * call, so U_BUFFER_OVERFLOW_ERROR is treated as a recoverable outcome and
 * reset. The result in uBuf is finally compared with the input.
 *
 * source       first UTF-16 unit of the input
 * sourceLimit  one past the last input unit
 * cnv          converter under test
 *
 * The return-type line, several declarations (uBuf, cBuf, cTarget, uTarget,
 * i, len), the loop openers and the closing braces are on lines not present
 * in this excerpt.
 */
2580 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2581 const UChar
* uSource
;
2582 const UChar
* uSourceLimit
;
2583 const char* cSource
;
2584 const char* cSourceLimit
;
2585 UChar
*uTargetLimit
=NULL
;
2588 const char *cTargetLimit
;
2591 int32_t uBufSize
= 120;
2594 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): neither malloc result is checked before use in the visible lines. */
2595 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2596 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2599 uSource
= (UChar
*) source
;
2600 uSourceLimit
=(const UChar
*)sourceLimit
;
/* Both target limits start at the beginning of their buffers and are grown step by step below. */
2604 cTargetLimit
= cBuf
;
2605 uTargetLimit
= uBuf
;
/* Unicode -> bytes: open up i more bytes of cBuf, then convert as much as fits. */
2609 cTargetLimit
= cTargetLimit
+ i
;
2610 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2611 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2612 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): this message names ucnv_toUnicode although the call above is ucnv_fromUnicode. */
2616 if(U_FAILURE(errorCode
)){
2617 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2621 }while (uSource
<uSourceLimit
);
/* Bytes -> Unicode: the bytes written so far (up to cTarget) become the source. */
2623 cSourceLimit
=cTarget
;
2625 uTargetLimit
=uTargetLimit
+i
;
2626 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2627 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2628 errorCode
=U_ZERO_ERROR
;
2631 if(U_FAILURE(errorCode
)){
2632 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2635 }while(cSource
<cSourceLimit
);
/*
 * NOTE(review): the bound is (source - sourceLimit), which is zero or negative
 * whenever source <= sourceLimit, so this comparison loop would then never
 * execute; (sourceLimit - source) may have been intended.
 */
2639 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2640 if(uBuf
[len
]!=uSource
[len
]){
2641 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2648 /* Test for Jitterbug 778 */
/*
 * TestToAndFromUChars: round-trips source..sourceLimit through
 * ucnv_fromUChars() and ucnv_toUChars() on cnv and compares the decoded units
 * with the input, logging each mismatch.
 *
 * source       first UTF-16 unit of the input
 * sourceLimit  one past the last input unit
 * cnv          converter under test
 *
 * Several declarations (uBuf, cBuf, cTarget, uTarget, test), some assignments
 * and the closing braces are on lines not present in this excerpt.
 */
2649 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2650 const UChar
* uSource
;
2651 const UChar
* uSourceLimit
;
2652 const char* cSource
;
2653 UChar
*uTargetLimit
=NULL
;
2656 const char *cTargetLimit
;
2659 int32_t uBufSize
= 120;
2660 int numCharsInTarget
=0;
2661 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): neither malloc result is checked before use in the visible lines. */
2662 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2663 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2665 uSourceLimit
=sourceLimit
;
/* The limits match the allocation sizes above: uBufSize*5 elements each. */
2667 cTargetLimit
= cBuf
+uBufSize
*5;
2669 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Capacity and length arguments are computed as pointer differences. */
2671 numCharsInTarget
=ucnv_fromUChars( cnv
, cTarget
, (cTargetLimit
-cTarget
),uSource
,(uSourceLimit
-uSource
), &errorCode
);
/* NOTE(review): this message names ucnv_fromUnicode although the call above is ucnv_fromUChars. */
2672 if(U_FAILURE(errorCode
)){
2673 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode exactly the numCharsInTarget bytes reported by the encoding step. */
2678 ucnv_toUChars(cnv
,uTarget
,(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2679 if(U_FAILURE(errorCode
)){
2680 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units (*test) with the input (*uSource); the pointer increments are not visible here. */
2684 while(uSource
<uSourceLimit
){
2685 if(*test
!=*uSource
){
2687 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * TestSmallSourceBuffer: converts source..sourceLimit from Unicode with cnv and
 * back again while the source limit is advanced by one unit per call (the
 * target buffers are given their full size). The result in uBuf is finally
 * compared with the input.
 *
 * source       first UTF-16 unit of the input
 * sourceLimit  one past the last input unit
 * cnv          converter under test
 *
 * Several declarations (uBuf, cBuf, cTarget, uTarget, len), the loop openers
 * and the closing braces are on lines not present in this excerpt.
 */
2696 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2697 const UChar
* uSource
;
2698 const UChar
* uSourceLimit
;
2699 const char* cSource
;
2700 const char* cSourceLimit
;
2701 UChar
*uTargetLimit
=NULL
;
2704 const char *cTargetLimit
;
2707 int32_t uBufSize
= 120;
/* temp keeps the real end of the input; uSourceLimit is moved towards it step by step. */
2710 const UChar
*temp
= sourceLimit
;
2711 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): neither malloc result is checked before use in the visible lines. */
2712 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2713 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2717 uSource
= (UChar
*) source
;
/* Target limits cover the full allocations; the source limit starts empty (equal to uSource). */
2721 cTargetLimit
= cBuf
;
2722 uTargetLimit
= uBuf
+uBufSize
*5;
2723 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2724 uSourceLimit
=uSource
;
/* Unicode -> bytes: expose one more input unit (never past sourceLimit), then convert. */
2727 if (uSourceLimit
< sourceLimit
) {
2728 uSourceLimit
= uSourceLimit
+1;
2730 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2731 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2732 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): this message names ucnv_toUnicode although the call above is ucnv_fromUnicode. */
2736 if(U_FAILURE(errorCode
)){
2737 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2741 }while (uSource
<temp
);
/* Bytes -> Unicode: expose one more encoded byte (never past cTarget), then convert. */
2745 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2746 cSourceLimit
= cSourceLimit
+1;
2748 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2749 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2750 errorCode
=U_ZERO_ERROR
;
2753 if(U_FAILURE(errorCode
)){
2754 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2757 }while(cSource
<cTarget
);
/*
 * NOTE(review): the bound is (source - sourceLimit), which is zero or negative
 * whenever source <= sourceLimit, so this comparison loop would then never
 * execute; (sourceLimit - source) may have been intended.
 */
2761 for(;len
<(int)(source
- sourceLimit
);len
++){
2762 if(uBuf
[len
]!=uSource
[len
]){
2763 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
/*
 * TestGetNextUChar2022: decodes source..limit one code point at a time with
 * ucnv_getNextUChar() and compares each result with the expected UTF-16 text
 * in results[], logging failures under the given message prefix.
 *
 * cnv      converter under test
 * source   first encoded byte
 * limit    one past the last encoded byte
 * results  expected UTF-16 code units
 * message  prefix used in log output
 *
 * The return-type line, the declarations of c, exC, i and len, the loop
 * opener and the closing braces are on lines not present in this excerpt.
 */
2771 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2772 const uint16_t results
[], const char* message
){
2774 const char* s
=(char*)source
;
2775 const uint16_t *r
=results
;
2776 UErrorCode errorCode
=U_ZERO_ERROR
;
2781 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
/* U_INDEX_OUTOFBOUNDS_ERROR ends the walk quietly; any other failure is logged. */
2782 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2783 break; /* no more significant input */
2784 } else if(U_FAILURE(errorCode
)) {
2785 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
/* When the expected unit is a lead surrogate, the expected code point exC is read with UTF_NEXT_CHAR_SAFE. */
2788 if(UTF_IS_FIRST_SURROGATE(*r
)){
2790 UTF_NEXT_CHAR_SAFE(r
, i
, len
, exC
, FALSE
);
/*
 * NOTE(review): the comparison uses exC but the message prints *r as the
 * expected value; confirm the two always agree at this point.
 */
2795 if(c
!=(uint32_t)(exC
))
2796 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * TestJitterbug930: regression check for the offsets written by
 * ucnv_fromUnicode() with the converter named by enc. It opens the converter
 * through my_ucnv_open, fills the offsets array off with '*' bytes before each
 * of two conversions, and compares the count numOffWritten with the number of
 * bytes produced (target - out), logging a failure when they differ.
 *
 * The two while loops compare *offsets against off[10]; presumably off[10] is
 * an entry the conversion never writes, so it still holds the fill pattern -
 * confirm against the complete source.
 *
 * Declarations of in, out, off and target, most arguments of the
 * ucnv_fromUnicode calls, the loop bodies and the return statements are on
 * lines not present in this excerpt.
 */
2802 static int TestJitterbug930(const char* enc
){
2803 UErrorCode err
= U_ZERO_ERROR
;
2804 UConverter
*converter
;
2808 const UChar
*source
= in
;
2810 int32_t* offsets
= off
;
2811 int numOffWritten
=0;
/* NOTE(review): err is not inspected after my_ucnv_open in the visible lines. */
2813 converter
= my_ucnv_open(enc
, &err
);
2815 in
[0] = 0x41; /* 0x4E00;*/
/* memset writes the byte '*' into every byte of off, not the value '*' into every element. */
2820 memset(off
, '*', sizeof(off
));
2822 ucnv_fromUnicode (converter
,
2831 /* writes three bytes into the output buffer: 41 1B 24
2832 * but offsets contains 0 1 1
2834 while(*offsets
< off
[10]){
2838 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
2839 if(numOffWritten
!= (int)(target
-out
)){
2840 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
2845 memset(off
,'*' , sizeof(off
));
2849 ucnv_fromUnicode (converter
,
2858 while(*offsets
< off
[10]){
2861 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
2866 /* writes 42 43 7A into output buffer,
2867 * offsets contains -1 -1 -1
2869 ucnv_close(converter
);
/*
 * HZ converter round-trip test data and driver. The enclosing function header
 * and the declarations of cnv, uBuf, cBuf, cTarget, uTarget and test are on
 * lines not present in this excerpt.
 * in[] is the UTF-16 input; it is encoded with the "HZ" converter, decoded
 * again, compared unit by unit, and then handed to the shared helper tests.
 */
2876 static const uint16_t in
[]={
2877 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2878 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2879 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2880 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2881 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2882 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2883 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2884 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2885 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2886 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2887 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2888 0x005A, 0x005B, 0x005C, 0x000A
2890 const UChar
* uSource
;
2891 const UChar
* uSourceLimit
;
2892 const char* cSource
;
2893 const char* cSourceLimit
;
2894 UChar
*uTargetLimit
=NULL
;
2897 const char *cTargetLimit
;
2900 int32_t uBufSize
= 120;
2901 UErrorCode errorCode
=U_ZERO_ERROR
;
/* One offsets buffer is allocated; the same pointer (myOff) is passed to both conversions below. */
2903 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
2904 int32_t* myOff
= offsets
;
2905 cnv
=ucnv_open("HZ", &errorCode
);
2906 if(U_FAILURE(errorCode
)) {
2907 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
/* NOTE(review): the malloc results are not checked before use in the visible lines. */
2911 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2912 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
/* sizeof(in)/2 is the number of 16-bit units in in[]. */
2913 uSource
= (const UChar
*)&in
[0];
2914 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
2916 cTargetLimit
= cBuf
+uBufSize
*5;
2918 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole input in one call; the TRUE argument marks it as the final chunk (flush). */
2919 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
2920 if(U_FAILURE(errorCode
)){
2921 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode the bytes just produced (cBuf up to cTarget). */
2925 cSourceLimit
=cTarget
;
2928 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
2929 if(U_FAILURE(errorCode
)){
2930 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units (*test) with in[]; the pointer increments are not visible here. */
2933 uSource
= (const UChar
*)&in
[0];
2934 while(uSource
<uSourceLimit
){
2935 if(*test
!=*uSource
){
2937 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/* Re-run the same input through the shared helper tests. */
2942 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
2943 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
2944 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
2945 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
/* NOTE(review): the int result of TestJitterbug930 is ignored, and the encoding tested is csISO2022JP rather than HZ. */
2946 TestJitterbug930("csISO2022JP");
/*
 * ISCII test data and driver. The enclosing function header and a number of
 * table rows are on lines not present in this excerpt (presumably TestISCII).
 * in[] is the UTF-16 text and byteArr[] the corresponding ISCII byte sequence;
 * both are passed to testConvertToU() and TestConv() at the end.
 */
2956 static const uint16_t in
[]={
2957 /* test full range of Devanagari */
2958 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
2959 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
2960 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
2961 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
2962 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
2963 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2964 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2965 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2966 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2967 0x096D,0x096E,0x096F,
2968 /* test Soft halant*/
2969 0x0915,0x094d, 0x200D,
2970 /* test explicit halant */
2971 0x0915,0x094d, 0x200c,
2972 /* test double danda */
2975 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2976 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2977 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2978 /* tests from Lotus */
2979 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
2980 0x0930,0x094D,0x200D,
2981 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
2982 0x0915,0x0921,0x002B,0x095F,
2984 0x0B86, 0xB87, 0xB88,
2986 0x0C05, 0x0C02, 0x0C03,0x0c31,
2988 0x0C85, 0xC82, 0x0C83,
2989 /* test Abbr sign and Anudatta */
2999 0x0960 /* Vocalic RRI 0xAB, 0xE9*/,
3000 0x0944 /* Vowel Sign Vocalic RRI 0xDF, 0xE9 */,
3003 0x0961 /* Vocalic LL 0xa6, 0xE9 */,
3004 0x0963 /* Vowel Sign Vocalic LL 0xdb, 0xE9, */,
3005 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3006 0x093D /* Avagraha 0xEA, 0xE9*/,
3014 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
/* Expected ISCII bytes for in[]; the section comments mirror those of in[]. */
3016 static const unsigned char byteArr
[]={
3018 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3019 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3020 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3021 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3022 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3023 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3024 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3025 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3026 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3028 /* test soft halant */
3030 /* test explicit halant */
3032 /* test double danda */
3035 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3036 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3037 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3040 /* tests from Lotus */
3041 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3042 0xEF,0x42,0xCF,0xE8,0xD9,
3043 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3044 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3046 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3048 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3050 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3051 /* anudatta and abbreviation sign */
3052 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3055 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3057 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3059 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3061 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3063 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3065 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3067 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3069 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3071 0xB3, 0xE9, /* Ka + NUKTA */
3073 0xB4, 0xE9, /* Kha + NUKTA */
3075 0xB5, 0xE9, /* Ga + NUKTA */
3087 /* just consume unhandled codepoints */
/*
 * Decode byteArr[] with "x-iscii-de" and compare against in[] (helper defined
 * elsewhere), then run the generic TestConv round trip with byteArr[] as the
 * expected bytes. NOTE(review): the (char *) cast discards the const qualifier
 * of byteArr.
 */
3091 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,(sizeof(in
)/U_SIZEOF_UCHAR
),"x-iscii-de",NULL
,TRUE
);
3092 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
/*
 * ISO-2022-JP family round-trip test data and driver. The enclosing function
 * header is not part of this excerpt, so the exact function name cannot be
 * confirmed from here; the converter opened is "ISO_2022_JP_1". Declarations
 * of cnv, uBuf, cBuf, cTarget, uTarget and test are also on lines not shown.
 * in[] is the UTF-16 input; it is encoded, decoded again, compared unit by
 * unit, and then handed to the shared helper tests.
 */
3099 static const uint16_t in
[]={
3100 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3101 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3102 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3103 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3104 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3105 0x201D, 0x3014, 0x000D, 0x000A,
3106 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3107 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3109 const UChar
* uSource
;
3110 const UChar
* uSourceLimit
;
3111 const char* cSource
;
3112 const char* cSourceLimit
;
3113 UChar
*uTargetLimit
=NULL
;
3116 const char *cTargetLimit
;
3119 int32_t uBufSize
= 120;
3120 UErrorCode errorCode
=U_ZERO_ERROR
;
/* One offsets buffer is allocated; the same pointer (myOff) is passed to both conversions below. */
3122 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3123 int32_t* myOff
= offsets
;
3124 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3125 if(U_FAILURE(errorCode
)) {
3126 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
/* NOTE(review): the malloc results are not checked before use in the visible lines. */
3130 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3131 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
/* sizeof(in)/2 is the number of 16-bit units in in[]. */
3132 uSource
= (const UChar
*)&in
[0];
3133 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3135 cTargetLimit
= cBuf
+uBufSize
*5;
3137 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole input in one call; the TRUE argument marks it as the final chunk (flush). */
3138 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3139 if(U_FAILURE(errorCode
)){
3140 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode the bytes just produced (cBuf up to cTarget). */
3144 cSourceLimit
=cTarget
;
3147 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3148 if(U_FAILURE(errorCode
)){
3149 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units (*test) with in[]; the pointer increments are not visible here. */
3153 uSource
= (const UChar
*)&in
[0];
3154 while(uSource
<uSourceLimit
){
3155 if(*test
!=*uSource
){
3157 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/* Re-run the same input through the shared helper tests. */
3163 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3164 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3165 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3166 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
/* NOTE(review): the int result of TestJitterbug930 is ignored. */
3167 TestJitterbug930("csISO2022JP");
/*
 * TestConv: generic round-trip test for the converter named conv.
 * Encodes in[0..len) with ucnv_fromUnicode(), decodes the bytes again with
 * ucnv_toUnicode(), compares the result with in[], and then runs the shared
 * helper tests on the same input. When byteArr is non-NULL and byteArrLen is
 * non-zero, byteArr is additionally decoded and compared with in[].
 *
 * in          UTF-16 input
 * len         number of 16-bit units in in[]
 * conv        converter name, opened through my_ucnv_open
 * lang        label used as the log prefix for the byteArr getNextUChar check
 * byteArr     optional expected encoded bytes (may be NULL)
 * byteArrLen  number of bytes in byteArr (0 to skip the byteArr checks)
 *
 * Declarations of cnv, uBuf, cBuf, cTarget, uTarget and test, several
 * assignments and the closing braces are on lines not present in this excerpt.
 */
3174 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3175 const UChar
* uSource
;
3176 const UChar
* uSourceLimit
;
3177 const char* cSource
;
3178 const char* cSourceLimit
;
3179 UChar
*uTargetLimit
=NULL
;
3182 const char *cTargetLimit
;
/* All three work buffers below are sized to uBufSize (1200) elements. */
3185 int32_t uBufSize
= 120*10;
3186 UErrorCode errorCode
=U_ZERO_ERROR
;
3188 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3189 int32_t* myOff
= offsets
;
3190 cnv
=my_ucnv_open(conv
, &errorCode
);
3191 if(U_FAILURE(errorCode
)) {
3192 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
/* NOTE(review): the malloc results are not checked before use in the visible lines. */
3196 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3197 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3198 uSource
= (const UChar
*)&in
[0];
3199 uSourceLimit
=uSource
+len
;
3201 cTargetLimit
= cBuf
+uBufSize
;
3203 uTargetLimit
= uBuf
+ uBufSize
;
/* Encode the whole input in one call; the TRUE argument marks it as the final chunk (flush). */
3204 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3205 if(U_FAILURE(errorCode
)){
3206 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3209 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
/* Decode the bytes just produced (cBuf up to cTarget). */
3211 cSourceLimit
=cTarget
;
3214 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3215 if(U_FAILURE(errorCode
)){
3216 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
/* Compare the decoded units (*test) with in[]; the pointer increments are not visible here. */
3220 uSource
= (const UChar
*)&in
[0];
3221 while(uSource
<uSourceLimit
){
3222 if(*test
!=*uSource
){
3223 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
/* Re-run the same input through the shared helper tests. */
3228 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[len
],cnv
);
3229 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[len
],cnv
);
3230 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
/* Optional second pass driven by the caller-supplied expected bytes. */
3231 if(byteArr
&& byteArrLen
!=0){
3232 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3233 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[len
],cnv
);
/* cSource is presumably re-pointed at byteArr on a line not shown here - confirm. */
3236 cSourceLimit
= cSource
+byteArrLen
;
3239 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3240 if(U_FAILURE(errorCode
)){
3241 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3245 uSource
= (const UChar
*)&in
[0];
3246 while(uSource
<uSourceLimit
){
3247 if(*test
!=*uSource
){
3248 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * _charAt: character-access callback handed to u_unescapeAt() by unescape().
 * Treats context as a char array and returns the element at offset, converted
 * to UChar.
 * NOTE(review): where plain char is signed, bytes >= 0x80 become negative
 * before the conversion to UChar - confirm that only ASCII text is passed in.
 * The closing brace is on a line not present in this excerpt.
 */
3261 static UChar U_CALLCONV
3262 _charAt(int32_t offset
, void *context
) {
3263 return ((char*)context
)[offset
];
/*
 * unescape: copies the char string src into the UTF-16 buffer dst, expanding
 * backslash escapes through u_unescapeAt().
 *
 * dst     output buffer (may be NULL only when dstLen is not positive)
 * dstLen  capacity of dst in UChars
 * src     input text; must not be NULL
 * srcLen  length of src; replaced by uprv_strlen(src) under a condition that
 *         is on a line not shown here (presumably srcLen == -1)
 * status  in/out error code: nothing is done if it already indicates failure;
 *         set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *         U_INVALID_CHAR_FOUND for a bad escape, and U_BUFFER_OVERFLOW_ERROR
 *         when dst is too small
 *
 * dstIndex keeps advancing after dst is full ("for preflighting"), so it
 * counts the units that would have been needed. The return-type line, the
 * declarations of srcIndex and dstIndex, the return statements and the
 * condition choosing the surrogate-pair branch are not present in this excerpt.
 */
3267 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3270 if(U_FAILURE(*status
)){
3273 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3274 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3278 srcLen
= uprv_strlen(src
);
3281 for (; srcIndex
<srcLen
; ) {
3282 UChar32 c
= src
[srcIndex
++];
/* A backslash starts an escape; u_unescapeAt advances srcIndex past it and signals failure with 0xFFFFFFFF. */
3283 if (c
== 0x005C /*'\\'*/) {
3284 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3285 if (c
== (UChar32
)0xFFFFFFFF) {
3286 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3287 break; /* invalid escape sequence */
/* Store c only while there is room; a code point written as a pair needs room for both halves. */
3290 if(dstIndex
< dstLen
){
3292 dst
[dstIndex
++] = UTF16_LEAD(c
);
3293 if(dstIndex
<dstLen
){
3294 dst
[dstIndex
]=UTF16_TRAIL(c
);
3296 *status
=U_BUFFER_OVERFLOW_ERROR
;
3299 dst
[dstIndex
]=(UChar
)c
;
3303 *status
= U_BUFFER_OVERFLOW_ERROR
;
3305 dstIndex
++; /* for preflighting */
/*
 * TestFullRoundtrip: exhaustive round-trip test for the converter named cp.
 * After a few calls covering code point 0, it loops i up to 0x10FFFF, stores
 * the code point in usource (one unit, or a UTF16_LEAD/UTF16_TRAIL pair) and
 * calls TestConv() on it alone, repeated, and embedded in nsrc between two
 * other code points. TestConv is always called with byteArr NULL and
 * byteArrLen 0.
 *
 * The return-type line, the declarations of i, ulen and len, the statements
 * that update ulen/len and fill nsrc, and the closing braces are on lines not
 * present in this excerpt.
 */
3311 TestFullRoundtrip(const char* cp
){
3312 UChar usource
[10] ={0};
3313 UChar nsrc
[10] = {0};
3317 /* Test codepoint 0 */
3318 TestConv(usource
,1,cp
,"",NULL
,0);
3319 TestConv(usource
,2,cp
,"",NULL
,0);
3321 TestConv(nsrc
,3,cp
,"",NULL
,0);
3323 for(;i
<=0x10FFFF;i
++){
/* One 16-bit unit, or a lead/trail pair; the condition choosing between them is not visible here. */
3329 usource
[0] =(UChar
) i
;
3332 usource
[0]=UTF16_LEAD(i
);
3333 usource
[1]=UTF16_TRAIL(i
);
3340 /* Test only single code points */
3341 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3342 /* Test codepoint repeated twice */
3343 usource
[ulen
]=usource
[0];
3344 usource
[ulen
+1]=usource
[1];
3346 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3347 /* Test codepoint repeated 3 times */
3348 usource
[ulen
]=usource
[0];
3349 usource
[ulen
+1]=usource
[1];
3351 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3352 /* Test codepoint in between 2 codepoints */
3356 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* Clear all ten units of usource before the next code point. */
3357 uprv_memset(usource
,0,sizeof(UChar
)*10);
/*
 * TestRoundTrippingAllUTF: runs TestFullRoundtrip() for each of the listed
 * Unicode-capable converters, logging the converter name first.
 * The return-type line, any lines between the header and the first call, and
 * the closing braces are not present in this excerpt.
 */
3362 TestRoundTrippingAllUTF(void){
3364 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3365 TestFullRoundtrip("BOCU-1");
3366 log_verbose("Running exhaustive round trip test for SCSU\n");
3367 TestFullRoundtrip("SCSU");
3368 log_verbose("Running exhaustive round trip test for UTF-8\n");
3369 TestFullRoundtrip("UTF-8");
3370 log_verbose("Running exhaustive round trip test for CESU-8\n");
3371 TestFullRoundtrip("CESU-8");
3372 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3373 TestFullRoundtrip("UTF-16BE");
3374 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3375 TestFullRoundtrip("UTF-16LE");
3376 log_verbose("Running exhaustive round trip test for UTF-16\n");
3377 TestFullRoundtrip("UTF-16");
3378 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3379 TestFullRoundtrip("UTF-32BE");
3380 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3381 TestFullRoundtrip("UTF-32LE");
3382 log_verbose("Running exhaustive round trip test for UTF-32\n");
3383 TestFullRoundtrip("UTF-32");
3384 log_verbose("Running exhaustive round trip test for UTF-7\n");
3385 TestFullRoundtrip("UTF-7");
/* NOTE(review): this message repeats "UTF-7" although the converter tested next is "UTF-7,version=1". */
3386 log_verbose("Running exhaustive round trip test for UTF-7\n");
3387 TestFullRoundtrip("UTF-7,version=1");
3388 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3389 TestFullRoundtrip("IMAP-mailbox-name");
3390 log_verbose("Running exhaustive round trip test for GB18030\n");
3391 TestFullRoundtrip("GB18030");
3398 static const uint16_t germanUTF16
[]={
3399 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3402 static const uint8_t germanSCSU
[]={
3403 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3406 static const uint16_t russianUTF16
[]={
3407 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3410 static const uint8_t russianSCSU
[]={
3411 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3414 static const uint16_t japaneseUTF16
[]={
3415 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3416 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3417 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3418 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3419 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3420 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3421 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3422 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3423 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3424 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3425 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3426 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3427 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3428 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3429 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3432 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3433 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3434 static const uint8_t japaneseSCSU
[]={
3435 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3436 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3437 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3438 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3439 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3440 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3441 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3442 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3443 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3444 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3445 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3449 static const uint16_t allFeaturesUTF16
[]={
3450 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3451 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3452 0x01df, 0xf000, 0xdbff, 0xdfff
3455 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3456 * result here (34B vs. 35B)
3458 static const uint8_t allFeaturesSCSU
[]={
3459 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3460 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3461 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3462 0xdf, 0x14, 0x80, 0x15, 0xff
3464 static const uint16_t monkeyIn
[]={
3465 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3466 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3467 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3468 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3469 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3470 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3471 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3472 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3473 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3474 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3475 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3476 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3477 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3478 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3479 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3480 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3481 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3482 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3483 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3484 /* test non-BMP code points */
3485 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3486 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3487 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3488 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3489 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3490 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3491 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3492 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3493 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3494 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3495 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3498 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3499 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3500 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3501 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3502 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3504 static const char *fTestCases
[] = {
3505 "\\ud800\\udc00", /* smallest surrogate*/
3507 "\\udBff\\udFff", /* largest surrogate pair*/
3510 "Hello \\u9292 \\u9192 World!",
3511 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3512 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3514 "\\u0648\\u06c8", /* catch missing reset*/
3517 "\\u4444\\uE001", /* lowest quotable*/
3518 "\\u4444\\uf2FF", /* highest quotable*/
3519 "\\u4444\\uf188\\u4444",
3520 "\\u4444\\uf188\\uf288",
3521 "\\u4444\\uf188abc\\u0429\\uf288",
3523 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3524 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3525 "Hello World!123456",
3526 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3528 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3529 "abc\\u4411d", /* uses SQU*/
3530 "abc\\u4411\\u4412d",/* uses SCU*/
3531 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3532 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3534 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3535 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3536 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3538 "", /* empty input*/
3539 "\\u0000", /* smallest BMP character*/
3540 "\\uFFFF", /* largest BMP character*/
3542 /* regression tests*/
3543 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3544 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3545 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3546 "\\u0041\\u00df\\u0401\\u015f",
3547 "\\u9066\\u2123abc",
3548 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3549 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3552 for(;i
<sizeof(fTestCases
)/sizeof(*fTestCases
);i
++){
3553 const char* cSrc
= fTestCases
[i
];
3554 UErrorCode status
= U_ZERO_ERROR
;
3555 int32_t cSrcLen
,srcLen
;
3557 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3558 cSrcLen
= srcLen
= uprv_strlen(fTestCases
[i
]);
3559 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3560 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3561 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3562 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3565 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3566 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3567 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3568 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3569 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3570 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3571 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3573 static void TestJitterbug2346(){
3574 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3575 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3576 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3578 UChar uTarget
[500]={'\0'};
3579 UChar
* utarget
=uTarget
;
3580 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3582 char cTarget
[500]={'\0'};
3583 char* ctarget
=cTarget
;
3584 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3585 const char* csource
=source
;
3586 UChar
* temp
= expected
;
3587 UErrorCode err
=U_ZERO_ERROR
;
3589 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3590 if(U_FAILURE(err
)) {
3591 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3594 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3595 if(U_FAILURE(err
)) {
3596 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
3599 utargetLimit
=utarget
;
3601 while(utarget
<utargetLimit
){
3602 if(*temp
!=*utarget
){
3604 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3609 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3610 if(U_FAILURE(err
)) {
3611 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3614 ctargetLimit
=ctarget
;
3621 TestISO_2022_JP_1() {
3623 static const uint16_t in
[]={
3624 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3625 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3626 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3627 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3628 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3629 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3630 0x201D, 0x000D, 0x000A,
3631 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3632 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3633 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3634 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3635 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3636 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3638 const UChar
* uSource
;
3639 const UChar
* uSourceLimit
;
3640 const char* cSource
;
3641 const char* cSourceLimit
;
3642 UChar
*uTargetLimit
=NULL
;
3645 const char *cTargetLimit
;
3648 int32_t uBufSize
= 120;
3649 UErrorCode errorCode
=U_ZERO_ERROR
;
3652 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3653 if(U_FAILURE(errorCode
)) {
3654 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3658 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3659 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3660 uSource
= (const UChar
*)&in
[0];
3661 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3663 cTargetLimit
= cBuf
+uBufSize
*5;
3665 uTargetLimit
= uBuf
+ uBufSize
*5;
3666 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3667 if(U_FAILURE(errorCode
)){
3668 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3672 cSourceLimit
=cTarget
;
3674 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3675 if(U_FAILURE(errorCode
)){
3676 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3679 uSource
= (const UChar
*)&in
[0];
3680 while(uSource
<uSourceLimit
){
3681 if(*test
!=*uSource
){
3683 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3689 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3690 /*Test for the condition where there is an invalid character*/
3693 static const uint8_t source2
[]={0x0e,0x24,0x053};
3694 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3696 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3697 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3704 TestISO_2022_JP_2() {
3706 static const uint16_t in
[]={
3707 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3708 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3709 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3710 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3711 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3712 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3713 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3714 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3715 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3716 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3717 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3718 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3719 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3720 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3721 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3722 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3723 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3724 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3725 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3727 const UChar
* uSource
;
3728 const UChar
* uSourceLimit
;
3729 const char* cSource
;
3730 const char* cSourceLimit
;
3731 UChar
*uTargetLimit
=NULL
;
3734 const char *cTargetLimit
;
3737 int32_t uBufSize
= 120;
3738 UErrorCode errorCode
=U_ZERO_ERROR
;
3740 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3741 int32_t* myOff
= offsets
;
3742 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3743 if(U_FAILURE(errorCode
)) {
3744 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3748 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3749 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3750 uSource
= (const UChar
*)&in
[0];
3751 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3753 cTargetLimit
= cBuf
+uBufSize
*5;
3755 uTargetLimit
= uBuf
+ uBufSize
*5;
3756 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3757 if(U_FAILURE(errorCode
)){
3758 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3762 cSourceLimit
=cTarget
;
3765 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3766 if(U_FAILURE(errorCode
)){
3767 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3770 uSource
= (const UChar
*)&in
[0];
3771 while(uSource
<uSourceLimit
){
3772 if(*test
!=*uSource
){
3774 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3779 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3780 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3781 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3782 /*Test for the condition where there is an invalid character*/
3785 static const uint8_t source2
[]={0x0e,0x24,0x053};
3786 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
3797 static const uint16_t in
[]={
3798 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3799 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3800 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3801 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3802 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3803 ,0x53E3,0x53E4,0x000A,0x000D};
3804 const UChar
* uSource
;
3805 const UChar
* uSourceLimit
;
3806 const char* cSource
;
3807 const char* cSourceLimit
;
3808 UChar
*uTargetLimit
=NULL
;
3811 const char *cTargetLimit
;
3814 int32_t uBufSize
= 120;
3815 UErrorCode errorCode
=U_ZERO_ERROR
;
3817 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3818 int32_t* myOff
= offsets
;
3819 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
3820 if(U_FAILURE(errorCode
)) {
3821 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3825 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3826 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3827 uSource
= (const UChar
*)&in
[0];
3828 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3830 cTargetLimit
= cBuf
+uBufSize
*5;
3832 uTargetLimit
= uBuf
+ uBufSize
*5;
3833 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3834 if(U_FAILURE(errorCode
)){
3835 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3839 cSourceLimit
=cTarget
;
3842 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3843 if(U_FAILURE(errorCode
)){
3844 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3847 uSource
= (const UChar
*)&in
[0];
3848 while(uSource
<uSourceLimit
){
3849 if(*test
!=*uSource
){
3850 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
3855 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
3856 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3857 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3858 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3859 TestJitterbug930("csISO2022KR");
3860 /*Test for the condition where there is an invalid character*/
3863 static const uint8_t source2
[]={0x1b,0x24,0x053};
3864 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
3865 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
3874 TestISO_2022_KR_1() {
3876 static const uint16_t in
[]={
3877 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3878 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3879 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3880 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3881 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3882 ,0x53E3,0x53E4,0x000A,0x000D};
3883 const UChar
* uSource
;
3884 const UChar
* uSourceLimit
;
3885 const char* cSource
;
3886 const char* cSourceLimit
;
3887 UChar
*uTargetLimit
=NULL
;
3890 const char *cTargetLimit
;
3893 int32_t uBufSize
= 120;
3894 UErrorCode errorCode
=U_ZERO_ERROR
;
3896 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3897 int32_t* myOff
= offsets
;
3898 cnv
=ucnv_open("ibm-25546", &errorCode
);
3899 if(U_FAILURE(errorCode
)) {
3900 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3904 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3905 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3906 uSource
= (const UChar
*)&in
[0];
3907 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
3909 cTargetLimit
= cBuf
+uBufSize
*5;
3911 uTargetLimit
= uBuf
+ uBufSize
*5;
3912 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3913 if(U_FAILURE(errorCode
)){
3914 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3918 cSourceLimit
=cTarget
;
3921 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3922 if(U_FAILURE(errorCode
)){
3923 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3926 uSource
= (const UChar
*)&in
[0];
3927 while(uSource
<uSourceLimit
){
3928 if(*test
!=*uSource
){
3929 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
3935 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
3936 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3937 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3939 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
3940 /*Test for the condition where there is an invalid character*/
3943 static const uint8_t source2
[]={0x1b,0x24,0x053};
3944 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
3945 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
3953 static void TestJitterbug2411(){
3954 const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3955 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3956 UConverter
* kr
=NULL
, *kr1
=NULL
;
3957 UErrorCode errorCode
= U_ZERO_ERROR
;
3958 UChar tgt
[100]={'\0'};
3959 UChar
* target
= tgt
;
3960 UChar
* targetLimit
= target
+100;
3961 kr
=ucnv_open("iso-2022-kr", &errorCode
);
3962 if(U_FAILURE(errorCode
)) {
3963 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
3966 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
3967 if(U_FAILURE(errorCode
)) {
3968 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
3971 kr1
= ucnv_open("ibm-25546", &errorCode
);
3972 if(U_FAILURE(errorCode
)) {
3973 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
3977 targetLimit
= target
+100;
3978 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
3980 if(U_FAILURE(errorCode
)) {
3981 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
3992 /* From Unicode moved to testdata/conversion.txt */
3995 const uint8_t sampleTextJIS
[] = {
3996 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
3997 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
3998 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4000 const uint16_t expectedISO2022JIS
[] = {
4005 int32_t toISO2022JISOffs
[]={
4011 const uint8_t sampleTextJIS7
[] = {
4012 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4013 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4014 0x1b,0x24,0x42,0x21,0x21,
4015 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4017 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4019 const uint16_t expectedISO2022JIS7
[] = {
4027 int32_t toISO2022JIS7Offs
[]={
4034 const uint8_t sampleTextJIS8
[] = {
4035 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4036 0xa1,0xc8,0xd9,/*Katakana Set*/
4039 0xb1,0xc3, /*Katakana Set*/
4040 0x1b,0x24,0x42,0x21,0x21
4042 const uint16_t expectedISO2022JIS8
[] = {
4044 0xff61, 0xff88, 0xff99,
4049 int32_t toISO2022JIS8Offs
[]={
4055 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4056 sizeof(expectedISO2022JIS
)/sizeof(expectedISO2022JIS
[0]),"JIS", toISO2022JISOffs
,TRUE
);
4057 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4058 sizeof(expectedISO2022JIS7
)/sizeof(expectedISO2022JIS7
[0]),"JIS7", toISO2022JIS7Offs
,TRUE
);
4059 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4060 sizeof(expectedISO2022JIS8
)/sizeof(expectedISO2022JIS8
[0]),"JIS8", toISO2022JIS8Offs
,TRUE
);
4065 static void TestJitterbug915(){
4066 /* tests for roundtripping of the below sequence
4067 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4068 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4069 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4070 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4071 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4072 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4073 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4075 static char cSource
[]={
4076 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4077 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4078 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4079 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4080 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4081 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4082 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4083 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4084 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4085 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4086 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4087 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4088 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4089 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4090 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4091 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4092 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4093 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4094 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4095 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4096 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4097 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4098 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4099 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4100 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4101 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4102 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4103 0x37, 0x20, 0x2A, 0x2F
4105 UChar uTarget
[500]={'\0'};
4106 UChar
* utarget
=uTarget
;
4107 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4109 char cTarget
[500]={'\0'};
4110 char* ctarget
=cTarget
;
4111 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4112 const char* csource
=cSource
;
4113 char* tempSrc
= cSource
;
4114 UErrorCode err
=U_ZERO_ERROR
;
4116 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4117 if(U_FAILURE(err
)) {
4118 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4121 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4122 if(U_FAILURE(err
)) {
4123 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4126 utargetLimit
=utarget
;
4128 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4129 if(U_FAILURE(err
)) {
4130 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4133 ctargetLimit
=ctarget
;
4135 while(ctarget
<ctargetLimit
){
4136 if(*ctarget
!= *tempSrc
){
4137 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
4147 TestISO_2022_CN_EXT() {
4149 static const uint16_t in
[]={
4150 /* test Non-BMP code points */
4151 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4152 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4153 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4154 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4155 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4156 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4157 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4158 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4159 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4162 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4163 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4164 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4165 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4166 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4167 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4168 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4169 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4170 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4171 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4172 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4173 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4174 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4175 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4176 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4177 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4178 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4179 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4181 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4185 const UChar
* uSource
;
4186 const UChar
* uSourceLimit
;
4187 const char* cSource
;
4188 const char* cSourceLimit
;
4189 UChar
*uTargetLimit
=NULL
;
4192 const char *cTargetLimit
;
4195 int32_t uBufSize
= 180;
4196 UErrorCode errorCode
=U_ZERO_ERROR
;
4198 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4199 int32_t* myOff
= offsets
;
4200 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4201 if(U_FAILURE(errorCode
)) {
4202 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4206 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4207 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4208 uSource
= (const UChar
*)&in
[0];
4209 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
4211 cTargetLimit
= cBuf
+uBufSize
*5;
4213 uTargetLimit
= uBuf
+ uBufSize
*5;
4214 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4215 if(U_FAILURE(errorCode
)){
4216 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4220 cSourceLimit
=cTarget
;
4223 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4224 if(U_FAILURE(errorCode
)){
4225 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4228 uSource
= (const UChar
*)&in
[0];
4229 while(uSource
<uSourceLimit
){
4230 if(*test
!=*uSource
){
4231 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4234 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4239 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4240 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4241 /*Test for the condition where there is an invalid character*/
4244 static const uint8_t source2
[]={0x0e,0x24,0x053};
4245 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4256 static const uint16_t in
[]={
4258 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4259 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4260 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4261 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4262 0x0020, 0x0045, 0x004e, 0x0044,
4264 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4265 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4266 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4267 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4272 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4273 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4274 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4275 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4276 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4277 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4278 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4279 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4280 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4283 const UChar
* uSource
;
4284 const UChar
* uSourceLimit
;
4285 const char* cSource
;
4286 const char* cSourceLimit
;
4287 UChar
*uTargetLimit
=NULL
;
4290 const char *cTargetLimit
;
4293 int32_t uBufSize
= 180;
4294 UErrorCode errorCode
=U_ZERO_ERROR
;
4296 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4297 int32_t* myOff
= offsets
;
4298 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4299 if(U_FAILURE(errorCode
)) {
4300 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4304 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4305 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4306 uSource
= (const UChar
*)&in
[0];
4307 uSourceLimit
=(const UChar
*)&in
[sizeof(in
)/2];
4309 cTargetLimit
= cBuf
+uBufSize
*5;
4311 uTargetLimit
= uBuf
+ uBufSize
*5;
4312 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4313 if(U_FAILURE(errorCode
)){
4314 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4318 cSourceLimit
=cTarget
;
4321 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4322 if(U_FAILURE(errorCode
)){
4323 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4326 uSource
= (const UChar
*)&in
[0];
4327 while(uSource
<uSourceLimit
){
4328 if(*test
!=*uSource
){
4329 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4332 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4337 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4338 TestSmallTargetBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4339 TestSmallSourceBuffer(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4340 TestToAndFromUChars(&in
[0],(const UChar
*)&in
[sizeof(in
)/2],cnv
);
4341 TestJitterbug930("csISO2022CN");
4342 /*Test for the condition where there is an invalid character*/
4345 static const uint8_t source2
[]={0x0e,0x24,0x053};
4346 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4356 TestEBCDIC_STATEFUL() {
4358 static const uint8_t in
[]={
4367 /* expected test results */
4368 static const int32_t results
[]={
4369 /* number of bytes read, code point */
4378 static const uint8_t in2
[]={
4384 /* expected test results */
4385 static const int32_t results2
[]={
4386 /* number of bytes read, code point */
4391 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4392 UErrorCode errorCode
=U_ZERO_ERROR
;
4393 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4394 if(U_FAILURE(errorCode
)) {
4395 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4398 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4400 /* Test the condition when source >= sourceLimit */
4401 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4403 /*Test for the condition where source > sourcelimit after consuming the shift character */
4405 static const uint8_t source1
[]={0x0f};
4406 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4408 /*Test for the condition where there is an invalid character*/
4411 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4412 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
4415 source
=(const char*)in2
;
4416 limit
=(const char*)in2
+sizeof(in2
);
4417 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4425 static const uint8_t in
[]={
4428 0x81, 0x30, 0x81, 0x30,
4432 0x82, 0x35, 0x8f, 0x33,
4433 0x84, 0x31, 0xa4, 0x39,
4434 0x90, 0x30, 0x81, 0x30,
4435 0xe3, 0x32, 0x9a, 0x35
4438 * Feature removed markus 2000-oct-26
4439 * Only some codepages must match surrogate pairs into supplementary code points -
4440 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4441 * GB 18030 provides direct encodings for supplementary code points, therefore
4442 * it must not combine two single-encoded surrogates into one code point.
4444 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4448 /* expected test results */
4449 static const int32_t results
[]={
4450 /* number of bytes read, code point */
4462 /* Feature removed. See comment above. */
4467 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4468 UErrorCode errorCode
=U_ZERO_ERROR
;
4469 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4470 if(U_FAILURE(errorCode
)) {
4471 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4474 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4480 /* LMBCS-1 string */
4481 static const uint8_t pszLMBCS
[]={
4490 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4494 /* Unicode UChar32 equivalents */
4495 static const UChar32 pszUnicode32
[]={
4505 0x00023456, /* code point for surrogate pair */
4509 /* Unicode UChar (UTF-16 code unit) equivalents */
4510 static const UChar pszUnicode
[]={
4520 0xD84D, /* high (lead) surrogate, range D800..DBFF */
4521 0xDC56, /* low (trail) surrogate, range DC00..DFFF */
4525 /* expected test results */
4526 static const int offsets32
[]={
4527 /* number of bytes read, code point */
4541 /* expected test results */
4542 static const int offsets
[]={
4543 /* number of bytes read, code point */
4561 #define NAME_LMBCS_1 "LMBCS-1"
4562 #define NAME_LMBCS_2 "LMBCS-2"
4565 /* Some basic open/close/property tests on some LMBCS converters */
4568 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4569 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4570 char get_subchars
[1];
4571 const char * get_name
;
4575 int8_t len
= sizeof(get_subchars
);
4577 UErrorCode errorCode
=U_ZERO_ERROR
;
4580 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4581 if(U_FAILURE(errorCode
)) {
4582 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4585 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4586 if(U_FAILURE(errorCode
)) {
4587 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4592 get_name
= ucnv_getName (cnv1
, &errorCode
);
4593 if (strcmp(NAME_LMBCS_1
,get_name
)){
4594 log_err("Unexpected converter name: %s\n", get_name
);
4596 get_name
= ucnv_getName (cnv2
, &errorCode
);
4597 if (strcmp(NAME_LMBCS_2
,get_name
)){
4598 log_err("Unexpected converter name: %s\n", get_name
);
4601 /* substitution chars */
4602 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4603 if(U_FAILURE(errorCode
)) {
4604 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4607 log_err("Unexpected length of sub chars\n");
4609 if (get_subchars
[0] != expected_subchars
[0]){
4610 log_err("Unexpected value of sub chars\n");
4612 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4613 if(U_FAILURE(errorCode
)) {
4614 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4616 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4617 if(U_FAILURE(errorCode
)) {
4618 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4621 log_err("Unexpected length of sub chars\n");
4623 if (get_subchars
[0] != new_subchars
[0]){
4624 log_err("Unexpected value of sub chars\n");
4631 /* LMBCS to Unicode - offsets */
4633 UErrorCode errorCode
=U_ZERO_ERROR
;
4635 const uint8_t * pSource
= pszLMBCS
;
4636 const uint8_t * sourceLimit
= pszLMBCS
+ sizeof(pszLMBCS
);
4638 UChar Out
[sizeof(pszUnicode
) + 1];
4640 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
4642 int32_t off
[sizeof(offsets
)];
4644 /* last 'offset' in expected results is just the final size.
4645 (Makes other tests easier). Compensate here: */
4647 off
[(sizeof(offsets
)/sizeof(offsets
[0]))-1] = sizeof(pszLMBCS
);
4651 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4652 if(U_FAILURE(errorCode
)) {
4653 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4659 ucnv_toUnicode (cnv
,
4662 (const char **)&pSource
,
4663 (const char *)sourceLimit
,
4669 if (memcmp(off
,offsets
,sizeof(offsets
)))
4671 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4673 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4675 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4680 /* LMBCS to Unicode - getNextUChar */
4681 const char * sourceStart
;
4682 const char *source
=(const char *)pszLMBCS
;
4683 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4684 const UChar32
*results
= pszUnicode32
;
4685 const int *off
= offsets32
;
4687 UErrorCode errorCode
=U_ZERO_ERROR
;
4690 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4691 if(U_FAILURE(errorCode
)) {
4692 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4698 while(source
<limit
) {
4700 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4701 if(U_FAILURE(errorCode
)) {
4702 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4704 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
4705 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4706 uniChar
, (source
-sourceStart
), *results
, *off
);
4715 { /* test locale & optimization group operations: Unicode to LMBCS */
4717 UErrorCode errorCode
=U_ZERO_ERROR
;
4718 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4719 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4720 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4721 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4722 const UChar
* pUniOut
= uniString
;
4723 UChar
* pUniIn
= uniString
;
4724 uint8_t lmbcsString
[4];
4725 const uint8_t * pLMBCSOut
= lmbcsString
;
4726 uint8_t * pLMBCSIn
= lmbcsString
;
4728 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4729 ucnv_fromUnicode (cnv16he
,
4730 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4731 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4732 NULL
, 1, &errorCode
);
4734 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
4736 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4739 pLMBCSIn
=lmbcsString
;
4740 pUniOut
= uniString
;
4741 ucnv_fromUnicode (cnv01us
,
4742 (char **)&pLMBCSIn
, (const char *)(lmbcsString
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4743 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4744 NULL
, 1, &errorCode
);
4746 if (lmbcsString
[0] != 0x9F)
4748 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4751 /* single byte char from mbcs char set */
4752 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4753 pLMBCSOut
= lmbcsString
;
4755 ucnv_toUnicode (cnv16jp
,
4756 &pUniIn
, pUniIn
+ 1,
4757 (const char **)&pLMBCSOut
, (const char *)(pLMBCSOut
+ 1),
4758 NULL
, 1, &errorCode
);
4759 if (U_FAILURE(errorCode
) || pLMBCSOut
!= lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4761 log_err("Unexpected results from LMBCS-16 single byte char\n");
4763 /* convert to group 1: should be 3 bytes */
4764 pLMBCSIn
= lmbcsString
;
4765 pUniOut
= uniString
;
4766 ucnv_fromUnicode (cnv01us
,
4767 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
4768 &pUniOut
, pUniOut
+ 1,
4769 NULL
, 1, &errorCode
);
4770 if (U_FAILURE(errorCode
) || pLMBCSIn
!= lmbcsString
+3 || pUniOut
!= uniString
+1
4771 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
4773 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4775 pLMBCSOut
= lmbcsString
;
4777 ucnv_toUnicode (cnv01us
,
4778 &pUniIn
, pUniIn
+ 1,
4779 (const char **)&pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
4780 NULL
, 1, &errorCode
);
4781 if (U_FAILURE(errorCode
) || pLMBCSOut
!= lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
4783 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4785 pLMBCSIn
= lmbcsString
;
4786 pUniOut
= uniString
;
4787 ucnv_fromUnicode (cnv16jp
,
4788 (char **)&pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
4789 &pUniOut
, pUniOut
+ 1,
4790 NULL
, 1, &errorCode
);
4791 if (U_FAILURE(errorCode
) || pLMBCSIn
!= lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
4793 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4795 ucnv_close(cnv16he
);
4796 ucnv_close(cnv16jp
);
4797 ucnv_close(cnv01us
);
4800 /* Small source buffer testing, LMBCS -> Unicode */
4802 UErrorCode errorCode
=U_ZERO_ERROR
;
4804 const uint8_t * pSource
= pszLMBCS
;
4805 const uint8_t * sourceLimit
= pszLMBCS
+ sizeof(pszLMBCS
);
4806 int codepointCount
= 0;
4808 UChar Out
[sizeof(pszUnicode
) + 1];
4810 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
4813 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
4814 if(U_FAILURE(errorCode
)) {
4815 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4820 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
4822 ucnv_toUnicode (cnv
,
4825 (const char **)&pSource
,
4826 (const char *)(pSource
+1), /* claim that this is a 1- byte buffer */
4828 FALSE
, /* FALSE means there might be more chars in the next buffer */
4831 if (U_SUCCESS (errorCode
))
4833 if ((pSource
- (const uint8_t *)pszLMBCS
) == offsets
[codepointCount
+1])
4835 /* we are on to the next code point: check value */
4837 if (Out
[0] != pszUnicode
[codepointCount
]){
4838 log_err("LMBCS->Uni result %lx should have been %lx \n",
4839 Out
[0], pszUnicode
[codepointCount
]);
4842 pOut
= Out
; /* reset for accumulating next code point */
4848 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
4852 /* limits & surrogate error testing */
4853 uint8_t LIn
[sizeof(pszLMBCS
)];
4854 const uint8_t * pLIn
= LIn
;
4856 char LOut
[sizeof(pszLMBCS
)];
4857 char * pLOut
= LOut
;
4859 UChar UOut
[sizeof(pszUnicode
)];
4860 UChar
* pUOut
= UOut
;
4862 UChar UIn
[sizeof(pszUnicode
)];
4863 const UChar
* pUIn
= UIn
;
4865 int32_t off
[sizeof(offsets
)];
4868 errorCode
=U_ZERO_ERROR
;
4870 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4871 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
-1,off
,FALSE
, &errorCode
);
4872 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4874 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
4876 errorCode
=U_ZERO_ERROR
;
4877 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
4878 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4880 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
4882 errorCode
=U_ZERO_ERROR
;
4884 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
4885 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
4887 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
4889 errorCode
=U_ZERO_ERROR
;
4891 /* 0 byte source request - no error, no pointer movement */
4892 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
4893 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
4894 if(U_FAILURE(errorCode
)) {
4895 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
4897 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
4899 log_err("Unexpected pointer move in 0 byte source request \n");
4901 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4902 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
4903 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
4905 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
4907 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4909 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4911 errorCode
= U_ZERO_ERROR
;
4913 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
4916 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+sizeof(pszUnicode
)/sizeof(UChar
),off
,FALSE
, &errorCode
);
4917 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
4919 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4922 errorCode
= U_ZERO_ERROR
;
4925 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,(const char **)&pLIn
,(const char *)(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
4926 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const uint8_t *)pszLMBCS
+offsets
[4])
4928 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4931 /* unpaired or chopped LMBCS surrogates */
4933 /* OK high surrogate, Low surrogate is chopped */
4940 errorCode
= U_ZERO_ERROR
;
4943 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4944 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
4945 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
4947 log_err("Unexpected results on chopped low surrogate\n");
4950 /* chopped at surrogate boundary */
4955 errorCode
= U_ZERO_ERROR
;
4958 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
4959 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
4961 log_err("Unexpected results on chopped at surrogate boundary \n");
4964 /* unpaired surrogate plus valid Unichar */
4972 errorCode
= U_ZERO_ERROR
;
4975 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
4976 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
4978 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
4981 /* unpaired surrogate plus chopped Unichar */
4989 errorCode
= U_ZERO_ERROR
;
4992 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
4993 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
4995 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
4998 /* unpaired surrogate plus valid non-Unichar */
5006 errorCode
= U_ZERO_ERROR
;
5009 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5010 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5012 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5015 /* unpaired surrogate plus chopped non-Unichar */
5022 errorCode
= U_ZERO_ERROR
;
5025 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5027 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5029 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5033 ucnv_close(cnv
); /* final cleanup */
5037 static void TestJitterbug255()
5039 const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5040 const uint8_t *testBuffer
= testBytes
;
5041 const uint8_t *testEnd
= testBytes
+ sizeof(testBytes
);
5042 UErrorCode status
= U_ZERO_ERROR
;
5044 UConverter
*cnv
= 0;
5046 cnv
= ucnv_open("shift-jis", &status
);
5047 if (U_FAILURE(status
) || cnv
== 0) {
5048 log_data_err("Failed to open the converter for SJIS.\n");
5051 while (testBuffer
!= testEnd
)
5053 result
= ucnv_getNextUChar (cnv
, (const char **)&testBuffer
, (const char *)testEnd
, &status
);
5054 if (U_FAILURE(status
))
5056 log_err("Failed to convert the next UChar for SJIS.\n");
5063 static void TestEBCDICUS4XML()
5065 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
5066 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
5067 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
5068 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
5069 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
5070 UChar
*unicodes
= unicodes_x
;
5071 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5072 char *target
= target_x
;
5073 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5074 UErrorCode status
= U_ZERO_ERROR
;
5075 UConverter
*cnv
= 0;
5077 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5078 if (U_FAILURE(status
) || cnv
== 0) {
5079 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5082 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
5083 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5084 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5085 u_errorName(status
));
5086 printUSeqErr(unicodes_x
, 3);
5087 printUSeqErr(toUnicodeMaps
, 3);
5089 status
= U_ZERO_ERROR
;
5090 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
5091 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5092 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5093 u_errorName(status
));
5094 printSeqErr((const unsigned char*)target_x
, 3);
5095 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5100 #if !UCONFIG_NO_COLLATION
5102 static void TestJitterbug981(){
5104 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5105 UErrorCode status
= U_ZERO_ERROR
;
5106 UConverter
*utf8cnv
;
5107 UCollator
* myCollator
;
5110 utf8cnv
= ucnv_open ("utf8", &status
);
5111 if(U_FAILURE(status
)){
5112 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status
));
5115 myCollator
= ucol_open("zh", &status
);
5116 if(U_FAILURE(status
)){
5117 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status
));
5121 rules
= ucol_getRules(myCollator
, &rules_length
);
5122 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
5123 buff
= malloc(buff_size
);
5127 ucnv_reset(utf8cnv
);
5128 status
= U_ZERO_ERROR
;
5129 if(target_cap
>= buff_size
) {
5130 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5133 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5134 rules
, rules_length
, &status
);
5135 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5136 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5137 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5139 numNeeded
= bytes_needed
;
5140 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5141 ucol_close(myCollator
);
5142 ucnv_close(utf8cnv
);
5148 static void TestJitterbug1293(){
5149 UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5151 UErrorCode status
= U_ZERO_ERROR
;
5152 UConverter
* conv
=NULL
;
5153 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5154 conv
= ucnv_open("shift-jis",&status
);
5155 if(U_FAILURE(status
)){
5156 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5162 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
5163 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5164 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5165 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5167 numNeeded
= bytes_needed
;
5168 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5169 if(U_FAILURE(status
)){
5170 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));