1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
12 * Modification History:
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
31 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
32 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
39 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77 static void TestISO_2022_CN_EXT(void);
79 static void TestJIS(void);
80 static void TestHZ(void);
83 static void TestSCSU(void);
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95 static void TestJitterbug915(void);
97 static void TestISCII(void);
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
106 static void TestIsFixedWidth(void);
109 static void TestInBufSizes(void);
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in
[],
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
123 #define NEW_MAX_BUFFER 999
125 static int32_t gInBufferSize
= NEW_MAX_BUFFER
;
126 static int32_t gOutBufferSize
= NEW_MAX_BUFFER
;
127 static char gNuConvTestName
[1024];
/*
 * nct_min(x, y): yields x when x < y, otherwise y.
 * Each argument and the whole expansion are parenthesized so that operands
 * built from lower-precedence operators (e.g. `a | b`, `c ? d : e`) are
 * compared and returned as a unit.
 * Note: an argument may be evaluated twice; do not pass expressions with
 * side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
/*
 * Opens a converter for the given name.
 *
 * cnv: converter name; when it is non-NULL and its first character is '@',
 *      the remainder (cnv+1) is opened with ucnv_openPackage() against the
 *      package returned by loadTestData(err).
 * err: in/out error code, passed through to loadTestData() and the open call.
 *
 * Returns whatever ucnv_openPackage()/ucnv_open() returns.
 */
131 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
133 if(cnv
&& cnv
[0] == '@') {
134 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
/* any other name, including NULL, goes through the regular ucnv_open() */
136 return ucnv_open(cnv
, err
);
/*
 * Verbose-log helper: writes elements of the byte array a through
 * log_verbose(), each formatted as "0x%02x ".
 * NOTE(review): the loop header is not visible in this view; len is
 * presumably the number of bytes in a -- confirm.
 */
140 static void printSeq(const unsigned char* a
, int len
)
145 log_verbose("0x%02x ", a
[i
++]);
/*
 * Verbose-log helper for UChar arrays: while the index i is below len,
 * writes a[i] through log_verbose() formatted as "0x%04x " and advances i.
 */
149 static void printUSeq(const UChar
* a
, int len
)
153 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * Error-output helper: prints the byte array a to stderr as
 * "{" followed by "0x%02x " per element and a closing "}\n".
 * NOTE(review): the loop header is not visible in this view; len is
 * presumably the number of bytes in a -- confirm.
 */
157 static void printSeqErr(const unsigned char* a
, int len
)
160 fprintf(stderr
, "{");
162 fprintf(stderr
, "0x%02x ", a
[i
++]);
163 fprintf(stderr
, "}\n");
/*
 * Error-output helper for UChar arrays: prints to stderr "{U+", then
 * "0x%04x " per element, then "}\n".
 * NOTE(review): the loop header is not visible in this view; len is
 * presumably the number of UChars in a -- confirm.
 */
166 static void printUSeqErr(const UChar
* a
, int len
)
169 fprintf(stderr
, "{U+");
171 fprintf(stderr
, "0x%04x ", a
[i
++]);
172 fprintf(stderr
,"}\n");
/*
 * Checks ucnv_getNextUChar() on the input [source, limit) against results.
 *
 * cnv:     converter under test.
 * source:  start of the input bytes; iterated through the local pointer s.
 * limit:   end of the input, passed to ucnv_getNextUChar() as the source limit.
 * results: expectations read through r as pairs: *r is the expected number of
 *          input bytes for a call (only checked when >= 0) and *(r+1) is the
 *          expected code point, as reported in the mismatch message.
 * message: prefix used in the log_err() output.
 *
 * U_INDEX_OUTOFBOUNDS_ERROR ends the iteration ("no more significant input");
 * any other failure is logged with its u_errorName().
 * NOTE(review): the loop header and the s0 bookkeeping are not visible in this
 * view; (s - s0) is presumably the byte count consumed by the latest call.
 */
176 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
179 const char* s
=(char*)source
;
180 const int32_t *r
=results
;
181 UErrorCode errorCode
=U_ZERO_ERROR
;
186 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
187 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode
)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
193 /* test the expected number of input bytes only if >=0 */
194 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message
, c
, (s
-s0
), *(r
+1), *r
);
/*
 * Checks the error path of ucnv_getNextUChar(): calls it on [source, limit)
 * and logs a failure when the resulting error code differs from expected,
 * and when the returned value c is neither 0xFFFD nor 0xffff.
 *
 * cnv:      converter under test.
 * source:   start of the input bytes.
 * limit:    end of the input.
 * expected: the UErrorCode the call is expected to produce.
 * message:  description inserted into the failure messages.
 */
206 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
208 const char* s
=(char*)source
;
209 UErrorCode errorCode
=U_ZERO_ERROR
;
211 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
212 if(errorCode
!= expected
){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
215 if(c
!= 0xFFFD && c
!= 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
/*
 * Drives TestNewConvertWithBufferSizes(outsize, insize) with small input
 * chunk sizes: output size NEW_MAX_BUFFER with input sizes 1 through 6,
 * followed by the small/small combinations (1,1), (2,3) and (3,2).
 * NOTE(review): some source lines between these calls are not visible in
 * this view; confirm no preprocessor guard disables part of the list.
 */
221 static void TestInBufSizes(void)
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
/*
 * Drives TestNewConvertWithBufferSizes(outsize, insize) with small output
 * chunk sizes: first (NEW_MAX_BUFFER, NEW_MAX_BUFFER), then output sizes
 * 1 through 5 with the input size kept at NEW_MAX_BUFFER.
 * NOTE(review): some source lines before the first call are not visible in
 * this view; confirm no preprocessor guard disables the calls.
 */
236 static void TestOutBufSizes(void)
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
/*
 * Registers the tests of this file on the test tree rooted at root.
 * Every entry is added with addTest() under the "tsconv/nucnvtst/" path.
 * Several groups are wrapped in preprocessor guards on UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022, so the registered set depends on the build
 * configuration.
 * NOTE(review): the TestISO_2022_CN_EXT and TestJitterbug915 registrations
 * appear to sit inside the "ICU 4.4 (ticket #7314)" comment, i.e. disabled --
 * confirm against the full file.
 */
250 void addTestNewConvert(TestNode
** root
)
252 #if !UCONFIG_NO_FILE_IO
253 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
256 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
260 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
261 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
265 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
266 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
269 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
272 #if !UCONFIG_NO_LEGACY_CONVERSION
273 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
276 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
278 #if !UCONFIG_NO_LEGACY_CONVERSION
279 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
280 #if !UCONFIG_NO_FILE_IO
281 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
282 addTest(root
, &TestICCRunout
, "tsconv/nucnvtst/TestICCRunout");
284 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
286 #ifdef U_ENABLE_GENERIC_ISO_2022
287 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
290 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
292 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
302 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
305 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
307 #if !UCONFIG_NO_LEGACY_CONVERSION
308 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
310 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
313 addTest(root
, &TestJB5275
, "tsconv/nucnvtst/TestJB5275");
314 addTest(root
, &TestJB5275_1
, "tsconv/nucnvtst/TestJB5275_1");
315 #if !UCONFIG_NO_COLLATION
316 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
319 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
327 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
329 #if !UCONFIG_NO_LEGACY_CONVERSION
330 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
332 addTest(root
, &TestJitterbug6175
, "tsconv/nucnvtst/TestJitterbug6175");
334 addTest(root
, &TestIsFixedWidth
, "tsconv/nucnvtst/TestIsFixedWidth");
339 /* Note that this test already makes use of statics, so it's not really
341 This convenience function lets us make the error messages actually useful.
/*
 * Builds the label used in log messages for the current conversion test and
 * stores it in the file-level buffer gNuConvTestName.
 *
 * The format takes two strings and two ints; the last int is gOutBufferSize.
 * NOTE(review): the middle argument lines are not visible in this view;
 * presumably they are codepage, direction and gInBufferSize -- confirm.
 * NOTE(review): sprintf() is unbounded while gNuConvTestName holds 1024
 * bytes, so a very long codepage name could overflow it; snprintf() with
 * sizeof(gNuConvTestName) would bound the write.
 */
344 static void setNuConvTestName(const char *codepage
, const char *direction
)
346 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
350 (int)gOutBufferSize
);
355 TC_OK
= 0, /* test was OK */
356 TC_MISMATCH
= 1, /* Match failed - err was printed */
357 TC_FAIL
= 2 /* Test failed, don't print an err because it was already printed. */
358 } ETestConvertResult
;
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
363 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
365 UErrorCode status
= U_ZERO_ERROR
;
366 UConverter
*conv
= 0;
367 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
368 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
375 int32_t realBufferSize
;
377 const UChar
*realSourceEnd
;
378 const UChar
*sourceLimit
;
379 UBool checkOffsets
= TRUE
;
382 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
383 junkout
[i
] = (char)0xF0;
384 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
387 setNuConvTestName(codepage
, "FROM");
389 log_verbose("\n========= %s\n", gNuConvTestName
);
391 conv
= my_ucnv_open(codepage
, &status
);
393 if(U_FAILURE(status
))
395 log_data_err("Couldn't open converter %s\n",codepage
);
399 ucnv_setFallback(conv
,useFallback
);
402 log_verbose("Converter opened..\n");
408 realBufferSize
= UPRV_LENGTHOF(junkout
);
409 realBufferEnd
= junkout
+ realBufferSize
;
410 realSourceEnd
= source
+ sourceLen
;
412 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
413 checkOffsets
= FALSE
;
417 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
418 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
420 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
422 if(targ
== realBufferEnd
) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
429 status
= U_ZERO_ERROR
;
431 ucnv_fromUnicode (conv
,
436 checkOffsets
? offs
: NULL
,
437 doFlush
, /* flush if we're at the end of the input data */
439 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
441 if(U_FAILURE(status
)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen
, targ
-junkout
);
449 if(getTestOption(VERBOSITY_OPTION
))
452 char offset_str
[9999];
457 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
458 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
459 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
463 printSeq((const uint8_t *)expect
, expectLen
);
464 if ( checkOffsets
) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str
);
472 if(expectLen
!= targ
-junkout
) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
475 fprintf(stderr
, "Got:\n");
476 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
477 fprintf(stderr
, "Expected:\n");
478 printSeqErr((const unsigned char*)expect
, expectLen
);
482 if (checkOffsets
&& (expectOffsets
!= 0) ) {
483 log_verbose("comparing %d offsets..\n", targ
-junkout
);
484 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
486 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
489 for(p
=junkout
;p
<targ
;p
++) {
490 log_err("%d,", junokout
[p
-junkout
]);
493 log_err("Expected: ");
494 for(i
=0; i
<(targ
-junkout
); i
++) {
495 log_err("%d,", expectOffsets
[i
]);
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout
, expect
, expectLen
)) {
503 log_verbose("Matches!\n");
506 log_err("String does not match u->%s\n", gNuConvTestName
);
507 printUSeqErr(source
, sourceLen
);
508 fprintf(stderr
, "Got:\n");
509 printSeqErr((const unsigned char *)junkout
, expectLen
);
510 fprintf(stderr
, "Expected:\n");
511 printSeqErr((const unsigned char *)expect
, expectLen
);
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
520 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
522 UErrorCode status
= U_ZERO_ERROR
;
523 UConverter
*conv
= 0;
524 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
525 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
527 const char *realSourceEnd
;
528 const char *srcLimit
;
534 UBool checkOffsets
= TRUE
;
536 int32_t realBufferSize
;
537 UChar
*realBufferEnd
;
540 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
543 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
546 setNuConvTestName(codepage
, "TO");
548 log_verbose("\n========= %s\n", gNuConvTestName
);
550 conv
= my_ucnv_open(codepage
, &status
);
552 if(U_FAILURE(status
))
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
558 ucnv_setFallback(conv
,useFallback
);
560 log_verbose("Converter opened..\n");
562 src
= (const char *)source
;
566 realBufferSize
= UPRV_LENGTHOF(junkout
);
567 realBufferEnd
= junkout
+ realBufferSize
;
568 realSourceEnd
= src
+ sourcelen
;
570 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
571 checkOffsets
= FALSE
;
575 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
576 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
578 if(targ
== realBufferEnd
)
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
585 /* oldTarg = targ; */
587 status
= U_ZERO_ERROR
;
589 ucnv_toUnicode (conv
,
594 checkOffsets
? offs
: NULL
,
595 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
598 /* offs += (targ-oldTarg); */
600 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
602 if(U_FAILURE(status
))
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen
, targ
-junkout
);
610 if(getTestOption(VERBOSITY_OPTION
))
613 char offset_str
[9999];
619 for(ptr
= junkout
;ptr
<targ
;ptr
++)
621 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
622 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
626 printUSeq(expect
, expectlen
);
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str
);
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
638 if (checkOffsets
&& (expectOffsets
!= 0))
640 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
643 for(p
=junkout
;p
<targ
;p
++) {
644 log_err("%d,", junokout
[p
-junkout
]);
647 log_err("Expected: ");
648 for(i
=0; i
<(targ
-junkout
); i
++) {
649 log_err("%d,", expectOffsets
[i
]);
653 for(i
=0; i
<(targ
-junkout
); i
++) {
654 log_err("%X,", junkout
[i
]);
658 for(i
=0; i
<(src
-(const char *)source
); i
++) {
659 log_err("%X,", (unsigned char)source
[i
]);
665 if(!memcmp(junkout
, expect
, expectlen
*2))
667 log_verbose("Matches!\n");
672 log_err("String does not match. %s\n", gNuConvTestName
);
673 log_verbose("String does not match. %s\n", gNuConvTestName
);
675 printUSeqErr(junkout
, expectlen
);
676 printf("\nExpected:");
677 printUSeqErr(expect
, expectlen
);
683 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText
[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable
[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
693 static const uint8_t expectedUTF8
[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs
[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs
[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceding */
702 static const const uint8_t expectedISO2022
[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs
[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs
[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930
[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs
[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs
[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943
[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs
[] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs
[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027
[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs
[] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920
[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs
[] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593
[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs
[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1
[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs
[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
753 static const uint8_t expectedUTF16BE
[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs
[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs
[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
760 static const uint8_t expectedUTF16LE
[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs
[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs
[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
767 static const uint8_t expectedUTF32BE
[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs
[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs
[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
791 static const uint8_t expectedUTF32LE
[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs
[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs
[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
818 /** Test chars #2 **/
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
825 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
831 gInBufferSize
= insize
;
832 gOutBufferSize
= outsize
;
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
838 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
839 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
841 log_verbose("Test surrogate behaviour for UTF8\n");
843 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
848 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput
, UPRV_LENGTHOF(testinput
),
850 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
857 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
858 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
862 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
863 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
865 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
866 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
868 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
869 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
871 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
872 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
875 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
876 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
878 #if !UCONFIG_NO_LEGACY_CONVERSION
880 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
881 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
883 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
884 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
888 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
889 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
891 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
892 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
894 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
895 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
897 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
898 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
905 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
906 sampleText
, UPRV_LENGTHOF(sampleText
), "utf8", fmUTF8Offs
,FALSE
);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
909 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
910 sampleText
, UPRV_LENGTHOF(sampleText
), "ISO_2022", fmISO2022Offs
,FALSE
);
914 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
915 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
917 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
918 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16be", fmUTF16BEOffs
,FALSE
);
920 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
921 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32le", fmUTF32LEOffs
,FALSE
);
923 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
924 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32be", fmUTF32BEOffs
,FALSE
);
926 #if !UCONFIG_NO_LEGACY_CONVERSION
928 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
), sampleTextRoundTripUnmappable
,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-930", fmIBM930Offs
,FALSE
);
931 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),sampleTextRoundTripUnmappable
,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-943", fmIBM943Offs
,FALSE
);
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
937 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
941 malteseUChars
, UPRV_LENGTHOF(malteseUChars
), "latin3", NULL
,FALSE
);
943 testConvertFromU(malteseUChars
, UPRV_LENGTHOF(malteseUChars
),
944 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
947 testConvertFromU(LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
),
948 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
949 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
950 LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
), "LMBCS-1", fmLMBCSOffs
,FALSE
);
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
955 /* encode directly set D and set O */
956 static const uint8_t utf7
[] = {
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
968 static const UChar unicode
[] = {
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
973 [Japanese word "nihongo"]
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
978 0x65e5, 0x672c, 0x8a9e
980 static const int32_t toUnicodeOffsets
[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
986 static const int32_t fromUnicodeOffsets
[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted
[] = {
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1006 static const int32_t toUnicodeOffsetsR
[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1012 static const int32_t fromUnicodeOffsetsR
[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1019 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
1021 testConvertToU(utf7
, sizeof(utf7
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7", toUnicodeOffsets
,FALSE
);
1023 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
1025 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1034 static const uint8_t imap
[] = {
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1055 static const UChar unicode
[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1059 [Japanese word "nihongo"]
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1069 0x65e5, 0x672c, 0x8a9e,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1076 static const int32_t toUnicodeOffsets
[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1082 38, 39, 40, 41, 42, 43,
1087 static const int32_t fromUnicodeOffsets
[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1093 20, 21, 22, 23, 24, 25,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1099 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1101 testConvertToU(imap
, sizeof(imap
), unicode
, UPRV_LENGTHOF(unicode
), "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1104 /* Test UTF-8 bad data handling*/
1106 static const uint8_t utf8
[]={
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1120 static const uint16_t utf8Expected
[]={
1122 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1134 static const int32_t utf8Offsets
[]={
1147 testConvertToU(utf8
, sizeof(utf8
),
1148 utf8Expected
, UPRV_LENGTHOF(utf8Expected
), "utf-8", utf8Offsets
,FALSE
);
1152 /* Test UTF-32BE bad data handling*/
1154 static const uint8_t utf32
[]={
1155 0x00, 0x00, 0x00, 0x61,
1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1158 0x00, 0x00, 0x00, 0x62,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1161 0x00, 0x00, 0x01, 0x62,
1162 0x00, 0x00, 0x02, 0x62
1164 static const uint16_t utf32Expected
[]={
1166 0xfffd, /* 0x110000 out of range */
1167 0xDBFF, /* 0x10FFFF in range */
1170 0xfffd, /* 0xffffffff out of range */
1171 0xfffd, /* 0x7fffffff out of range */
1175 static const int32_t utf32Offsets
[]={
1176 0, 4, 8, 8, 12, 16, 20, 24, 28
1178 static const uint8_t utf32ExpectedBack
[]={
1179 0x00, 0x00, 0x00, 0x61,
1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1182 0x00, 0x00, 0x00, 0x62,
1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1185 0x00, 0x00, 0x01, 0x62,
1186 0x00, 0x00, 0x02, 0x62
1188 static const int32_t utf32OffsetsBack
[]={
1199 testConvertToU(utf32
, sizeof(utf32
),
1200 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32be", utf32Offsets
,FALSE
);
1201 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1202 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1205 /* Test UTF-32LE bad data handling*/
1207 static const uint8_t utf32
[]={
1208 0x61, 0x00, 0x00, 0x00,
1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1211 0x62, 0x00, 0x00, 0x00,
1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1214 0x62, 0x01, 0x00, 0x00,
1215 0x62, 0x02, 0x00, 0x00,
1218 static const uint16_t utf32Expected
[]={
1220 0xfffd, /* 0x110000 out of range */
1221 0xDBFF, /* 0x10FFFF in range */
1224 0xfffd, /* 0xffffffff out of range */
1225 0xfffd, /* 0x7fffffff out of range */
1229 static const int32_t utf32Offsets
[]={
1230 0, 4, 8, 8, 12, 16, 20, 24, 28
1232 static const uint8_t utf32ExpectedBack
[]={
1233 0x61, 0x00, 0x00, 0x00,
1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1236 0x62, 0x00, 0x00, 0x00,
1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1239 0x62, 0x01, 0x00, 0x00,
1240 0x62, 0x02, 0x00, 0x00
1242 static const int32_t utf32OffsetsBack
[]={
1252 testConvertToU(utf32
, sizeof(utf32
),
1253 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32le", utf32Offsets
,FALSE
);
1254 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1255 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
/*
 * TestCoverageMBCS: additional MBCS code-coverage checks driven by the test
 * converters "@test1", "@test3" and "@test4".
 *
 * Each section passes a fixed UTF-16 input through testConvertFromU() and, for
 * test3/test4, a fixed byte sequence through testConvertToU(), comparing the
 * output and the per-unit offsets with the tables declared in that section.
 *
 * The visible setup code saves the current ICU data directory, switches to the
 * directory returned by loadTestData(), and the teardown restores the saved one.
 * NOTE(review): confirm that this setup/teardown is actually compiled in.
 */
1259 static void TestCoverageMBCS(){
1261 UErrorCode status
= U_ZERO_ERROR
;
1262 const char *directory
= loadTestData(&status
);
1263 char* tdpath
= NULL
;
/* NOTE(review): neither malloc() result below is checked for NULL. */
1264 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265 int len
= strlen(directory
);
/* tdpath receives a modifiable copy of the test-data directory path. */
1268 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1269 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1270 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1271 uprv_strcpy(tdpath
,directory
);
/* Locate the last file separator in the copied path.
   NOTE(review): strrchr() may return NULL if the path has no separator. */
1272 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
/* True when the last separator is not the final character of the path. */
1274 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1277 u_setDataDirectory(tdpath
);
1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1281 /* Additional MBCS coverage: use a test converter created from test1.ucm,
1282 which is a test file for MBCS conversion with single-byte codepage data. */
1285 /* MBCS with single byte codepage data test1.ucm*/
1286 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1291 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1292 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1295 /* Additional MBCS coverage: use a test converter created from test3.ucm,
1296 which is a test file for MBCS conversion with three-byte codepage data. */
1299 /* MBCS with three byte codepage data test3.ucm*/
1300 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1302 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1304 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1309 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1310 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
1313 testConvertToU(test3input
, sizeof(test3input
),
1314 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test3", fromtest3Offs
,FALSE
);
1318 /* Additional MBCS coverage: use a test converter created from test4.ucm,
1319 which is a test file for MBCS conversion with four-byte codepage data. */
1322 /* MBCS with four byte codepage data test4.ucm*/
1323 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1325 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1327 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1332 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1333 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
1336 testConvertToU(test4input
, sizeof(test4input
),
1337 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test4", fromtest4Offs
,FALSE
);
1342 /* restore the original data directory */
1343 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1344 u_setDataDirectory(saveDirectory
);
1345 free(saveDirectory
);
1350 static void TestConverterType(const char *convName
, UConverterType convType
) {
1351 UConverter
* myConverter
;
1352 UErrorCode err
= U_ZERO_ERROR
;
1354 myConverter
= my_ucnv_open(convName
, &err
);
1356 if (U_FAILURE(err
)) {
1357 log_data_err("Failed to create an %s converter\n", convName
);
1362 if (ucnv_getType(myConverter
)!=convType
) {
1363 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1364 convName
, convType
);
1367 log_verbose("ucnv_getType %s ok\n", convName
);
1370 ucnv_close(myConverter
);
/*
 * TestConverterTypesAndStarters: checks ucnv_getType() for a list of
 * converters and exercises ucnv_getStarters() on the "ksc" converter.
 *
 * The "ksc" converter is opened directly and must report UCNV_MBCS; every
 * other name is checked through TestConverterType(). Several entries are
 * guarded by UCONFIG_NO_LEGACY_CONVERSION / U_ENABLE_GENERIC_ISO_2022.
 */
1373 static void TestConverterTypesAndStarters()
1375 #if !UCONFIG_NO_LEGACY_CONVERSION
1376 UConverter
* myConverter
;
1377 UErrorCode err
= U_ZERO_ERROR
;
1378 UBool mystarters
[256];
1380 /* const UBool expectedKSCstarters[256] = {
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
/* NOTE(review): the converter is opened as "ksc" but the messages below refer
   to it as "ibm-ksc" and "ibm-949". */
1411 myConverter
= ucnv_open("ksc", &err
);
1412 if (U_FAILURE(err
)) {
1413 log_data_err("Failed to create an ibm-ksc converter\n");
1418 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1419 log_err("ucnv_getType Failed for ibm-949\n");
1421 log_verbose("ucnv_getType ibm-949 ok\n");
/* Only exercises the call: the comparison against the expected starters table
   is commented out, and err is not inspected afterwards in the visible code. */
1423 if(myConverter
!=NULL
)
1424 ucnv_getStarters(myConverter
, mystarters
, &err
);
1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427 log_err("Failed ucnv_getStarters for ksc\n");
1429 log_verbose("ucnv_getStarters ok\n");*/
1432 ucnv_close(myConverter
);
/* Remaining converters: name -> expected UConverterType. */
1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1435 TestConverterType("ibm-878", UCNV_SBCS
);
1438 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1440 TestConverterType("ibm-1208", UCNV_UTF8
);
1442 TestConverterType("utf-8", UCNV_UTF8
);
1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1448 #if !UCONFIG_NO_LEGACY_CONVERSION
1450 #if defined(U_ENABLE_GENERIC_ISO_2022)
1451 TestConverterType("iso-2022", UCNV_ISO_2022
);
1454 TestConverterType("hz", UCNV_HZ
);
1457 TestConverterType("scsu", UCNV_SCSU
);
1459 #if !UCONFIG_NO_LEGACY_CONVERSION
1460 TestConverterType("x-iscii-de", UCNV_ISCII
);
1463 TestConverterType("ascii", UCNV_US_ASCII
);
1464 TestConverterType("utf-7", UCNV_UTF7
);
1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1466 TestConverterType("bocu-1", UCNV_BOCU1
);
/*
 * TestAmbiguousConverter: checks that ucnv_isAmbiguous() agrees with what the
 * converter actually does to byte 0x5c, and that ucnv_fixFileSeparator()
 * repairs the result when it is not U+005C.
 *
 * The bytes 'a', '[' and 0x5c are converted to Unicode. Converters that fail
 * the conversion, or that do not map the first two bytes to U+0061/U+005B, or
 * that map 0x5c to U+FFFD, are treated as not applicable (see comments below).
 *
 * cnv: an open converter; it is not closed here.
 */
1470 TestAmbiguousConverter(UConverter
*cnv
) {
1471 static const char inBytes
[3]={ 0x61, 0x5B, 0x5c };
1472 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1476 UErrorCode errorCode
;
1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480 errorCode
=U_ZERO_ERROR
;
1483 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+3, NULL
, TRUE
, &errorCode
);
1484 if(U_FAILURE(errorCode
)) {
1485 /* we do not care about general failures in this test; the input may just not be mappable */
1489 if(outUnicode
[0]!=0x61 || outUnicode
[1]!=0x5B || outUnicode
[2]==0xfffd) {
1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491 /* There are some encodings that are partially ASCII based,
1492 like the ISO-7 and GSM series of codepages, which we ignore. */
1496 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1498 /* check that outUnicode[2]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499 if((outUnicode
[2]!=0x5c)!=isAmbiguous
) {
1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501 ucnv_getName(cnv
, &errorCode
), outUnicode
[2]!=0x5c, isAmbiguous
);
1505 if(outUnicode
[2]!=0x5c) {
1506 /* needs fixup, fix it */
/* (u-outUnicode) is the number of UChars written by ucnv_toUnicode() above */
1507 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1508 if(outUnicode
[2]!=0x5c) {
1509 /* the fix failed */
1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
/*
 * TestAmbiguous: tests ucnv_isAmbiguous() and ucnv_fixFileSeparator().
 *
 * Part 1 opens every name returned by ucnv_getAvailableName() and runs
 * TestAmbiguousConverter() on it.
 *
 * Part 2 (only with legacy conversion enabled) converts the same backslash-
 * separated path with "ibm-943" and with "LATIN-1". It expects the SJIS
 * converter to be ambiguous, the two results to differ before
 * ucnv_fixFileSeparator() is applied, and to be equal afterwards.
 */
1516 static void TestAmbiguous()
1518 UErrorCode status
= U_ZERO_ERROR
;
1519 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1520 static const char target
[] = {
1521 /* "\\usr\\local\\share\\data\\icutest.txt" */
1522 0x5c, 0x75, 0x73, 0x72,
1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525 0x5c, 0x64, 0x61, 0x74, 0x61,
1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1529 UChar asciiResult
[200], sjisResult
[200];
1530 int32_t /*asciiLength = 0,*/ sjisLength
= 0, i
;
1533 /* enumerate all converters */
1534 status
=U_ZERO_ERROR
;
1535 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1536 cnv
=ucnv_open(name
, &status
);
1537 if(U_SUCCESS(status
)) {
1538 TestAmbiguousConverter(cnv
);
1541 log_err("error: unable to open available converter \"%s\"\n", name
);
/* reset so that one failed open does not affect the next iteration */
1542 status
=U_ZERO_ERROR
;
1546 #if !UCONFIG_NO_LEGACY_CONVERSION
1547 sjis_cnv
= ucnv_open("ibm-943", &status
);
1548 if (U_FAILURE(status
))
1550 log_data_err("Failed to create a SJIS converter\n");
/* LATIN-1 provides the reference result in which 0x5c maps to U+005C */
1553 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1554 if (U_FAILURE(status
))
1556 log_data_err("Failed to create a LATIN-1 converter\n");
1557 ucnv_close(sjis_cnv
);
1560 /* convert target from SJIS to Unicode */
1561 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, UPRV_LENGTHOF(sjisResult
), target
, (int32_t)strlen(target
), &status
);
1562 if (U_FAILURE(status
))
1564 log_err("Failed to convert the SJIS string.\n");
1565 ucnv_close(sjis_cnv
);
1566 ucnv_close(ascii_cnv
);
1569 /* convert target from Latin-1 to Unicode */
1570 /*asciiLength =*/ ucnv_toUChars(ascii_cnv
, asciiResult
, UPRV_LENGTHOF(asciiResult
), target
, (int32_t)strlen(target
), &status
);
1571 if (U_FAILURE(status
))
1573 log_err("Failed to convert the Latin-1 string.\n");
1574 ucnv_close(sjis_cnv
);
1575 ucnv_close(ascii_cnv
);
1578 if (!ucnv_isAmbiguous(sjis_cnv
))
1580 log_err("SJIS converter should contain ambiguous character mappings.\n");
1581 ucnv_close(sjis_cnv
);
1582 ucnv_close(ascii_cnv
);
/* before the fix the SJIS result is expected to differ from the Latin-1 result */
1585 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1587 log_err("File separators for SJIS don't need to be fixed.\n");
/* after the fix both results are expected to be identical */
1589 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1590 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1592 log_err("Fixing file separator for SJIS failed.\n");
1594 ucnv_close(sjis_cnv
);
1595 ucnv_close(ascii_cnv
);
/*
 * TestSignatureDetection: tests ucnv_detectUnicodeSignature().
 *
 * Section 1 passes NUL-terminated inputs with sourceLength -1.
 * Section 2 passes explicit lengths taken from len[], which allows inputs
 * containing embedded zero bytes (e.g. the UTF-32 signatures).
 *
 * In both sections the returned charset name is compared with expected[i] and
 * the reported signature length with expectedLength[i].
 */
1600 TestSignatureDetection(){
1601 /* with null terminated strings */
1603 static const char* data
[] = {
1604 "\xFE\xFF\x00\x00", /* UTF-16BE */
1605 "\xFF\xFE\x00\x00", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x00", /* UTF-8 */
1607 "\x0E\xFE\xFF\x00", /* SCSU */
1609 "\xFE\xFF", /* UTF-16BE */
1610 "\xFF\xFE", /* UTF-16LE */
1611 "\xEF\xBB\xBF", /* UTF-8 */
1612 "\x0E\xFE\xFF", /* SCSU */
1614 "\xFE\xFF\x41\x42", /* UTF-16BE */
1615 "\xFF\xFE\x41\x41", /* UTF-16LE */
1616 "\xEF\xBB\xBF\x41", /* UTF-8 */
1617 "\x0E\xFE\xFF\x41", /* SCSU */
1619 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1620 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1621 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1622 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1623 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1625 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1627 static const char* expected
[] = {
1650 static const int32_t expectedLength
[] ={
1675 int32_t signatureLength
= -1;
1676 const char* source
= NULL
;
1677 const char* enc
= NULL
;
1678 for( ; i
<UPRV_LENGTHOF(data
); i
++){
/* -1: the source is NUL-terminated */
1681 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1683 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1686 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1687 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): the format string labels the values "Expected Length" then
   "Got length", but the arguments are passed as signatureLength (got) then
   expectedLength[i] (expected), so the labels are swapped in the output. */
1690 if(signatureLength
!= expectedLength
[i
]){
1691 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/* Section 2: the same check with explicit source lengths. */
1696 static const char* data
[] = {
1697 "\xFE\xFF\x00", /* UTF-16BE */
1698 "\xFF\xFE\x00", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x00", /* UTF-8 */
1700 "\x0E\xFE\xFF\x00", /* SCSU */
1701 "\x00\x00\xFE\xFF", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00", /* UTF-32LE */
1703 "\xFE\xFF", /* UTF-16BE */
1704 "\xFF\xFE", /* UTF-16LE */
1705 "\xEF\xBB\xBF", /* UTF-8 */
1706 "\x0E\xFE\xFF", /* SCSU */
1707 "\x00\x00\xFE\xFF", /* UTF-32BE */
1708 "\xFF\xFE\x00\x00", /* UTF-32LE */
1709 "\xFE\xFF\x41\x42", /* UTF-16BE */
1710 "\xFF\xFE\x41\x41", /* UTF-16LE */
1711 "\xEF\xBB\xBF\x41", /* UTF-8 */
1712 "\x0E\xFE\xFF\x41", /* SCSU */
1713 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1714 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1715 "\xFB\xEE\x28", /* BOCU-1 */
1716 "\xFF\x41\x42" /* NULL */
1718 static const int len
[] = {
1741 static const char* expected
[] = {
1763 static const int32_t expectedLength
[] ={
1787 int32_t signatureLength
= -1;
1788 int32_t sourceLength
=-1;
1789 const char* source
= NULL
;
1790 const char* enc
= NULL
;
1791 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1794 sourceLength
= len
[i
];
1795 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
/* A NULL result is only reported as an error when a name was expected
   (see the expected[i] != NULL check below). */
1800 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1801 if(expected
[i
] !=NULL
){
1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
/* NOTE(review): as in section 1, the got/expected arguments are passed in the
   opposite order to the labels in the format string. */
1806 if(signatureLength
!= expectedLength
[i
]){
1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/*
 * TestUTF7: exercises the "UTF-7" converter through TestNextUChar().
 *
 * The in[] bytes are checked against results[], which holds pairs of
 * (number of bytes read, code point). The test also checks that an empty
 * source range (source == limit) yields U_INDEX_OUTOFBOUNDS_ERROR and that
 * ucnv_getName() reports "UTF-7".
 */
1813 static void TestUTF7() {
1815 static const uint8_t in
[]={
1816 /* H - +Jjo- - ! +- +2AHcAQ */
1819 0x2b, 0x4a, 0x6a, 0x6f,
1823 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1826 /* expected test results */
1827 static const int32_t results
[]={
1828 /* number of bytes read, code point */
1831 4, 0x263a, /* <WHITE SMILING FACE> */
1838 const char *cnvName
;
1839 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1840 UErrorCode errorCode
=U_ZERO_ERROR
;
1841 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1842 if(U_FAILURE(errorCode
)) {
1843 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
));
1846 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1847 /* Test the condition when source >= sourceLimit */
1848 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* the converter must report its name as "UTF-7" */
1849 cnvName
= ucnv_getName(cnv
, &errorCode
);
1850 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1851 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * TestIMAP: exercises the "IMAP-mailbox-name" converter through
 * TestNextUChar().
 *
 * The in[] bytes are checked against results[], which holds pairs of
 * (number of bytes read, code point). The test also checks that an empty
 * source range (source == limit) yields U_INDEX_OUTOFBOUNDS_ERROR and that
 * ucnv_getName() reports "IMAP-mailbox-name".
 */
1856 static void TestIMAP() {
1858 static const uint8_t in
[]={
1859 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1862 0x26, 0x4a, 0x6a, 0x6f,
1866 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1869 /* expected test results */
1870 static const int32_t results
[]={
1871 /* number of bytes read, code point */
1874 4, 0x263a, /* <WHITE SMILING FACE> */
1881 const char *cnvName
;
1882 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1883 UErrorCode errorCode
=U_ZERO_ERROR
;
1884 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1885 if(U_FAILURE(errorCode
)) {
1886 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
));
1889 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1890 /* Test the condition when source >= sourceLimit */
1891 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* the converter must report its name as "IMAP-mailbox-name" */
1892 cnvName
= ucnv_getName(cnv
, &errorCode
);
1893 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1894 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * TestUTF8: exercises the "UTF-8" converter through TestNextUChar().
 *
 * Pass 1 checks the in[] bytes against results[].
 * Pass 2 installs UCNV_TO_U_CALLBACK_SKIP and checks the in2[] bytes, which
 * contain the ill-formed sequences annotated below, against results2[].
 * Both result tables hold pairs of (number of bytes read, code point).
 */
1899 static void TestUTF8() {
1901 static const uint8_t in
[]={
1905 0xf0, 0x90, 0x80, 0x80,
1906 0xf4, 0x84, 0x8c, 0xa1,
1907 0xf0, 0x90, 0x90, 0x81
1910 /* expected test results */
1911 static const int32_t results
[]={
1912 /* number of bytes read, code point */
1921 /* error test input */
1922 static const uint8_t in2
[]={
1924 0xc0, 0x80, /* illegal non-shortest form */
1925 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1926 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1927 0xc0, 0xc0, /* illegal trail byte */
1928 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1929 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1930 0xfe, /* illegal byte altogether */
1934 /* expected error test results */
1935 static const int32_t results2
[]={
1936 /* number of bytes read, code point */
1941 UConverterToUCallback cb
;
1944 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1945 UErrorCode errorCode
=U_ZERO_ERROR
;
1946 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1947 if(U_FAILURE(errorCode
)) {
1948 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1951 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1952 /* Test the condition when source >= sourceLimit */
1953 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1955 /* test error behavior with a skip callback */
/* cb and p receive the previously installed callback and its context */
1956 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1957 source
=(const char *)in2
;
1958 limit
=(const char *)(in2
+sizeof(in2
));
1959 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
/*
 * TestCESU8: exercises the "CESU-8" converter through TestNextUChar().
 *
 * Pass 1 checks the in[] bytes against results[]; entries with a byte count
 * of -1 mark cases where the number of bytes read may vary (see the comments
 * on those entries).
 * Pass 2 installs UCNV_TO_U_CALLBACK_SKIP and checks the in2[] bytes, which
 * contain the ill-formed sequences annotated below, against results2[].
 */
1964 static void TestCESU8() {
1966 static const uint8_t in
[]={
1970 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1971 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1972 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1976 /* expected test results */
1977 static const int32_t results
[]={
1978 /* number of bytes read, code point */
1984 -1,0xd802, /* may read 3 or 6 bytes */
1985 -1,0x10ffff,/* may read 0 or 3 bytes */
1989 /* error test input */
1990 static const uint8_t in2
[]={
1992 0xc0, 0x80, /* illegal non-shortest form */
1993 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1994 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1995 0xc0, 0xc0, /* illegal trail byte */
1996 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1997 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1998 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1999 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2000 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2001 0xfe, /* illegal byte altogether */
2005 /* expected error test results */
2006 static const int32_t results2
[]={
2007 /* number of bytes read, code point */
2012 UConverterToUCallback cb
;
2015 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
2016 UErrorCode errorCode
=U_ZERO_ERROR
;
2017 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
2018 if(U_FAILURE(errorCode
)) {
2019 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
2022 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
2023 /* Test the condition when source >= sourceLimit */
2024 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2026 /* test error behavior with a skip callback */
/* cb and p receive the previously installed callback and its context */
2027 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2028 source
=(const char *)in2
;
2029 limit
=(const char *)(in2
+sizeof(in2
));
2030 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
/*
 * TestUTF16: exercises the "UTF-16" converter through TestNextUChar() with
 * three inputs that differ in their first two bytes:
 *   in1 starts with FE FF, in2 starts with FF FE, in3 starts with FE FE.
 * Each input is checked against its own results table, and the converter is
 * reset with ucnv_resetToUnicode() between inputs. Finally an empty source
 * range is expected to yield U_INDEX_OUTOFBOUNDS_ERROR.
 */
2035 static void TestUTF16() {
2037 static const uint8_t in1
[]={
2038 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2040 static const uint8_t in2
[]={
2041 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2043 static const uint8_t in3
[]={
2044 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2047 /* expected test results */
2048 static const int32_t results1
[]={
2049 /* number of bytes read, code point */
2053 static const int32_t results2
[]={
2054 /* number of bytes read, code point */
2058 static const int32_t results3
[]={
2059 /* number of bytes read, code point */
2066 const char *source
, *limit
;
2068 UErrorCode errorCode
=U_ZERO_ERROR
;
2069 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2070 if(U_FAILURE(errorCode
)) {
2071 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
/* input 1 */
2075 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2076 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
/* input 2, after resetting the to-Unicode state */
2078 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2079 ucnv_resetToUnicode(cnv
);
2080 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
/* input 3, after resetting the to-Unicode state */
2082 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2083 ucnv_resetToUnicode(cnv
);
2084 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2086 /* Test the condition when source >= sourceLimit */
2087 ucnv_resetToUnicode(cnv
);
2088 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * TestUTF16BE: exercises the "utf-16be" converter through TestNextUChar().
 *
 * The in[] bytes are checked against results[] (pairs of number of bytes read
 * and code point). An empty source range is expected to yield
 * U_INDEX_OUTOFBOUNDS_ERROR, and, with UCNV_TO_U_CALLBACK_STOP installed, a
 * single input byte is expected to yield U_TRUNCATED_CHAR_FOUND.
 */
2093 static void TestUTF16BE() {
2095 static const uint8_t in
[]={
2101 0xd8, 0x01, 0xdc, 0x01
2104 /* expected test results */
2105 static const int32_t results
[]={
2106 /* number of bytes read, code point */
2115 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2116 UErrorCode errorCode
=U_ZERO_ERROR
;
2117 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2118 if(U_FAILURE(errorCode
)) {
2119 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2122 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2123 /* Test the condition when source >= sourceLimit */
2124 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2125 /*Test for the condition where there is an invalid character*/
/* source2 is a single byte, i.e. half of a UTF-16 code unit */
2127 static const uint8_t source2
[]={0x61};
2128 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2129 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2133 * Test disabled because currently the UTF-16BE/LE converters are supposed
2134 * to not set errors for unpaired surrogates.
2135 * This may change with
2136 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2139 /*Test for the condition where there is a surrogate pair*/
2141 const uint8_t source2
[]={0xd8, 0x01};
2142 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * UTF-16LE checks: the same sequence of checks as for UTF-16BE, using a
 * converter opened as "utf-16le" and little-endian input bytes.
 *
 * The in[] bytes are checked against results[] (pairs of number of bytes read
 * and code point). An empty source range is expected to yield
 * U_INDEX_OUTOFBOUNDS_ERROR, and, with UCNV_TO_U_CALLBACK_STOP installed, a
 * single input byte is expected to yield U_TRUNCATED_CHAR_FOUND.
 */
2151 static const uint8_t in
[]={
2156 0x01, 0xd8, 0x01, 0xdc
2159 /* expected test results */
2160 static const int32_t results
[]={
2161 /* number of bytes read, code point */
2169 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2170 UErrorCode errorCode
=U_ZERO_ERROR
;
2171 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2172 if(U_FAILURE(errorCode
)) {
2173 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2176 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2177 /* Test the condition when source >= sourceLimit */
2178 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2179 /*Test for the condition where there is an invalid character*/
/* source2 is a single byte, i.e. half of a UTF-16 code unit */
2181 static const uint8_t source2
[]={0x61};
2182 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2183 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2187 * Test disabled because currently the UTF-16BE/LE converters are supposed
2188 * to not set errors for unpaired surrogates.
2189 * This may change with
2190 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2193 /*Test for the condition where there is a surrogate character*/
2195 static const uint8_t source2
[]={0x01, 0xd8};
2196 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * TestUTF32: exercises the "UTF-32" converter through TestNextUChar() with
 * three inputs that differ in their first four bytes:
 *   in1 starts with 00 00 FE FF, in2 starts with FF FE 00 00,
 *   in3 starts with 00 00 FE FE.
 * Each input is checked against its own results table, and the converter is
 * reset with ucnv_resetToUnicode() between inputs. results3 expects U+FFFD
 * for each of the two trailing surrogate code points in in3. Finally an empty
 * source range is expected to yield U_INDEX_OUTOFBOUNDS_ERROR.
 */
2203 static void TestUTF32() {
2205 static const uint8_t in1
[]={
2206 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2208 static const uint8_t in2
[]={
2209 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2211 static const uint8_t in3
[]={
2212 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2215 /* expected test results */
2216 static const int32_t results1
[]={
2217 /* number of bytes read, code point */
2221 static const int32_t results2
[]={
2222 /* number of bytes read, code point */
2226 static const int32_t results3
[]={
2227 /* number of bytes read, code point */
2230 4, 0xfffd, /* unmatched surrogate */
2231 4, 0xfffd /* unmatched surrogate */
2234 const char *source
, *limit
;
2236 UErrorCode errorCode
=U_ZERO_ERROR
;
2237 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2238 if(U_FAILURE(errorCode
)) {
2239 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
/* input 1 */
2243 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2244 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
/* input 2, after resetting the to-Unicode state */
2246 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2247 ucnv_resetToUnicode(cnv
);
2248 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
/* input 3, after resetting the to-Unicode state */
2250 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2251 ucnv_resetToUnicode(cnv
);
2252 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2254 /* Test the condition when source >= sourceLimit */
2255 ucnv_resetToUnicode(cnv
);
2256 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2264 static const uint8_t in
[]={
2265 0x00, 0x00, 0x00, 0x61,
2266 0x00, 0x00, 0x30, 0x61,
2267 0x00, 0x00, 0xdc, 0x00,
2268 0x00, 0x00, 0xd8, 0x00,
2269 0x00, 0x00, 0xdf, 0xff,
2270 0x00, 0x00, 0xff, 0xfe,
2271 0x00, 0x10, 0xab, 0xcd,
2272 0x00, 0x10, 0xff, 0xff
2275 /* expected test results */
2276 static const int32_t results
[]={
2277 /* number of bytes read, code point */
2288 /* error test input */
2289 static const uint8_t in2
[]={
2290 0x00, 0x00, 0x00, 0x61,
2291 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2292 0x00, 0x00, 0x00, 0x62,
2293 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2294 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2295 0x00, 0x00, 0x01, 0x62,
2296 0x00, 0x00, 0x02, 0x62
2299 /* expected error test results */
2300 static const int32_t results2
[]={
2301 /* number of bytes read, code point */
2308 UConverterToUCallback cb
;
2311 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2312 UErrorCode errorCode
=U_ZERO_ERROR
;
2313 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2314 if(U_FAILURE(errorCode
)) {
2315 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2318 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2320 /* Test the condition when source >= sourceLimit */
2321 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2323 /* test error behavior with a skip callback */
2324 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2325 source
=(const char *)in2
;
2326 limit
=(const char *)(in2
+sizeof(in2
));
2327 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
2335 static const uint8_t in
[]={
2336 0x61, 0x00, 0x00, 0x00,
2337 0x61, 0x30, 0x00, 0x00,
2338 0x00, 0xdc, 0x00, 0x00,
2339 0x00, 0xd8, 0x00, 0x00,
2340 0xff, 0xdf, 0x00, 0x00,
2341 0xfe, 0xff, 0x00, 0x00,
2342 0xcd, 0xab, 0x10, 0x00,
2343 0xff, 0xff, 0x10, 0x00
2346 /* expected test results */
2347 static const int32_t results
[]={
2348 /* number of bytes read, code point */
2359 /* error test input */
2360 static const uint8_t in2
[]={
2361 0x61, 0x00, 0x00, 0x00,
2362 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2363 0x62, 0x00, 0x00, 0x00,
2364 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2365 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2366 0x62, 0x01, 0x00, 0x00,
2367 0x62, 0x02, 0x00, 0x00,
2370 /* expected error test results */
2371 static const int32_t results2
[]={
2372 /* number of bytes read, code point */
2379 UConverterToUCallback cb
;
2382 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2383 UErrorCode errorCode
=U_ZERO_ERROR
;
2384 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2385 if(U_FAILURE(errorCode
)) {
2386 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2389 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2391 /* Test the condition when source >= sourceLimit */
2392 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2394 /* test error behavior with a skip callback */
2395 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2396 source
=(const char *)in2
;
2397 limit
=(const char *)(in2
+sizeof(in2
));
2398 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
2406 static const uint8_t in
[]={
2415 /* expected test results */
2416 static const int32_t results
[]={
2417 /* number of bytes read, code point */
2425 static const uint16_t in1
[] = {
2426 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2427 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2428 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2429 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2430 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2431 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2432 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2433 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2434 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2435 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2436 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2439 static const uint8_t out1
[] = {
2440 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2441 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2442 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2443 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2444 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2445 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2446 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2447 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2448 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2449 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2450 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2453 static const uint16_t in2
[]={
2454 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2455 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2456 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2457 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2458 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2459 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2460 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2461 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2462 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2463 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2464 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2465 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2466 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2467 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2468 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2471 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2472 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2473 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2474 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2475 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2476 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2477 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2478 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2479 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2480 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2481 0x37, 0x20, 0x2A, 0x2F,
2483 static const unsigned char out2
[]={
2484 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2485 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2486 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2487 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2488 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2489 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2490 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2491 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2492 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2493 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2494 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2495 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2496 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2497 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2498 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2499 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2500 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2501 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2502 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2503 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2504 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2505 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2506 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2507 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2508 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2509 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2510 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2511 0x37, 0x20, 0x2A, 0x2F,
2513 const char *source
=(const char *)in
;
2514 const char *limit
=(const char *)in
+sizeof(in
);
2516 UErrorCode errorCode
=U_ZERO_ERROR
;
2517 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2518 if(U_FAILURE(errorCode
)) {
2519 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2522 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2523 /* Test the condition when source >= sourceLimit */
2524 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2525 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2526 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
2534 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2535 /* expected test results */
2536 static const int32_t results
[]={
2537 /* number of bytes read, code point */
2546 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2547 UErrorCode errorCode
=U_ZERO_ERROR
;
2548 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
2549 if(U_FAILURE(errorCode
)) {
2550 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2553 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2554 /* Test the condition when source >= sourceLimit */
2555 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2556 /*Test for Illegal character */ /*
2558 static const uint8_t input1[]={ 0xA1 };
2559 const char* illegalsource=(const char*)input1;
2560 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2569 static const uint8_t in
[]={
2578 /* expected test results */
2579 static const int32_t results
[]={
2580 /* number of bytes read, code point */
2588 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2589 UErrorCode errorCode
=U_ZERO_ERROR
;
2591 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2592 if(U_FAILURE(errorCode
)) {
2593 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2596 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2597 /* Test the condition when source >= sourceLimit */
2598 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2599 /*Test for the condition where there is an invalid character*/
2601 static const uint8_t source2
[]={0x1a, 0x1b};
2602 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2604 /*Test for the condition where we have a truncated char*/
2606 static const uint8_t source1
[]={0xc4};
2607 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2608 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2616 static const uint8_t in
[]={
2627 /* expected test results */
2628 static const int32_t results
[]={
2629 /* number of bytes read, code point */
2639 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2640 UErrorCode errorCode
=U_ZERO_ERROR
;
2642 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2643 if(U_FAILURE(errorCode
)) {
2644 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2647 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2648 /* Test the condition when source >= sourceLimit */
2649 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2650 /*Test for the condition where there is an invalid character*/
2652 static const uint8_t source2
[]={0xa1, 0x80};
2653 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2655 /*Test for the condition where we have a truncated char*/
2657 static const uint8_t source1
[]={0xc4};
2658 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2659 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2665 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2668 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2670 const char *cnvName
= "ibm-1363";
2671 UErrorCode status
= U_ZERO_ERROR
;
2672 const char sourceData
[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2673 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2674 const char *source
= sourceData
;
2675 const char *sourceLim
= sourceData
+sizeof(sourceData
);
2677 UConverter
*cnv
=ucnv_open(cnvName
, &status
);
2678 if(U_FAILURE(status
)) {
2679 log_data_err("Unable to open %s converter: %s\n", cnvName
, u_errorName(status
));
2685 UChar targetBuf
[256];
2686 UChar
*target
= targetBuf
;
2687 UChar
*targetLim
= target
+256;
2688 ucnv_toUnicode(cnv
, &target
, targetLim
, &source
, sourceLim
, NULL
, TRUE
, &status
);
2690 log_info("After convert: target@%d, source@%d, status%s\n",
2691 target
-targetBuf
, source
-sourceData
, u_errorName(status
));
2693 if(U_FAILURE(status
)) {
2694 log_err("Failed to convert: %s\n", u_errorName(status
));
2701 c1
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2702 log_verbose("c1: U+%04X, source@%d, status %s\n", c1
, source
-sourceData
, u_errorName(status
));
2704 c2
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2705 log_verbose("c2: U+%04X, source@%d, status %s\n", c2
, source
-sourceData
, u_errorName(status
));
2707 c3
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2708 log_verbose("c3: U+%04X, source@%d, status %s\n", c3
, source
-sourceData
, u_errorName(status
));
2710 if(status
==U_INDEX_OUTOFBOUNDS_ERROR
&& c3
==0xFFFF) {
2711 log_verbose("OK\n");
2713 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2721 #ifdef U_ENABLE_GENERIC_ISO_2022
2726 static const uint8_t in
[]={
2733 0xf0, 0x90, 0x80, 0x80
2738 /* expected test results */
2739 static const int32_t results
[]={
2740 /* number of bytes read, code point */
2741 4, 0x0031, /* 4 bytes including the escape sequence */
2749 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2750 UErrorCode errorCode
=U_ZERO_ERROR
;
2753 cnv
=ucnv_open("ISO_2022", &errorCode
);
2754 if(U_FAILURE(errorCode
)) {
2755 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2758 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2760 /* Test the condition when source >= sourceLimit */
2761 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2762 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2763 /*Test for the condition where we have a truncated char*/
2765 static const uint8_t source1
[]={0xc4};
2766 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2767 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2769 /*Test for the condition where there is an invalid character*/
2771 static const uint8_t source2
[]={0xa1, 0x01};
2772 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
2780 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2781 const UChar
* uSource
;
2782 const UChar
* uSourceLimit
;
2783 const char* cSource
;
2784 const char* cSourceLimit
;
2785 UChar
*uTargetLimit
=NULL
;
2788 const char *cTargetLimit
;
2790 UChar
*uBuf
; /*,*test;*/
2791 int32_t uBufSize
= 120;
2794 UErrorCode errorCode
=U_ZERO_ERROR
;
2795 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2796 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2799 uSource
= (UChar
*) source
;
2800 uSourceLimit
=(const UChar
*)sourceLimit
;
2804 cTargetLimit
= cBuf
;
2805 uTargetLimit
= uBuf
;
2809 cTargetLimit
= cTargetLimit
+ i
;
2810 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2811 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2812 errorCode
=U_ZERO_ERROR
;
2816 if(U_FAILURE(errorCode
)){
2817 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2821 }while (uSource
<uSourceLimit
);
2823 cSourceLimit
=cTarget
;
2825 uTargetLimit
=uTargetLimit
+i
;
2826 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2827 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2828 errorCode
=U_ZERO_ERROR
;
2831 if(U_FAILURE(errorCode
)){
2832 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2835 }while(cSource
<cSourceLimit
);
2839 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2840 if(uBuf
[len
]!=uSource
[len
]){
2841 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2848 /* Test for Jitterbug 778 */
2849 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2850 const UChar
* uSource
;
2851 const UChar
* uSourceLimit
;
2852 const char* cSource
;
2853 UChar
*uTargetLimit
=NULL
;
2856 const char *cTargetLimit
;
2859 int32_t uBufSize
= 120;
2860 int numCharsInTarget
=0;
2861 UErrorCode errorCode
=U_ZERO_ERROR
;
2862 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2863 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2865 uSourceLimit
=sourceLimit
;
2867 cTargetLimit
= cBuf
+uBufSize
*5;
2869 uTargetLimit
= uBuf
+ uBufSize
*5;
2871 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
2872 if(U_FAILURE(errorCode
)){
2873 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2878 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2879 if(U_FAILURE(errorCode
)){
2880 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2884 while(uSource
<uSourceLimit
){
2885 if(*test
!=*uSource
){
2887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
2896 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2897 const UChar
* uSource
;
2898 const UChar
* uSourceLimit
;
2899 const char* cSource
;
2900 const char* cSourceLimit
;
2901 UChar
*uTargetLimit
=NULL
;
2904 const char *cTargetLimit
;
2906 UChar
*uBuf
; /*,*test;*/
2907 int32_t uBufSize
= 120;
2910 const UChar
*temp
= sourceLimit
;
2911 UErrorCode errorCode
=U_ZERO_ERROR
;
2912 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2913 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2917 uSource
= (UChar
*) source
;
2921 cTargetLimit
= cBuf
;
2922 uTargetLimit
= uBuf
+uBufSize
*5;
2923 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2924 uSourceLimit
=uSource
;
2927 if (uSourceLimit
< sourceLimit
) {
2928 uSourceLimit
= uSourceLimit
+1;
2930 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2931 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2932 errorCode
=U_ZERO_ERROR
;
2936 if(U_FAILURE(errorCode
)){
2937 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2941 }while (uSource
<temp
);
2945 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2946 cSourceLimit
= cSourceLimit
+1;
2948 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2949 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2950 errorCode
=U_ZERO_ERROR
;
2953 if(U_FAILURE(errorCode
)){
2954 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2957 }while(cSource
<cTarget
);
2961 for(;len
<(int)(source
- sourceLimit
);len
++){
2962 if(uBuf
[len
]!=uSource
[len
]){
2963 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2971 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2972 const uint16_t results
[], const char* message
){
2973 /* const char* s0; */
2974 const char* s
=(char*)source
;
2975 const uint16_t *r
=results
;
2976 UErrorCode errorCode
=U_ZERO_ERROR
;
2981 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2982 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2983 break; /* no more significant input */
2984 } else if(U_FAILURE(errorCode
)) {
2985 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
2988 if(U16_IS_LEAD(*r
)){
2990 U16_NEXT(r
, i
, len
, exC
);
2995 if(c
!=(uint32_t)(exC
))
2996 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
3002 static int TestJitterbug930(const char* enc
){
3003 UErrorCode err
= U_ZERO_ERROR
;
3004 UConverter
*converter
;
3008 const UChar
*source
= in
;
3010 int32_t* offsets
= off
;
3011 int numOffWritten
=0;
3013 converter
= my_ucnv_open(enc
, &err
);
3015 in
[0] = 0x41; /* 0x4E00;*/
3020 memset(off
, '*', sizeof(off
));
3022 ucnv_fromUnicode (converter
,
3031 /* writes three bytes into the output buffer: 41 1B 24
3032 * but offsets contains 0 1 1
3034 while(*offsets
< off
[10]){
3038 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
3039 if(numOffWritten
!= (int)(target
-out
)){
3040 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
3045 memset(off
,'*' , sizeof(off
));
3049 ucnv_fromUnicode (converter
,
3058 while(*offsets
< off
[10]){
3061 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
3066 /* writes 42 43 7A into output buffer,
3067 * offsets contains -1 -1 -1
3069 ucnv_close(converter
);
3076 static const uint16_t in
[]={
3077 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3078 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3079 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3080 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3081 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3082 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3083 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3084 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3085 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3086 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3087 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3088 0x005A, 0x005B, 0x005C, 0x000A
3090 const UChar
* uSource
;
3091 const UChar
* uSourceLimit
;
3092 const char* cSource
;
3093 const char* cSourceLimit
;
3094 UChar
*uTargetLimit
=NULL
;
3097 const char *cTargetLimit
;
3101 int32_t uBufSize
= 120;
3102 UErrorCode errorCode
=U_ZERO_ERROR
;
3103 UConverter
*cnv
= NULL
;
3104 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3105 int32_t* myOff
= offsets
;
3106 cnv
=ucnv_open("HZ", &errorCode
);
3107 if(U_FAILURE(errorCode
)) {
3108 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3112 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3113 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3114 uSource
= (const UChar
*)in
;
3115 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3117 cTargetLimit
= cBuf
+uBufSize
*5;
3119 uTargetLimit
= uBuf
+ uBufSize
*5;
3120 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3121 if(U_FAILURE(errorCode
)){
3122 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3126 cSourceLimit
=cTarget
;
3129 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3130 if(U_FAILURE(errorCode
)){
3131 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3134 uSource
= (const UChar
*)in
;
3135 while(uSource
<uSourceLimit
){
3136 if(*test
!=*uSource
){
3138 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3143 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3144 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3145 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3146 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3147 TestJitterbug930("csISO2022JP");
3159 static const uint16_t in
[]={
3160 /* test full range of Devanagari */
3161 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3162 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3163 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3164 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3165 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3166 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3167 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3168 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3169 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3170 0x096D,0x096E,0x096F,
3171 /* test Soft halant*/
3172 0x0915,0x094d, 0x200D,
3173 /* test explicit halant */
3174 0x0915,0x094d, 0x200c,
3175 /* test double danda */
3178 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3179 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3180 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3181 /* tests from Lotus */
3182 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3183 0x0930,0x094D,0x200D,
3184 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3185 0x0915,0x0921,0x002B,0x095F,
3187 0x0B86, 0xB87, 0xB88,
3189 0x0C05, 0x0C02, 0x0C03,0x0c31,
3191 0x0C85, 0xC82, 0x0C83,
3192 /* test Abbr sign and Anudatta */
3202 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3203 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3206 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3207 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3208 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3209 0x093D /* Avagraha 0xEA, 0xE9*/,
3217 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3219 static const unsigned char byteArr
[]={
3221 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3222 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3223 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3224 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3225 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3226 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3227 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3228 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3229 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3231 /* test soft halant */
3233 /* test explicit halant */
3235 /* test double danda */
3238 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3239 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3240 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3243 /* tests from Lotus */
3244 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3245 0xEF,0x42,0xCF,0xE8,0xD9,
3246 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3247 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3249 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3251 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3253 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3254 /* anudatta and abbreviation sign */
3255 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3258 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3260 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3262 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3264 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3266 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3268 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3270 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3272 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3274 0xB3, 0xE9, /* Ka + NUKTA */
3276 0xB4, 0xE9, /* Kha + NUKTA */
3278 0xB5, 0xE9, /* Ga + NUKTA */
3290 /* just consume unhandled codepoints */
3294 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,UPRV_LENGTHOF(in
),"x-iscii-de",NULL
,TRUE
);
3295 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
3302 static const uint16_t in
[]={
3303 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3304 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3305 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3306 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3307 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3308 0x201D, 0x3014, 0x000D, 0x000A,
3309 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3310 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3312 const UChar
* uSource
;
3313 const UChar
* uSourceLimit
;
3314 const char* cSource
;
3315 const char* cSourceLimit
;
3316 UChar
*uTargetLimit
=NULL
;
3319 const char *cTargetLimit
;
3323 int32_t uBufSize
= 120;
3324 UErrorCode errorCode
=U_ZERO_ERROR
;
3325 UConverter
*cnv
= NULL
;
3326 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3327 int32_t* myOff
= offsets
;
3328 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3329 if(U_FAILURE(errorCode
)) {
3330 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3334 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3335 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3336 uSource
= (const UChar
*)in
;
3337 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3339 cTargetLimit
= cBuf
+uBufSize
*5;
3341 uTargetLimit
= uBuf
+ uBufSize
*5;
3342 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3343 if(U_FAILURE(errorCode
)){
3344 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3348 cSourceLimit
=cTarget
;
3351 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3352 if(U_FAILURE(errorCode
)){
3353 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3357 uSource
= (const UChar
*)in
;
3358 while(uSource
<uSourceLimit
){
3359 if(*test
!=*uSource
){
3361 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3367 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3368 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3369 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3370 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3371 TestJitterbug930("csISO2022JP");
/*
 * Round-trip helper shared by the converter tests.
 * Opens the converter named by conv, converts in[0..len) to bytes with ucnv_fromUnicode(),
 * converts those bytes back with ucnv_toUnicode(), and logs an error for every UTF-16 code
 * unit that differs from the input. The same data is then handed to the small-buffer and
 * getNextUChar helpers. When byteArr is non-NULL and byteArrLen is non-zero, a second
 * ucnv_toUnicode() pass is compared against in[] as well.
 *
 * in          UTF-16 input text
 * len         number of code units of in[] to use
 * conv        converter name passed to my_ucnv_open(); also used in log messages
 * lang        label passed as the last argument of TestGetNextUChar2022() for the byteArr check
 * byteArr     optional pre-encoded byte sequence (may be NULL)
 * byteArrLen  number of bytes in byteArr
 */
3380 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3381 const UChar
* uSource
;
3382 const UChar
* uSourceLimit
;
3383 const char* cSource
;
3384 const char* cSourceLimit
;
3385 UChar
*uTargetLimit
=NULL
;
3388 const char *cTargetLimit
;
/* element count used for the offsets array and for both scratch buffers below */
3391 int32_t uBufSize
= 120*10;
3392 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): offsets is allocated before the converter is opened; no free() is visible
 * in this chunk for the open-failure path - confirm it is released. */
3394 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3395 int32_t* myOff
= offsets
;
3396 cnv
=my_ucnv_open(conv
, &errorCode
);
3397 if(U_FAILURE(errorCode
)) {
3398 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
/* scratch buffers: uBuf for UTF-16 output, cBuf for converter bytes */
3402 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3403 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3404 uSource
= (const UChar
*)in
;
3405 uSourceLimit
=uSource
+len
;
3407 cTargetLimit
= cBuf
+uBufSize
;
3409 uTargetLimit
= uBuf
+ uBufSize
;
/* step 1: UTF-16 -> bytes, whole input in one call */
3410 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3411 if(U_FAILURE(errorCode
)){
3412 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3415 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
/* the bytes just produced end at cTarget; that becomes the limit for the reverse conversion */
3417 cSourceLimit
=cTarget
;
/* step 2: bytes -> UTF-16 */
3420 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3421 if(U_FAILURE(errorCode
)){
3422 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
/* step 3: compare the code units read through test with the original input */
3426 uSource
= (const UChar
*)in
;
3427 while(uSource
<uSourceLimit
){
3428 if(*test
!=*uSource
){
3429 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
/* run the shared buffer-size and getNextUChar helpers on the same input and bytes */
3434 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3435 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3436 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
/* optional check against the caller-supplied byte sequence */
3437 if(byteArr
&& byteArrLen
!=0){
3438 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3439 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
3442 cSourceLimit
= cSource
+byteArrLen
;
/* NOTE(review): uTarget is passed again here and no reset of uTarget is visible in this
 * chunk between the two ucnv_toUnicode() calls - confirm where this pass writes its output. */
3445 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3446 if(U_FAILURE(errorCode
)){
3447 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3451 uSource
= (const UChar
*)in
;
3452 while(uSource
<uSourceLimit
){
3453 if(*test
!=*uSource
){
3454 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3467 static UChar U_CALLCONV
3468 _charAt(int32_t offset
, void *context
) {
3469 return ((char*)context
)[offset
];
3473 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3476 if(U_FAILURE(*status
)){
3479 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3480 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3484 srcLen
= (int32_t)uprv_strlen(src
);
3487 for (; srcIndex
<srcLen
; ) {
3488 UChar32 c
= src
[srcIndex
++];
3489 if (c
== 0x005C /*'\\'*/) {
3490 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3491 if (c
== (UChar32
)0xFFFFFFFF) {
3492 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3493 break; /* invalid escape sequence */
3496 if(dstIndex
< dstLen
){
3498 dst
[dstIndex
++] = U16_LEAD(c
);
3499 if(dstIndex
<dstLen
){
3500 dst
[dstIndex
]=U16_TRAIL(c
);
3502 *status
=U_BUFFER_OVERFLOW_ERROR
;
3505 dst
[dstIndex
]=(UChar
)c
;
3509 *status
= U_BUFFER_OVERFLOW_ERROR
;
3511 dstIndex
++; /* for preflighting */
/*
 * Exhaustive round-trip driver for the converter named by cp.
 * Calls TestConv() with short UTF-16 strings built around each value of i up to 0x10FFFF:
 * the code point alone, repeated, and placed inside nsrc. TestConv() is always called with
 * an empty lang label and no expected byte array.
 *
 * cp  converter name handed to TestConv()
 */
3517 TestFullRoundtrip(const char* cp
){
3518 UChar usource
[10] ={0};
3519 UChar nsrc
[10] = {0};
3523 /* Test codepoint 0 */
3524 TestConv(usource
,1,cp
,"",NULL
,0);
3525 TestConv(usource
,2,cp
,"",NULL
,0);
3527 TestConv(nsrc
,3,cp
,"",NULL
,0);
/* walk i up to U+10FFFF inclusive */
3529 for(;i
<=0x10FFFF;i
++){
/* i is stored in usource either as a single code unit ... */
3535 usource
[0] =(UChar
) i
;
/* ... or as a lead/trail surrogate pair */
3538 usource
[0]=U16_LEAD(i
);
3539 usource
[1]=U16_TRAIL(i
);
3546 /* Test only single code points */
3547 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3548 /* Test codepoint repeated twice */
3549 usource
[ulen
]=usource
[0];
3550 usource
[ulen
+1]=usource
[1];
3552 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3553 /* Test codepoint repeated 3 times */
3554 usource
[ulen
]=usource
[0];
3555 usource
[ulen
+1]=usource
[1];
3557 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3558 /* Test codepoint in between 2 codepoints */
3562 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* clear all 10 units of usource before the next value of i */
3563 uprv_memset(usource
,0,sizeof(UChar
)*10);
3568 TestRoundTrippingAllUTF(void){
3569 if(!getTestOption(QUICK_OPTION
)){
3570 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3571 TestFullRoundtrip("BOCU-1");
3572 log_verbose("Running exhaustive round trip test for SCSU\n");
3573 TestFullRoundtrip("SCSU");
3574 log_verbose("Running exhaustive round trip test for UTF-8\n");
3575 TestFullRoundtrip("UTF-8");
3576 log_verbose("Running exhaustive round trip test for CESU-8\n");
3577 TestFullRoundtrip("CESU-8");
3578 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3579 TestFullRoundtrip("UTF-16BE");
3580 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3581 TestFullRoundtrip("UTF-16LE");
3582 log_verbose("Running exhaustive round trip test for UTF-16\n");
3583 TestFullRoundtrip("UTF-16");
3584 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3585 TestFullRoundtrip("UTF-32BE");
3586 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3587 TestFullRoundtrip("UTF-32LE");
3588 log_verbose("Running exhaustive round trip test for UTF-32\n");
3589 TestFullRoundtrip("UTF-32");
3590 log_verbose("Running exhaustive round trip test for UTF-7\n");
3591 TestFullRoundtrip("UTF-7");
3592 log_verbose("Running exhaustive round trip test for UTF-7\n");
3593 TestFullRoundtrip("UTF-7,version=1");
3594 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3595 TestFullRoundtrip("IMAP-mailbox-name");
3598 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3599 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3600 * The old mappings remain as fallbacks.
3601 * This test may be reintroduced at a later time.
3606 log_verbose("Running exhaustive round trip test for GB18030\n");
3607 TestFullRoundtrip("GB18030");
3615 static const uint16_t germanUTF16
[]={
3616 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3619 static const uint8_t germanSCSU
[]={
3620 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3623 static const uint16_t russianUTF16
[]={
3624 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3627 static const uint8_t russianSCSU
[]={
3628 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3631 static const uint16_t japaneseUTF16
[]={
3632 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3633 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3634 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3635 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3636 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3637 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3638 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3639 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3640 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3641 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3642 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3643 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3644 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3645 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3646 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3649 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3650 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3651 static const uint8_t japaneseSCSU
[]={
3652 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3653 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3654 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3655 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3656 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3657 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3658 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3659 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3660 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3661 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3662 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3666 static const uint16_t allFeaturesUTF16
[]={
3667 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3668 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3669 0x01df, 0xf000, 0xdbff, 0xdfff
3672 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3673 * result here (34B vs. 35B)
3675 static const uint8_t allFeaturesSCSU
[]={
3676 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3677 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3678 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3679 0xdf, 0x14, 0x80, 0x15, 0xff
3681 static const uint16_t monkeyIn
[]={
3682 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3683 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3684 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3685 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3686 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3687 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3688 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3689 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3690 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3691 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3692 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3693 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3694 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3695 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3696 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3697 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3698 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3699 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3700 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3701 /* test non-BMP code points */
3702 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3703 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3704 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3705 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3706 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3707 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3708 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3709 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3710 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3711 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3712 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3715 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3716 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3717 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3718 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3719 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3721 static const char *fTestCases
[] = {
3722 "\\ud800\\udc00", /* smallest surrogate*/
3724 "\\udBff\\udFff", /* largest surrogate pair*/
3727 "Hello \\u9292 \\u9192 World!",
3728 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3729 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3731 "\\u0648\\u06c8", /* catch missing reset*/
3734 "\\u4444\\uE001", /* lowest quotable*/
3735 "\\u4444\\uf2FF", /* highest quotable*/
3736 "\\u4444\\uf188\\u4444",
3737 "\\u4444\\uf188\\uf288",
3738 "\\u4444\\uf188abc\\u0429\\uf288",
3740 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3741 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3742 "Hello World!123456",
3743 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3745 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3746 "abc\\u4411d", /* uses SQU*/
3747 "abc\\u4411\\u4412d",/* uses SCU*/
3748 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3749 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3751 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3752 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3753 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3755 "", /* empty input*/
3756 "\\u0000", /* smallest BMP character*/
3757 "\\uFFFF", /* largest BMP character*/
3759 /* regression tests*/
3760 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3761 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3762 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3763 "\\u0041\\u00df\\u0401\\u015f",
3764 "\\u9066\\u2123abc",
3765 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3766 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3769 for(;i
<UPRV_LENGTHOF(fTestCases
);i
++){
3770 const char* cSrc
= fTestCases
[i
];
3771 UErrorCode status
= U_ZERO_ERROR
;
3772 int32_t cSrcLen
,srcLen
;
3774 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3775 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3776 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3777 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3778 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3779 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3782 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3783 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3784 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3785 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3786 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3787 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3788 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3791 #if !UCONFIG_NO_LEGACY_CONVERSION
3792 static void TestJitterbug2346(){
3793 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3794 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3795 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3797 UChar uTarget
[500]={'\0'};
3798 UChar
* utarget
=uTarget
;
3799 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3801 char cTarget
[500]={'\0'};
3802 char* ctarget
=cTarget
;
3803 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3804 const char* csource
=source
;
3805 UChar
* temp
= expected
;
3806 UErrorCode err
=U_ZERO_ERROR
;
3808 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3809 if(U_FAILURE(err
)) {
3810 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3813 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3814 if(U_FAILURE(err
)) {
3815 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
3818 utargetLimit
=utarget
;
3820 while(utarget
<utargetLimit
){
3821 if(*temp
!=*utarget
){
3823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3828 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3829 if(U_FAILURE(err
)) {
3830 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3833 ctargetLimit
=ctarget
;
/*
 * Round-trip test for the "ISO_2022_JP_1" converter.
 * Converts the UTF-16 text in[] to bytes with ucnv_fromUnicode(), converts the bytes back
 * with ucnv_toUnicode() and logs every code unit that differs from the input. No offsets
 * array is used (NULL is passed). Afterwards TestNextUCharError() is run on a short byte
 * sequence, followed by the small target/source buffer helpers.
 */
3841 TestISO_2022_JP_1() {
3843 static const uint16_t in
[]={
3844 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3845 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3846 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3847 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3848 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3849 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3850 0x201D, 0x000D, 0x000A,
3851 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3852 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3853 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3854 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3855 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3856 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3858 const UChar
* uSource
;
3859 const UChar
* uSourceLimit
;
3860 const char* cSource
;
3861 const char* cSourceLimit
;
3862 UChar
*uTargetLimit
=NULL
;
3865 const char *cTargetLimit
;
/* base element count; both scratch buffers are allocated with five times this many elements */
3868 int32_t uBufSize
= 120;
3869 UErrorCode errorCode
=U_ZERO_ERROR
;
3872 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3873 if(U_FAILURE(errorCode
)) {
3874 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* scratch buffers: uBuf for UTF-16 output, cBuf for converter bytes */
3878 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3879 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3880 uSource
= (const UChar
*)in
;
3881 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3883 cTargetLimit
= cBuf
+uBufSize
*5;
3885 uTargetLimit
= uBuf
+ uBufSize
*5;
/* step 1: UTF-16 -> bytes, without offsets */
3886 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3887 if(U_FAILURE(errorCode
)){
3888 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* the bytes just produced end at cTarget */
3892 cSourceLimit
=cTarget
;
/* step 2: bytes -> UTF-16, without offsets */
3894 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3895 if(U_FAILURE(errorCode
)){
3896 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* step 3: compare the code units read through test with the original input */
3899 uSource
= (const UChar
*)in
;
3900 while(uSource
<uSourceLimit
){
3901 if(*test
!=*uSource
){
3903 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3909 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3910 /*Test for the condition where there is an invalid character*/
3913 static const uint8_t source2
[]={0x0e,0x24,0x053};
/* NOTE(review): the message speaks of an invalid character, but the expected status passed
 * here is U_ZERO_ERROR - confirm this is intended. */
3914 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
/* repeat the round trip through the small-buffer helpers */
3916 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3917 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
/*
 * Round-trip test for the "ISO_2022_JP_2" converter.
 * Converts the UTF-16 text in[] to bytes with ucnv_fromUnicode(), converts the bytes back
 * with ucnv_toUnicode() (both calls receive the offsets array) and logs every code unit
 * that differs from the input. The small-buffer and to/from-UChars helpers are then run,
 * followed by TestNextUCharError() on a short byte sequence.
 */
3924 TestISO_2022_JP_2() {
3926 static const uint16_t in
[]={
3927 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3928 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3929 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3930 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3931 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3932 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3933 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3934 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3935 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3936 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3937 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3938 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3939 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3940 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3941 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3942 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3943 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3944 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3945 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3947 const UChar
* uSource
;
3948 const UChar
* uSourceLimit
;
3949 const char* cSource
;
3950 const char* cSourceLimit
;
3951 UChar
*uTargetLimit
=NULL
;
3954 const char *cTargetLimit
;
/* base element count; offsets and both scratch buffers get five times this many elements */
3958 int32_t uBufSize
= 120;
3959 UErrorCode errorCode
=U_ZERO_ERROR
;
3960 UConverter
*cnv
= NULL
;
/* NOTE(review): offsets is allocated before the converter is opened; no free() is visible
 * in this chunk for the open-failure path - confirm it is released. */
3961 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3962 int32_t* myOff
= offsets
;
3963 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3964 if(U_FAILURE(errorCode
)) {
3965 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* scratch buffers: uBuf for UTF-16 output, cBuf for converter bytes */
3969 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3970 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3971 uSource
= (const UChar
*)in
;
3972 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3974 cTargetLimit
= cBuf
+uBufSize
*5;
3976 uTargetLimit
= uBuf
+ uBufSize
*5;
/* step 1: UTF-16 -> bytes */
3977 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3978 if(U_FAILURE(errorCode
)){
3979 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* the bytes just produced end at cTarget */
3983 cSourceLimit
=cTarget
;
/* step 2: bytes -> UTF-16 */
3986 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3987 if(U_FAILURE(errorCode
)){
3988 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* step 3: compare the code units read through test with the original input */
3991 uSource
= (const UChar
*)in
;
3992 while(uSource
<uSourceLimit
){
3993 if(*test
!=*uSource
){
3995 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/* repeat the round trip through the shared helper tests */
4000 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4001 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4002 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4003 /*Test for the condition where there is an invalid character*/
4006 static const uint8_t source2
[]={0x0e,0x24,0x053};
/* NOTE(review): the message speaks of an invalid character, but the expected status passed
 * here is U_ZERO_ERROR - confirm this is intended. */
4007 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
4020 static const uint16_t in
[]={
4021 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4022 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4023 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4024 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4025 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4026 ,0x53E3,0x53E4,0x000A,0x000D};
4027 const UChar
* uSource
;
4028 const UChar
* uSourceLimit
;
4029 const char* cSource
;
4030 const char* cSourceLimit
;
4031 UChar
*uTargetLimit
=NULL
;
4034 const char *cTargetLimit
;
4038 int32_t uBufSize
= 120;
4039 UErrorCode errorCode
=U_ZERO_ERROR
;
4040 UConverter
*cnv
= NULL
;
4041 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4042 int32_t* myOff
= offsets
;
4043 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
4044 if(U_FAILURE(errorCode
)) {
4045 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4049 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4050 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4051 uSource
= (const UChar
*)in
;
4052 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4054 cTargetLimit
= cBuf
+uBufSize
*5;
4056 uTargetLimit
= uBuf
+ uBufSize
*5;
4057 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4058 if(U_FAILURE(errorCode
)){
4059 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4063 cSourceLimit
=cTarget
;
4066 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4067 if(U_FAILURE(errorCode
)){
4068 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4071 uSource
= (const UChar
*)in
;
4072 while(uSource
<uSourceLimit
){
4073 if(*test
!=*uSource
){
4074 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4079 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4080 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4081 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4082 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4083 TestJitterbug930("csISO2022KR");
4084 /*Test for the condition where there is an invalid character*/
4087 static const uint8_t source2
[]={0x1b,0x24,0x053};
4088 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4089 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Round-trip test for the converter opened as "ibm-25546".
 * Converts the UTF-16 text in[] to bytes with ucnv_fromUnicode(), converts the bytes back
 * with ucnv_toUnicode() (both calls receive the offsets array) and logs every code unit
 * that differs from the input. The getNextUChar, small-buffer and to/from-UChars helpers
 * are then run. Finally the to-Unicode callback is set to UCNV_TO_U_CALLBACK_STOP and
 * TestNextUCharError() is expected to report U_ILLEGAL_ESCAPE_SEQUENCE for the bytes in source2.
 */
4100 TestISO_2022_KR_1() {
4102 static const uint16_t in
[]={
4103 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4104 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4105 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4106 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4107 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4108 ,0x53E3,0x53E4,0x000A,0x000D};
4109 const UChar
* uSource
;
4110 const UChar
* uSourceLimit
;
4111 const char* cSource
;
4112 const char* cSourceLimit
;
4113 UChar
*uTargetLimit
=NULL
;
4116 const char *cTargetLimit
;
/* base element count; offsets and both scratch buffers get five times this many elements */
4120 int32_t uBufSize
= 120;
4121 UErrorCode errorCode
=U_ZERO_ERROR
;
4122 UConverter
*cnv
= NULL
;
/* NOTE(review): offsets is allocated before the converter is opened; no free() is visible
 * in this chunk for the open-failure path - confirm it is released. */
4123 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4124 int32_t* myOff
= offsets
;
4125 cnv
=ucnv_open("ibm-25546", &errorCode
);
4126 if(U_FAILURE(errorCode
)) {
4127 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
/* scratch buffers: uBuf for UTF-16 output, cBuf for converter bytes */
4131 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4132 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4133 uSource
= (const UChar
*)in
;
4134 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4136 cTargetLimit
= cBuf
+uBufSize
*5;
4138 uTargetLimit
= uBuf
+ uBufSize
*5;
/* step 1: UTF-16 -> bytes */
4139 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4140 if(U_FAILURE(errorCode
)){
4141 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* the bytes just produced end at cTarget */
4145 cSourceLimit
=cTarget
;
/* step 2: bytes -> UTF-16 */
4148 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4149 if(U_FAILURE(errorCode
)){
4150 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* step 3: compare the code units read through test with the original input */
4153 uSource
= (const UChar
*)in
;
4154 while(uSource
<uSourceLimit
){
4155 if(*test
!=*uSource
){
4156 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
/* repeat the round trip through the shared helper tests */
4162 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4163 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4164 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4166 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4167 /*Test for the condition where there is an invalid character*/
4170 static const uint8_t source2
[]={0x1b,0x24,0x053};
/* with the STOP callback installed, the sequence in source2 must be reported as an illegal escape sequence */
4171 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4172 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4182 static void TestJitterbug2411(){
4183 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4184 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4185 UConverter
* kr
=NULL
, *kr1
=NULL
;
4186 UErrorCode errorCode
= U_ZERO_ERROR
;
4187 UChar tgt
[100]={'\0'};
4188 UChar
* target
= tgt
;
4189 UChar
* targetLimit
= target
+100;
4190 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4191 if(U_FAILURE(errorCode
)) {
4192 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
4195 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4196 if(U_FAILURE(errorCode
)) {
4197 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4200 kr1
= ucnv_open("ibm-25546", &errorCode
);
4201 if(U_FAILURE(errorCode
)) {
4202 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4206 targetLimit
= target
+100;
4207 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4209 if(U_FAILURE(errorCode
)) {
4210 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4221 /* From Unicode moved to testdata/conversion.txt */
4224 static const uint8_t sampleTextJIS
[] = {
4225 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4226 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4227 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4229 static const uint16_t expectedISO2022JIS
[] = {
4234 static const int32_t toISO2022JISOffs
[]={
4240 static const uint8_t sampleTextJIS7
[] = {
4241 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4242 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4243 0x1b,0x24,0x42,0x21,0x21,
4244 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4246 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4248 static const uint16_t expectedISO2022JIS7
[] = {
4256 static const int32_t toISO2022JIS7Offs
[]={
4263 static const uint8_t sampleTextJIS8
[] = {
4264 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4265 0xa1,0xc8,0xd9,/*Katakana Set*/
4268 0xb1,0xc3, /*Katakana Set*/
4269 0x1b,0x24,0x42,0x21,0x21
4271 static const uint16_t expectedISO2022JIS8
[] = {
4273 0xff61, 0xff88, 0xff99,
4278 static const int32_t toISO2022JIS8Offs
[]={
4284 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4285 UPRV_LENGTHOF(expectedISO2022JIS
),"JIS", toISO2022JISOffs
,TRUE
);
4286 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4287 UPRV_LENGTHOF(expectedISO2022JIS7
),"JIS7", toISO2022JIS7Offs
,TRUE
);
4288 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4289 UPRV_LENGTHOF(expectedISO2022JIS8
),"JIS8", toISO2022JIS8Offs
,TRUE
);
4296 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4298 static void TestJitterbug915(){
4299 /* tests for roundtripping of the below sequence
4300 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4301 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4302 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4303 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4304 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4305 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4306 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4308 static const char cSource
[]={
4309 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4310 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4311 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4312 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4313 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4314 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4315 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4316 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4317 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4318 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4319 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4320 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4321 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4322 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4323 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4324 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4325 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4326 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4327 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4328 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4329 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4330 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4331 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4332 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4333 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4334 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4335 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4336 0x37, 0x20, 0x2A, 0x2F
4338 UChar uTarget
[500]={'\0'};
4339 UChar
* utarget
=uTarget
;
4340 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4342 char cTarget
[500]={'\0'};
4343 char* ctarget
=cTarget
;
4344 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4345 const char* csource
=cSource
;
4346 const char* tempSrc
= cSource
;
4347 UErrorCode err
=U_ZERO_ERROR
;
4349 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4350 if(U_FAILURE(err
)) {
4351 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4354 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4355 if(U_FAILURE(err
)) {
4356 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4359 utargetLimit
=utarget
;
4361 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4362 if(U_FAILURE(err
)) {
4363 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4366 ctargetLimit
=ctarget
;
4368 while(ctarget
<ctargetLimit
){
4369 if(*ctarget
!= *tempSrc
){
4370 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
4380 TestISO_2022_CN_EXT() {
4382 static const uint16_t in
[]={
4383 /* test Non-BMP code points */
4384 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4385 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4386 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4387 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4388 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4389 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4390 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4391 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4392 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4395 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4396 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4397 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4398 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4399 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4400 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4401 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4402 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4403 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4404 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4405 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4406 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4407 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4408 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4409 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4410 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4411 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4412 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4414 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4418 const UChar
* uSource
;
4419 const UChar
* uSourceLimit
;
4420 const char* cSource
;
4421 const char* cSourceLimit
;
4422 UChar
*uTargetLimit
=NULL
;
4425 const char *cTargetLimit
;
4429 int32_t uBufSize
= 180;
4430 UErrorCode errorCode
=U_ZERO_ERROR
;
4431 UConverter
*cnv
= NULL
;
4432 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4433 int32_t* myOff
= offsets
;
4434 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4435 if(U_FAILURE(errorCode
)) {
4436 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4440 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4441 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4442 uSource
= (const UChar
*)in
;
4443 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4445 cTargetLimit
= cBuf
+uBufSize
*5;
4447 uTargetLimit
= uBuf
+ uBufSize
*5;
4448 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4449 if(U_FAILURE(errorCode
)){
4450 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4454 cSourceLimit
=cTarget
;
4457 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4458 if(U_FAILURE(errorCode
)){
4459 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4462 uSource
= (const UChar
*)in
;
4463 while(uSource
<uSourceLimit
){
4464 if(*test
!=*uSource
){
4465 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4468 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4473 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4474 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4475 /*Test for the condition where there is an invalid character*/
4478 static const uint8_t source2
[]={0x0e,0x24,0x053};
4479 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4493 static const uint16_t in
[]={
4495 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4496 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4497 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4498 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4499 0x0020, 0x0045, 0x004e, 0x0044,
4501 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4502 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4503 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4504 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4505 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4506 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4507 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4508 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4509 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4510 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4511 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4512 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4513 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4514 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4515 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4516 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4517 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4520 const UChar
* uSource
;
4521 const UChar
* uSourceLimit
;
4522 const char* cSource
;
4523 const char* cSourceLimit
;
4524 UChar
*uTargetLimit
=NULL
;
4527 const char *cTargetLimit
;
4531 int32_t uBufSize
= 180;
4532 UErrorCode errorCode
=U_ZERO_ERROR
;
4533 UConverter
*cnv
= NULL
;
4534 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4535 int32_t* myOff
= offsets
;
4536 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4537 if(U_FAILURE(errorCode
)) {
4538 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4542 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4543 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4544 uSource
= (const UChar
*)in
;
4545 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4547 cTargetLimit
= cBuf
+uBufSize
*5;
4549 uTargetLimit
= uBuf
+ uBufSize
*5;
4550 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4551 if(U_FAILURE(errorCode
)){
4552 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4556 cSourceLimit
=cTarget
;
4559 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4560 if(U_FAILURE(errorCode
)){
4561 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4564 uSource
= (const UChar
*)in
;
4565 while(uSource
<uSourceLimit
){
4566 if(*test
!=*uSource
){
4567 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4570 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4575 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4576 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4577 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4578 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4579 TestJitterbug930("csISO2022CN");
4580 /*Test for the condition where there is an invalid character*/
4583 static const uint8_t source2
[]={0x0e,0x24,0x053};
4584 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4594 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4596 const char * converterName
;
4597 const char * inputText
;
4598 int inputTextLength
;
4601 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
4602 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context
, UConverterToUnicodeArgs
*toArgs
, const char* codeUnits
,
4603 int32_t length
, UConverterCallbackReason reason
, UErrorCode
* err
) {
4604 // suppress compiler warnings about unused variables
4608 if (reason
> UCNV_IRREGULAR
) {
4611 if (reason
!= UCNV_IRREGULAR
) {
4612 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4614 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4615 *err
= U_ZERO_ERROR
;
4616 ucnv_cbToUWriteSub(toArgs
,0,err
);
4619 enum { kEmptySegmentToUCharsMax
= 64 };
4620 static void TestJitterbug6175(void) {
4621 static const char iso2022jp_a
[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4622 static const char iso2022kr_a
[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4623 static const char iso2022cn_a
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4624 static const char iso2022cn_b
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4625 static const char hzGB2312_a
[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4626 static const EmptySegmentTest emptySegmentTests
[] = {
4627 /* converterName inputText inputTextLength */
4628 { "ISO-2022-JP", iso2022jp_a
, sizeof(iso2022jp_a
) },
4629 { "ISO-2022-KR", iso2022kr_a
, sizeof(iso2022kr_a
) },
4630 { "ISO-2022-CN", iso2022cn_a
, sizeof(iso2022cn_a
) },
4631 { "ISO-2022-CN", iso2022cn_b
, sizeof(iso2022cn_b
) },
4632 { "HZ-GB-2312", hzGB2312_a
, sizeof(hzGB2312_a
) },
4636 const EmptySegmentTest
* testPtr
;
4637 for (testPtr
= emptySegmentTests
; testPtr
->converterName
!= NULL
; ++testPtr
) {
4638 UErrorCode err
= U_ZERO_ERROR
;
4639 UConverter
* cnv
= ucnv_open(testPtr
->converterName
, &err
);
4640 if (U_FAILURE(err
)) {
4641 log_data_err("Unable to open %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4644 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_EMPTYSEGMENT
, NULL
, NULL
, NULL
, &err
);
4645 if (U_FAILURE(err
)) {
4646 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4651 UChar toUChars
[kEmptySegmentToUCharsMax
];
4652 UChar
* toUCharsPtr
= toUChars
;
4653 const UChar
* toUCharsLimit
= toUCharsPtr
+ kEmptySegmentToUCharsMax
;
4654 const char * inCharsPtr
= testPtr
->inputText
;
4655 const char * inCharsLimit
= inCharsPtr
+ testPtr
->inputTextLength
;
4656 ucnv_toUnicode(cnv
, &toUCharsPtr
, toUCharsLimit
, &inCharsPtr
, inCharsLimit
, NULL
, TRUE
, &err
);
4663 TestEBCDIC_STATEFUL() {
4665 static const uint8_t in
[]={
4674 /* expected test results */
4675 static const int32_t results
[]={
4676 /* number of bytes read, code point */
4685 static const uint8_t in2
[]={
4691 /* expected test results */
4692 static const int32_t results2
[]={
4693 /* number of bytes read, code point */
4698 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4699 UErrorCode errorCode
=U_ZERO_ERROR
;
4700 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4701 if(U_FAILURE(errorCode
)) {
4702 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4705 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4707 /* Test the condition when source >= sourceLimit */
4708 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4710 /*Test for the condition where source > sourcelimit after consuming the shift character */
4712 static const uint8_t source1
[]={0x0f};
4713 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4715 /*Test for the condition where there is an invalid character*/
4718 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4719 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
4722 source
=(const char*)in2
;
4723 limit
=(const char*)in2
+sizeof(in2
);
4724 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4732 static const uint8_t in
[]={
4735 0x81, 0x30, 0x81, 0x30,
4739 0x82, 0x35, 0x8f, 0x33,
4740 0x84, 0x31, 0xa4, 0x39,
4741 0x90, 0x30, 0x81, 0x30,
4742 0xe3, 0x32, 0x9a, 0x35
4745 * Feature removed markus 2000-oct-26
4746 * Only some codepages must match surrogate pairs into supplementary code points -
4747 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4748 * GB 18030 provides direct encodings for supplementary code points, therefore
4749 * it must not combine two single-encoded surrogates into one code point.
4751 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4755 /* expected test results */
4756 static const int32_t results
[]={
4757 /* number of bytes read, code point */
4769 /* Feature removed. See comment above. */
4774 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4775 UErrorCode errorCode
=U_ZERO_ERROR
;
4776 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4777 if(U_FAILURE(errorCode
)) {
4778 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4781 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4787 /* LMBCS-1 string */
4788 static const uint8_t pszLMBCS
[]={
4797 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4801 /* Unicode UChar32 equivalents */
4802 static const UChar32 pszUnicode32
[]={
4812 0x00023456, /* code point for surrogate pair */
4816 /* Unicode UChar equivalents */
4817 static const UChar pszUnicode
[]={
4827 0xD84D, /* high surrogate */
4828 0xDC56, /* low surrogate */
4832 /* expected test results */
4833 static const int offsets32
[]={
4834 /* number of bytes read, code point */
4848 /* expected test results */
4849 static const int offsets
[]={
4850 /* number of bytes read, code point */
4868 #define NAME_LMBCS_1 "LMBCS-1"
4869 #define NAME_LMBCS_2 "LMBCS-2"
4872 /* Some basic open/close/property tests on some LMBCS converters */
4875 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4876 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4877 char get_subchars
[1];
4878 const char * get_name
;
4882 int8_t len
= sizeof(get_subchars
);
4884 UErrorCode errorCode
=U_ZERO_ERROR
;
4887 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4888 if(U_FAILURE(errorCode
)) {
4889 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4892 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4893 if(U_FAILURE(errorCode
)) {
4894 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4899 get_name
= ucnv_getName (cnv1
, &errorCode
);
4900 if (strcmp(NAME_LMBCS_1
,get_name
)){
4901 log_err("Unexpected converter name: %s\n", get_name
);
4903 get_name
= ucnv_getName (cnv2
, &errorCode
);
4904 if (strcmp(NAME_LMBCS_2
,get_name
)){
4905 log_err("Unexpected converter name: %s\n", get_name
);
4908 /* substitution chars */
4909 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4910 if(U_FAILURE(errorCode
)) {
4911 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4914 log_err("Unexpected length of sub chars\n");
4916 if (get_subchars
[0] != expected_subchars
[0]){
4917 log_err("Unexpected value of sub chars\n");
4919 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4920 if(U_FAILURE(errorCode
)) {
4921 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4923 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4924 if(U_FAILURE(errorCode
)) {
4925 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4928 log_err("Unexpected length of sub chars\n");
4930 if (get_subchars
[0] != new_subchars
[0]){
4931 log_err("Unexpected value of sub chars\n");
4938 /* LMBCS to Unicode - offsets */
4940 UErrorCode errorCode
=U_ZERO_ERROR
;
4942 const char * pSource
= (const char *)pszLMBCS
;
4943 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4945 UChar Out
[sizeof(pszUnicode
) + 1];
4947 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
4949 int32_t off
[sizeof(offsets
)];
4951 /* last 'offset' in expected results is just the final size.
4952 (Makes other tests easier). Compensate here: */
4954 off
[UPRV_LENGTHOF(offsets
)-1] = sizeof(pszLMBCS
);
4958 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4959 if(U_FAILURE(errorCode
)) {
4960 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4966 ucnv_toUnicode (cnv
,
4976 if (memcmp(off
,offsets
,sizeof(offsets
)))
4978 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4980 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4982 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4987 /* LMBCS to Unicode - getNextUChar */
4988 const char * sourceStart
;
4989 const char *source
=(const char *)pszLMBCS
;
4990 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4991 const UChar32
*results
= pszUnicode32
;
4992 const int *off
= offsets32
;
4994 UErrorCode errorCode
=U_ZERO_ERROR
;
4997 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4998 if(U_FAILURE(errorCode
)) {
4999 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5005 while(source
<limit
) {
5007 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
5008 if(U_FAILURE(errorCode
)) {
5009 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
5011 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
5012 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5013 uniChar
, (source
-sourceStart
), *results
, *off
);
5022 { /* test locale & optimization group operations: Unicode to LMBCS */
5024 UErrorCode errorCode
=U_ZERO_ERROR
;
5025 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
5026 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
5027 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
5028 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
5029 const UChar
* pUniOut
= uniString
;
5030 UChar
* pUniIn
= uniString
;
5031 uint8_t lmbcsString
[4];
5032 const char * pLMBCSOut
= (const char *)lmbcsString
;
5033 char * pLMBCSIn
= (char *)lmbcsString
;
5035 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5036 ucnv_fromUnicode (cnv16he
,
5037 &pLMBCSIn
, (pLMBCSIn
+ UPRV_LENGTHOF(lmbcsString
)),
5038 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5039 NULL
, 1, &errorCode
);
5041 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
5043 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5046 pLMBCSIn
= (char *)lmbcsString
;
5047 pUniOut
= uniString
;
5048 ucnv_fromUnicode (cnv01us
,
5049 &pLMBCSIn
, (const char *)(lmbcsString
+ UPRV_LENGTHOF(lmbcsString
)),
5050 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5051 NULL
, 1, &errorCode
);
5053 if (lmbcsString
[0] != 0x9F)
5055 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5058 /* single byte char from mbcs char set */
5059 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5060 pLMBCSOut
= (const char *)lmbcsString
;
5062 ucnv_toUnicode (cnv16jp
,
5063 &pUniIn
, pUniIn
+ 1,
5064 &pLMBCSOut
, (pLMBCSOut
+ 1),
5065 NULL
, 1, &errorCode
);
5066 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5068 log_err("Unexpected results from LMBCS-16 single byte char\n");
5070 /* convert to group 1: should be 3 bytes */
5071 pLMBCSIn
= (char *)lmbcsString
;
5072 pUniOut
= uniString
;
5073 ucnv_fromUnicode (cnv01us
,
5074 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
5075 &pUniOut
, pUniOut
+ 1,
5076 NULL
, 1, &errorCode
);
5077 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
5078 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
5080 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5082 pLMBCSOut
= (const char *)lmbcsString
;
5084 ucnv_toUnicode (cnv01us
,
5085 &pUniIn
, pUniIn
+ 1,
5086 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
5087 NULL
, 1, &errorCode
);
5088 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5090 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5092 pLMBCSIn
= (char *)lmbcsString
;
5093 pUniOut
= uniString
;
5094 ucnv_fromUnicode (cnv16jp
,
5095 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
5096 &pUniOut
, pUniOut
+ 1,
5097 NULL
, 1, &errorCode
);
5098 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
5100 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5102 ucnv_close(cnv16he
);
5103 ucnv_close(cnv16jp
);
5104 ucnv_close(cnv01us
);
5107 /* Small source buffer testing, LMBCS -> Unicode */
5109 UErrorCode errorCode
=U_ZERO_ERROR
;
5111 const char * pSource
= (const char *)pszLMBCS
;
5112 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
5113 int codepointCount
= 0;
5115 UChar Out
[sizeof(pszUnicode
) + 1];
5117 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
5120 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
5121 if(U_FAILURE(errorCode
)) {
5122 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5127 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
5129 ucnv_toUnicode (cnv
,
5133 (pSource
+1), /* claim that this is a 1- byte buffer */
5135 FALSE
, /* FALSE means there might be more chars in the next buffer */
5138 if (U_SUCCESS (errorCode
))
5140 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
5142 /* we are on to the next code point: check value */
5144 if (Out
[0] != pszUnicode
[codepointCount
]){
5145 log_err("LMBCS->Uni result %lx should have been %lx \n",
5146 Out
[0], pszUnicode
[codepointCount
]);
5149 pOut
= Out
; /* reset for accumulating next code point */
5155 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
5159 /* limits & surrogate error testing */
5160 char LIn
[sizeof(pszLMBCS
)];
5161 const char * pLIn
= LIn
;
5163 char LOut
[sizeof(pszLMBCS
)];
5164 char * pLOut
= LOut
;
5166 UChar UOut
[sizeof(pszUnicode
)];
5167 UChar
* pUOut
= UOut
;
5169 UChar UIn
[sizeof(pszUnicode
)];
5170 const UChar
* pUIn
= UIn
;
5172 int32_t off
[sizeof(offsets
)];
5175 errorCode
=U_ZERO_ERROR
;
5177 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5179 ucnv_fromUnicode(cnv
, &pLOut
, pLOut
+1, &pUIn
, pUIn
-1, off
, FALSE
, &errorCode
);
5180 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5182 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
5186 errorCode
=U_ZERO_ERROR
;
5187 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
5188 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5190 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
5192 errorCode
=U_ZERO_ERROR
;
5194 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
5195 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5197 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
5199 errorCode
=U_ZERO_ERROR
;
5201 /* 0 byte source request - no error, no pointer movement */
5202 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
5203 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
5204 if(U_FAILURE(errorCode
)) {
5205 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
5207 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5209 log_err("Unexpected pointer move in 0 byte source request \n");
5211 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5212 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5213 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5215 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5217 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5219 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5221 errorCode
= U_ZERO_ERROR
;
5223 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5226 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+UPRV_LENGTHOF(pszUnicode
),off
,FALSE
, &errorCode
);
5227 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5229 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5232 errorCode
= U_ZERO_ERROR
;
5234 pLIn
= (const char *)pszLMBCS
;
5235 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5236 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5238 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5241 /* unpaired or chopped LMBCS surrogates */
5243 /* OK high surrogate, Low surrogate is chopped */
5244 LIn
[0] = (char)0x14;
5245 LIn
[1] = (char)0xD8;
5246 LIn
[2] = (char)0x01;
5247 LIn
[3] = (char)0x14;
5248 LIn
[4] = (char)0xDC;
5250 errorCode
= U_ZERO_ERROR
;
5253 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5254 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5255 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5257 log_err("Unexpected results on chopped low surrogate\n");
5260 /* chopped at surrogate boundary */
5261 LIn
[0] = (char)0x14;
5262 LIn
[1] = (char)0xD8;
5263 LIn
[2] = (char)0x01;
5265 errorCode
= U_ZERO_ERROR
;
5268 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5269 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5271 log_err("Unexpected results on chopped at surrogate boundary \n");
5274 /* unpaired surrogate plus valid Unichar */
5275 LIn
[0] = (char)0x14;
5276 LIn
[1] = (char)0xD8;
5277 LIn
[2] = (char)0x01;
5278 LIn
[3] = (char)0x14;
5279 LIn
[4] = (char)0xC9;
5280 LIn
[5] = (char)0xD0;
5282 errorCode
= U_ZERO_ERROR
;
5285 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5286 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5288 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5291 /* unpaired surrogate plus chopped Unichar */
5292 LIn
[0] = (char)0x14;
5293 LIn
[1] = (char)0xD8;
5294 LIn
[2] = (char)0x01;
5295 LIn
[3] = (char)0x14;
5296 LIn
[4] = (char)0xC9;
5299 errorCode
= U_ZERO_ERROR
;
5302 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5303 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5305 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5308 /* unpaired surrogate plus valid non-Unichar */
5309 LIn
[0] = (char)0x14;
5310 LIn
[1] = (char)0xD8;
5311 LIn
[2] = (char)0x01;
5312 LIn
[3] = (char)0x0F;
5313 LIn
[4] = (char)0x3B;
5316 errorCode
= U_ZERO_ERROR
;
5319 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5320 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5322 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5325 /* unpaired surrogate plus chopped non-Unichar */
5326 LIn
[0] = (char)0x14;
5327 LIn
[1] = (char)0xD8;
5328 LIn
[2] = (char)0x01;
5329 LIn
[3] = (char)0x0F;
5332 errorCode
= U_ZERO_ERROR
;
5335 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5337 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5339 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5343 ucnv_close(cnv
); /* final cleanup */
5347 static void TestJitterbug255()
5349 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5350 const char *testBuffer
= (const char *)testBytes
;
5351 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5352 UErrorCode status
= U_ZERO_ERROR
;
5354 UConverter
*cnv
= 0;
5356 cnv
= ucnv_open("shift-jis", &status
);
5357 if (U_FAILURE(status
) || cnv
== 0) {
5358 log_data_err("Failed to open the converter for SJIS.\n");
5361 while (testBuffer
!= testEnd
)
5363 /*result = */ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5364 if (U_FAILURE(status
))
5366 log_err("Failed to convert the next UChar for SJIS.\n");
5373 static void TestEBCDICUS4XML()
5375 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
5376 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
5377 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
5378 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
5379 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
5380 UChar
*unicodes
= unicodes_x
;
5381 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5382 char *target
= target_x
;
5383 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5384 UErrorCode status
= U_ZERO_ERROR
;
5385 UConverter
*cnv
= 0;
5387 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5388 if (U_FAILURE(status
) || cnv
== 0) {
5389 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5392 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
5393 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5394 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5395 u_errorName(status
));
5396 printUSeqErr(unicodes_x
, 3);
5397 printUSeqErr(toUnicodeMaps
, 3);
5399 status
= U_ZERO_ERROR
;
5400 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
5401 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5402 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5403 u_errorName(status
));
5404 printSeqErr((const unsigned char*)target_x
, 3);
5405 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5411 #if !UCONFIG_NO_COLLATION
5413 static void TestJitterbug981(){
5415 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5416 UErrorCode status
= U_ZERO_ERROR
;
5417 UConverter
*utf8cnv
;
5418 UCollator
* myCollator
;
5421 utf8cnv
= ucnv_open ("utf8", &status
);
5422 if(U_FAILURE(status
)){
5423 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status
));
5426 myCollator
= ucol_open("zh", &status
);
5427 if(U_FAILURE(status
)){
5428 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status
));
5429 ucnv_close(utf8cnv
);
5433 rules
= ucol_getRules(myCollator
, &rules_length
);
5434 if(rules_length
== 0) {
5435 log_data_err("missing zh tailoring rule string\n");
5436 ucol_close(myCollator
);
5437 ucnv_close(utf8cnv
);
5440 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
5441 buff
= malloc(buff_size
);
5445 ucnv_reset(utf8cnv
);
5446 status
= U_ZERO_ERROR
;
5447 if(target_cap
>= buff_size
) {
5448 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5451 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5452 rules
, rules_length
, &status
);
5453 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5454 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5455 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5458 numNeeded
= bytes_needed
;
5459 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5460 ucol_close(myCollator
);
5461 ucnv_close(utf8cnv
);
5467 #if !UCONFIG_NO_LEGACY_CONVERSION
5468 static void TestJitterbug1293(){
5469 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5471 UErrorCode status
= U_ZERO_ERROR
;
5472 UConverter
* conv
=NULL
;
5473 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5474 conv
= ucnv_open("shift-jis",&status
);
5475 if(U_FAILURE(status
)){
5476 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5482 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
5483 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5484 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5485 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5487 numNeeded
= bytes_needed
;
5488 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5489 if(U_FAILURE(status
)){
5490 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));
5497 static void TestJB5275_1(){
5499 static const char* data
= "\x3B\xB3\x0A" /* Easy characters */
5500 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5501 /* Switch script: */
5502 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5503 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5504 "\xEF\x40\x3B\xB3\x0A";
5505 static const UChar expected
[] ={
5506 0x003b, 0x0a15, 0x000a, /* Easy characters */
5507 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5508 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5509 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5510 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5513 UErrorCode status
= U_ZERO_ERROR
;
5514 UConverter
* conv
= ucnv_open("iscii-gur", &status
);
5515 UChar dest
[100] = {'\0'};
5516 UChar
* target
= dest
;
5517 UChar
* targetLimit
= dest
+100;
5518 const char* source
= data
;
5519 const char* sourceLimit
= data
+strlen(data
);
5520 const UChar
* exp
= expected
;
5522 if (U_FAILURE(status
)) {
5523 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status
));
5527 log_verbose("Testing switching back to default script when new line is encountered.\n");
5528 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5529 if(U_FAILURE(status
)){
5530 log_err("conversion failed: %s \n", u_errorName(status
));
5532 targetLimit
= target
;
5534 printUSeq(target
, (int)(targetLimit
-target
));
5535 while(target
<targetLimit
){
5537 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5545 static void TestJB5275(){
5546 static const char* data
=
5547 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5548 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5549 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5550 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5551 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5552 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5553 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5554 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5555 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5556 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5557 static const UChar expected
[] ={
5558 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5559 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5560 0x0038, 0x0C95, 0x000A, /* Kannada test */
5561 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5562 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5563 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5566 UErrorCode status
= U_ZERO_ERROR
;
5567 UConverter
* conv
= ucnv_open("iscii", &status
);
5568 UChar dest
[100] = {'\0'};
5569 UChar
* target
= dest
;
5570 UChar
* targetLimit
= dest
+100;
5571 const char* source
= data
;
5572 const char* sourceLimit
= data
+strlen(data
);
5573 const UChar
* exp
= expected
;
5574 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5575 if(U_FAILURE(status
)){
5576 log_data_err("conversion failed: %s \n", u_errorName(status
));
5578 targetLimit
= target
;
5581 printUSeq(target
, (int)(targetLimit
-target
));
5583 while(target
<targetLimit
){
5585 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5594 TestIsFixedWidth() {
5595 UErrorCode status
= U_ZERO_ERROR
;
5596 UConverter
*cnv
= NULL
;
5599 const char *fixedWidth
[] = {
5602 "ibm-5478_P100-1995"
5605 const char *notFixedWidth
[] = {
5612 for (i
= 0; i
< UPRV_LENGTHOF(fixedWidth
); i
++) {
5613 cnv
= ucnv_open(fixedWidth
[i
], &status
);
5614 if (cnv
== NULL
|| U_FAILURE(status
)) {
5615 log_data_err("Error open converter: %s - %s \n", fixedWidth
[i
], u_errorName(status
));
5619 if (!ucnv_isFixedWidth(cnv
, &status
)) {
5620 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth
[i
]);
5625 for (i
= 0; i
< UPRV_LENGTHOF(notFixedWidth
); i
++) {
5626 cnv
= ucnv_open(notFixedWidth
[i
], &status
);
5627 if (cnv
== NULL
|| U_FAILURE(status
)) {
5628 log_data_err("Error open converter: %s - %s \n", notFixedWidth
[i
], u_errorName(status
));
5632 if (ucnv_isFixedWidth(cnv
, &status
)) {
5633 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth
[i
]);