1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
12 * Modification History:
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
31 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
32 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
39 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77 static void TestISO_2022_CN_EXT(void);
79 static void TestJIS(void);
80 static void TestHZ(void);
83 static void TestSCSU(void);
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95 static void TestJitterbug915(void);
97 static void TestISCII(void);
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
106 static void TestIsFixedWidth(void);
109 static void TestInBufSizes(void);
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in
[],
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
/* Capacity of the scratch buffers used by the conversion helpers in this file,
 * and the initial value of both chunk-size globals below. */
123 #define NEW_MAX_BUFFER 999
/* Input chunk size for the conversion helpers; assigned from the insize
 * argument in TestNewConvertWithBufferSizes(). */
125 static int32_t gInBufferSize
= NEW_MAX_BUFFER
;
/* Output chunk size for the conversion helpers; assigned from the outsize
 * argument in TestNewConvertWithBufferSizes(). */
126 static int32_t gOutBufferSize
= NEW_MAX_BUFFER
;
/* Description of the test currently running; written by setNuConvTestName()
 * and appended to log messages by the conversion helpers. */
127 static char gNuConvTestName
[1024];
/*
 * nct_min(x, y): evaluates to the smaller of x and y (y when they are equal).
 *
 * Every use of an argument and the whole expansion are parenthesized so that
 * arguments containing operators (e.g. `ptr + n`, `a & b`) expand with the
 * intended grouping. Each argument may still be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
131 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
133 if(cnv
&& cnv
[0] == '@') {
134 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
136 return ucnv_open(cnv
, err
);
/*
 * printSeq: dumps bytes from a[] to the verbose test log, each formatted
 * as "0x%02x ".
 *   a   - byte sequence to dump
 *   len - length of a
 * NOTE(review): the declaration of i and the loop bound are not visible in this
 * listing; presumably i runs from 0 to len as in printUSeq() - confirm.
 */
140 static void printSeq(const unsigned char* a
, int len
)
145 log_verbose("0x%02x ", a
[i
++]);
/*
 * printUSeq: dumps UChars from a[] to the verbose test log, each formatted
 * as "0x%04x ", while i < len.
 *   a   - UChar sequence to dump
 *   len - number of UChars in a
 * NOTE(review): the declaration/initialisation of i is not visible in this
 * listing; presumably it starts at 0 - confirm.
 */
149 static void printUSeq(const UChar
* a
, int len
)
153 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * printSeqErr: writes a byte sequence to stderr as "{0x.. 0x.. }" followed by a
 * newline. Output goes straight to stderr rather than through the log_* helpers.
 *   a   - byte sequence to dump
 *   len - length of a
 * NOTE(review): the declaration of i and the loop bound are not visible in this
 * listing; presumably i runs from 0 to len - confirm.
 */
157 static void printSeqErr(const unsigned char* a
, int len
)
160 fprintf(stderr
, "{");
162 fprintf(stderr
, "0x%02x ", a
[i
++]);
163 fprintf(stderr
, "}\n");
/*
 * printUSeqErr: writes a UChar sequence to stderr. The output starts with
 * "{U+", then each unit formatted as "0x%04x ", then "}" and a newline.
 *   a   - UChar sequence to dump
 *   len - number of UChars in a
 * NOTE(review): the declaration of i and the loop bound are not visible in this
 * listing; presumably i runs from 0 to len - confirm.
 */
166 static void printUSeqErr(const UChar
* a
, int len
)
169 fprintf(stderr
, "{U+");
171 fprintf(stderr
, "0x%04x ", a
[i
++]);
172 fprintf(stderr
,"}\n");
/*
 * TestNextUChar: reads characters from [source, limit) with ucnv_getNextUChar()
 * on cnv and checks each call against the results[] table.
 *
 *   cnv     - converter under test
 *   source  - start of the input bytes
 *   limit   - end of the input bytes (passed as the source limit)
 *   results - read through r: *r is the expected number of input bytes consumed
 *             by one call and *(r+1) is the expected code point (see the log_err
 *             arguments below); a negative *r disables the byte-count check
 *   message - prefix for every failure line
 *
 * U_INDEX_OUTOFBOUNDS_ERROR ends the test quietly; any other failure code is
 * reported with log_err.
 *
 * NOTE(review): several lines (return type, braces, the declarations of c and
 * s0, the loop header, the second half of the mismatch condition, and how r is
 * advanced) are not visible in this listing - confirm against the full source.
 */
176 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
179 const char* s
=(char*)source
;
180 const int32_t *r
=results
;
181 UErrorCode errorCode
=U_ZERO_ERROR
;
186 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
187 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode
)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
193 /* test the expected number of input bytes only if >=0 */
194 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
/* report what was read (c, s-s0 bytes) next to what the table expected */
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message
, c
, (s
-s0
), *(r
+1), *r
);
/*
 * TestNextUCharError: calls ucnv_getNextUChar() on [source, limit) and checks
 * that it produces the expected error.
 *
 *   cnv      - converter under test
 *   source   - start of the input bytes
 *   limit    - end of the input bytes
 *   expected - error code the call must leave in errorCode
 *   message  - description of the scenario, included in failure output
 *
 * Two checks are made: errorCode must equal expected, and the returned value
 * must be 0xFFFD or 0xffff. Each mismatch is reported with log_err.
 *
 * NOTE(review): the return type, braces and the declaration of c are not
 * visible in this listing.
 */
206 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
208 const char* s
=(char*)source
;
209 UErrorCode errorCode
=U_ZERO_ERROR
;
211 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
212 if(errorCode
!= expected
){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
215 if(c
!= 0xFFFD && c
!= 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
221 static void TestInBufSizes(void)
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
236 static void TestOutBufSizes(void)
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
/*
 * addTestNewConvert: registers the tests of this file under the
 * "tsconv/nucnvtst/" path by calling addTest() once per test function.
 *
 *   root - test tree root handed to every addTest() call
 *
 * Several registrations sit behind UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 guards.
 * NOTE(review): the matching #endif lines and the braces are not visible in
 * this listing, so the exact extent of each guard must be confirmed against the
 * full source.
 */
250 void addTestNewConvert(TestNode
** root
)
252 #if !UCONFIG_NO_FILE_IO
253 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
256 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
260 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
261 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
265 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
266 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
269 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
272 #if !UCONFIG_NO_LEGACY_CONVERSION
273 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
276 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
278 #if !UCONFIG_NO_LEGACY_CONVERSION
279 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
280 #if !UCONFIG_NO_FILE_IO
281 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
282 addTest(root
, &TestICCRunout
, "tsconv/nucnvtst/TestICCRunout");
284 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
286 #ifdef U_ENABLE_GENERIC_ISO_2022
287 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
290 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
292 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
302 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
305 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
307 #if !UCONFIG_NO_LEGACY_CONVERSION
308 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
310 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
313 addTest(root
, &TestJB5275
, "tsconv/nucnvtst/TestJB5275");
314 addTest(root
, &TestJB5275_1
, "tsconv/nucnvtst/TestJB5275_1");
315 #if !UCONFIG_NO_COLLATION
316 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
319 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
327 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
329 #if !UCONFIG_NO_LEGACY_CONVERSION
330 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
332 addTest(root
, &TestJitterbug6175
, "tsconv/nucnvtst/TestJitterbug6175");
334 addTest(root
, &TestIsFixedWidth
, "tsconv/nucnvtst/TestIsFixedWidth");
/*
 * setNuConvTestName: formats a description of the current test into the global
 * gNuConvTestName with sprintf(); the conversion helpers append that string to
 * their log messages.
 *
 *   codepage  - converter name under test
 *   direction - direction label; the callers in this file pass "FROM" or "TO"
 *
 * The format has two %s and two %d fields (names, then input and output buffer
 * sizes); only the last argument, (int)gOutBufferSize, is visible here.
 * NOTE(review): sprintf() is unbounded and gNuConvTestName holds 1024 bytes, so
 * this relies on short converter names - confirm that is acceptable.
 */
339 /* Note that this test already makes use of statics, so it's not really
341 This convenience function lets us make the error messages actually useful.
344 static void setNuConvTestName(const char *codepage
, const char *direction
)
346 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
350 (int)gOutBufferSize
);
/*
 * ETestConvertResult: result codes for testConvertFromU() and testConvertToU(),
 * which are both declared to return this type.
 * NOTE(review): the opening line of the declaration is not visible in this
 * listing.
 */
355 TC_OK
= 0, /* test was OK */
356 TC_MISMATCH
= 1, /* Match failed - err was printed */
357 TC_FAIL
= 2 /* Test failed, don't print an err because it was already printed. */
358 } ETestConvertResult
;
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
/*
 * testConvertFromU: converts a UChar string to bytes with ucnv_fromUnicode(),
 * feeding input and draining output in chunks of gInBufferSize and
 * gOutBufferSize, then compares the result with the expected bytes.
 *
 *   source, sourceLen - UChar input and its length in UChars
 *   expect, expectLen - expected output bytes and their count
 *   codepage          - converter name, opened through my_ucnv_open()
 *   expectOffsets     - expected offsets, one per output byte; 0 skips the
 *                       offset comparison
 *   useFallback       - forwarded to ucnv_setFallback()
 *
 * Failures are reported through log_err / log_data_err and stderr dumps.
 * NOTE(review): braces, several declarations, most ucnv_fromUnicode()
 * arguments and every return statement are not visible in this listing, so the
 * mapping from each failure to an ETestConvertResult value must be confirmed
 * against the full source.
 */
362 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
363 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
365 UErrorCode status
= U_ZERO_ERROR
;
366 UConverter
*conv
= 0;
367 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
368 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
375 int32_t realBufferSize
;
377 const UChar
*realSourceEnd
;
378 const UChar
*sourceLimit
;
379 UBool checkOffsets
= TRUE
;
/* pre-fill the output buffer with 0xF0 bytes before converting */
382 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
383 junkout
[i
] = (char)0xF0;
384 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
387 setNuConvTestName(codepage
, "FROM");
389 log_verbose("\n========= %s\n", gNuConvTestName
);
391 conv
= my_ucnv_open(codepage
, &status
);
393 if(U_FAILURE(status
))
395 log_data_err("Couldn't open converter %s\n",codepage
);
399 ucnv_setFallback(conv
,useFallback
);
402 log_verbose("Converter opened..\n");
408 realBufferSize
= UPRV_LENGTHOF(junkout
);
409 realBufferEnd
= junkout
+ realBufferSize
;
410 realSourceEnd
= source
+ sourceLen
;
/* offsets are only requested and compared when the output chunk size equals
   the real buffer size and the input chunk size is NEW_MAX_BUFFER */
412 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
413 checkOffsets
= FALSE
;
/* limits for this call: at most gOutBufferSize of output and gInBufferSize of
   input, clamped to the real ends of the buffers */
417 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
418 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
420 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
422 if(targ
== realBufferEnd
) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
429 status
= U_ZERO_ERROR
;
431 ucnv_fromUnicode (conv
,
436 checkOffsets
? offs
: NULL
,
437 doFlush
, /* flush if we're at the end of the input data */
/* go round again when the output chunk overflowed, or when the call succeeded
   but input remains beyond sourceLimit */
439 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
441 if(U_FAILURE(status
)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen
, targ
-junkout
);
/* verbose mode only: build hex dumps of the output bytes and their offsets */
449 if(getTestOption(VERBOSITY_OPTION
))
452 char offset_str
[9999];
457 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
458 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
459 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
463 printSeq((const uint8_t *)expect
, expectLen
);
464 if ( checkOffsets
) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str
);
/* check 1: number of output bytes */
472 if(expectLen
!= targ
-junkout
) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
475 fprintf(stderr
, "Got:\n");
476 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
477 fprintf(stderr
, "Expected:\n");
478 printSeqErr((const unsigned char*)expect
, expectLen
);
/* check 2: offsets, one int32_t per output byte, when enabled and supplied */
482 if (checkOffsets
&& (expectOffsets
!= 0) ) {
483 log_verbose("comparing %d offsets..\n", targ
-junkout
);
484 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
486 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
489 for(p
=junkout
;p
<targ
;p
++) {
490 log_err("%d,", junokout
[p
-junkout
]);
493 log_err("Expected: ");
494 for(i
=0; i
<(targ
-junkout
); i
++) {
495 log_err("%d,", expectOffsets
[i
]);
/* check 3: the output bytes themselves */
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout
, expect
, expectLen
)) {
503 log_verbose("Matches!\n");
506 log_err("String does not match u->%s\n", gNuConvTestName
);
507 printUSeqErr(source
, sourceLen
);
508 fprintf(stderr
, "Got:\n");
509 printSeqErr((const unsigned char *)junkout
, expectLen
);
510 fprintf(stderr
, "Expected:\n");
511 printSeqErr((const unsigned char *)expect
, expectLen
);
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
/*
 * testConvertToU: converts bytes to UChars with ucnv_toUnicode(), feeding input
 * and draining output in chunks of gInBufferSize and gOutBufferSize, then
 * compares the result with the expected UChars.
 *
 *   source, sourcelen - input bytes and their count
 *   expect, expectlen - expected UChars and their count
 *   codepage          - converter name, opened through my_ucnv_open()
 *   expectOffsets     - expected offsets, one per output UChar; 0 skips the
 *                       offset comparison
 *   useFallback       - forwarded to ucnv_setFallback()
 *
 * Failures are reported through log_err / log_data_err and stderr dumps.
 * NOTE(review): braces, several declarations, most ucnv_toUnicode() arguments
 * and every return statement are not visible in this listing, so the mapping
 * from each failure to an ETestConvertResult value must be confirmed against
 * the full source.
 */
519 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
520 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
522 UErrorCode status
= U_ZERO_ERROR
;
523 UConverter
*conv
= 0;
524 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
525 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
527 const char *realSourceEnd
;
528 const char *srcLimit
;
534 UBool checkOffsets
= TRUE
;
536 int32_t realBufferSize
;
537 UChar
*realBufferEnd
;
540 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
543 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
546 setNuConvTestName(codepage
, "TO");
548 log_verbose("\n========= %s\n", gNuConvTestName
);
550 conv
= my_ucnv_open(codepage
, &status
);
552 if(U_FAILURE(status
))
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
558 ucnv_setFallback(conv
,useFallback
);
560 log_verbose("Converter opened..\n");
562 src
= (const char *)source
;
566 realBufferSize
= UPRV_LENGTHOF(junkout
);
567 realBufferEnd
= junkout
+ realBufferSize
;
568 realSourceEnd
= src
+ sourcelen
;
/* offsets are only requested and compared when the output chunk size equals
   the real buffer size and the input chunk size is NEW_MAX_BUFFER */
570 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
571 checkOffsets
= FALSE
;
/* limits for this call: at most gOutBufferSize of output and gInBufferSize of
   input, clamped to the real ends of the buffers */
575 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
576 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
578 if(targ
== realBufferEnd
)
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
585 /* oldTarg = targ; */
587 status
= U_ZERO_ERROR
;
589 ucnv_toUnicode (conv
,
594 checkOffsets
? offs
: NULL
,
595 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
598 /* offs += (targ-oldTarg); */
600 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
602 if(U_FAILURE(status
))
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen
, targ
-junkout
);
/* verbose mode only: build hex dumps of the output UChars and their offsets */
610 if(getTestOption(VERBOSITY_OPTION
))
613 char offset_str
[9999];
619 for(ptr
= junkout
;ptr
<targ
;ptr
++)
621 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
622 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
626 printUSeq(expect
, expectlen
);
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str
);
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* offsets check: one int32_t per output UChar, when enabled and supplied */
638 if (checkOffsets
&& (expectOffsets
!= 0))
640 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
643 for(p
=junkout
;p
<targ
;p
++) {
644 log_err("%d,", junokout
[p
-junkout
]);
647 log_err("Expected: ");
648 for(i
=0; i
<(targ
-junkout
); i
++) {
649 log_err("%d,", expectOffsets
[i
]);
653 for(i
=0; i
<(targ
-junkout
); i
++) {
654 log_err("%X,", junkout
[i
]);
658 for(i
=0; i
<(src
-(const char *)source
); i
++) {
659 log_err("%X,", (unsigned char)source
[i
]);
/* content check: compares expectlen*2 bytes of output against expect */
665 if(!memcmp(junkout
, expect
, expectlen
*2))
667 log_verbose("Matches!\n");
672 log_err("String does not match. %s\n", gNuConvTestName
);
673 log_verbose("String does not match. %s\n", gNuConvTestName
);
675 printUSeqErr(junkout
, expectlen
);
676 printf("\nExpected:");
677 printUSeqErr(expect
, expectlen
);
683 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText
[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable
[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
693 static const uint8_t expectedUTF8
[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs
[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs
[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceding */
702 static const const uint8_t expectedISO2022
[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs
[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs
[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930
[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs
[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs
[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943
[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs
[] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs
[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027
[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs
[] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920
[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs
[] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593
[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs
[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1
[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs
[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
753 static const uint8_t expectedUTF16BE
[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs
[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs
[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
760 static const uint8_t expectedUTF16LE
[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs
[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs
[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
767 static const uint8_t expectedUTF32BE
[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs
[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs
[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
791 static const uint8_t expectedUTF32LE
[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs
[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs
[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
818 /** Test chars #2 **/
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
825 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
831 gInBufferSize
= insize
;
832 gOutBufferSize
= outsize
;
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
838 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
839 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
841 log_verbose("Test surrogate behaviour for UTF8\n");
843 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
848 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput
, UPRV_LENGTHOF(testinput
),
850 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
857 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
858 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
862 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
863 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
865 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
866 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
868 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
869 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
871 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
872 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
875 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
876 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
878 #if !UCONFIG_NO_LEGACY_CONVERSION
880 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
881 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
883 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
884 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
888 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
889 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
891 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
892 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
894 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
895 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
897 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
898 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
905 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
906 sampleText
, UPRV_LENGTHOF(sampleText
), "utf8", fmUTF8Offs
,FALSE
);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
909 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
910 sampleText
, UPRV_LENGTHOF(sampleText
), "ISO_2022", fmISO2022Offs
,FALSE
);
914 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
915 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
917 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
918 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16be", fmUTF16BEOffs
,FALSE
);
920 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
921 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32le", fmUTF32LEOffs
,FALSE
);
923 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
924 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32be", fmUTF32BEOffs
,FALSE
);
926 #if !UCONFIG_NO_LEGACY_CONVERSION
928 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
), sampleTextRoundTripUnmappable
,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-930", fmIBM930Offs
,FALSE
);
931 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),sampleTextRoundTripUnmappable
,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-943", fmIBM943Offs
,FALSE
);
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
937 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
941 malteseUChars
, UPRV_LENGTHOF(malteseUChars
), "latin3", NULL
,FALSE
);
943 testConvertFromU(malteseUChars
, UPRV_LENGTHOF(malteseUChars
),
944 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
947 testConvertFromU(LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
),
948 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
949 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
950 LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
), "LMBCS-1", fmLMBCSOffs
,FALSE
);
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
955 /* encode directly set D and set O */
956 static const uint8_t utf7
[] = {
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
968 static const UChar unicode
[] = {
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
973 [Japanese word "nihongo"]
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
978 0x65e5, 0x672c, 0x8a9e
980 static const int32_t toUnicodeOffsets
[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
986 static const int32_t fromUnicodeOffsets
[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted
[] = {
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1006 static const int32_t toUnicodeOffsetsR
[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1012 static const int32_t fromUnicodeOffsetsR
[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1019 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
1021 testConvertToU(utf7
, sizeof(utf7
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7", toUnicodeOffsets
,FALSE
);
1023 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
1025 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1034 static const uint8_t imap
[] = {
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1055 static const UChar unicode
[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1059 [Japanese word "nihongo"]
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1069 0x65e5, 0x672c, 0x8a9e,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1076 static const int32_t toUnicodeOffsets
[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1082 38, 39, 40, 41, 42, 43,
1087 static const int32_t fromUnicodeOffsets
[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1093 20, 21, 22, 23, 24, 25,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1099 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1101 testConvertToU(imap
, sizeof(imap
), unicode
, UPRV_LENGTHOF(unicode
), "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1104 /* Test UTF-8 bad data handling*/
1106 static const uint8_t utf8
[]={
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 110000 (> 10FFFF) */
1120 static const uint16_t utf8Expected
[]={
1134 static const int32_t utf8Offsets
[]={
1135 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1137 testConvertToU(utf8
, sizeof(utf8
),
1138 utf8Expected
, UPRV_LENGTHOF(utf8Expected
), "utf-8", utf8Offsets
,FALSE
);
1142 /* Test UTF-32BE bad data handling*/
1144 static const uint8_t utf32
[]={
1145 0x00, 0x00, 0x00, 0x61,
1146 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1147 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1148 0x00, 0x00, 0x00, 0x62,
1149 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1150 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1151 0x00, 0x00, 0x01, 0x62,
1152 0x00, 0x00, 0x02, 0x62
1154 static const uint16_t utf32Expected
[]={
1156 0xfffd, /* 0x110000 out of range */
1157 0xDBFF, /* 0x10FFFF in range */
1160 0xfffd, /* 0xffffffff out of range */
1161 0xfffd, /* 0x7fffffff out of range */
1165 static const int32_t utf32Offsets
[]={
1166 0, 4, 8, 8, 12, 16, 20, 24, 28
1168 static const uint8_t utf32ExpectedBack
[]={
1169 0x00, 0x00, 0x00, 0x61,
1170 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1171 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1172 0x00, 0x00, 0x00, 0x62,
1173 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1174 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1175 0x00, 0x00, 0x01, 0x62,
1176 0x00, 0x00, 0x02, 0x62
1178 static const int32_t utf32OffsetsBack
[]={
1189 testConvertToU(utf32
, sizeof(utf32
),
1190 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32be", utf32Offsets
,FALSE
);
1191 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1192 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1195 /* Test UTF-32LE bad data handling*/
1197 static const uint8_t utf32
[]={
1198 0x61, 0x00, 0x00, 0x00,
1199 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1200 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1201 0x62, 0x00, 0x00, 0x00,
1202 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1203 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1204 0x62, 0x01, 0x00, 0x00,
1205 0x62, 0x02, 0x00, 0x00,
1208 static const uint16_t utf32Expected
[]={
1210 0xfffd, /* 0x110000 out of range */
1211 0xDBFF, /* 0x10FFFF in range */
1214 0xfffd, /* 0xffffffff out of range */
1215 0xfffd, /* 0x7fffffff out of range */
1219 static const int32_t utf32Offsets
[]={
1220 0, 4, 8, 8, 12, 16, 20, 24, 28
1222 static const uint8_t utf32ExpectedBack
[]={
1223 0x61, 0x00, 0x00, 0x00,
1224 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1225 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1226 0x62, 0x00, 0x00, 0x00,
1227 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1228 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1229 0x62, 0x01, 0x00, 0x00,
1230 0x62, 0x02, 0x00, 0x00
1232 static const int32_t utf32OffsetsBack
[]={
1242 testConvertToU(utf32
, sizeof(utf32
),
1243 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32le", utf32Offsets
,FALSE
);
1244 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1245 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
1249 static void TestCoverageMBCS(){
1251 UErrorCode status
= U_ZERO_ERROR
;
1252 const char *directory
= loadTestData(&status
);
1253 char* tdpath
= NULL
;
1254 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1255 int len
= strlen(directory
);
1258 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1259 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1260 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1261 uprv_strcpy(tdpath
,directory
);
1262 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
1264 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1267 u_setDataDirectory(tdpath
);
1268 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1271 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test1.ucm,
1272 which is a test file for MBCS conversion with single-byte codepage data. */
1275 /* MBCS with single byte codepage data test1.ucm*/
1276 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1277 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1278 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1281 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1282 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1285 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test3.ucm,
1286 which is a test file for MBCS conversion with three-byte codepage data. */
1289 /* MBCS with three byte codepage data test3.ucm*/
1290 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1291 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1292 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1294 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1295 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1296 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1299 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1300 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
1303 testConvertToU(test3input
, sizeof(test3input
),
1304 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test3", fromtest3Offs
,FALSE
);
1308 /* Some more tests to increase the code coverage in MBCS. Create a test converter from test4.ucm,
1309 which is a test file for MBCS conversion with four-byte codepage data. */
1312 /* MBCS with four byte codepage data test4.ucm*/
1313 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1314 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1315 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1317 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1318 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1319 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1322 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1323 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
1326 testConvertToU(test4input
, sizeof(test4input
),
1327 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test4", fromtest4Offs
,FALSE
);
1332 /* restore the original data directory */
1333 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1334 u_setDataDirectory(saveDirectory
);
1335 free(saveDirectory
);
1340 static void TestConverterType(const char *convName
, UConverterType convType
) {
1341 UConverter
* myConverter
;
1342 UErrorCode err
= U_ZERO_ERROR
;
1344 myConverter
= my_ucnv_open(convName
, &err
);
1346 if (U_FAILURE(err
)) {
1347 log_data_err("Failed to create an %s converter\n", convName
);
1352 if (ucnv_getType(myConverter
)!=convType
) {
1353 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1354 convName
, convType
);
1357 log_verbose("ucnv_getType %s ok\n", convName
);
1360 ucnv_close(myConverter
);
1363 static void TestConverterTypesAndStarters()
1365 #if !UCONFIG_NO_LEGACY_CONVERSION
1366 UConverter
* myConverter
;
1367 UErrorCode err
= U_ZERO_ERROR
;
1368 UBool mystarters
[256];
1370 /* const UBool expectedKSCstarters[256] = {
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1399 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1401 myConverter
= ucnv_open("ksc", &err
);
1402 if (U_FAILURE(err
)) {
1403 log_data_err("Failed to create an ibm-ksc converter\n");
1408 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1409 log_err("ucnv_getType Failed for ibm-949\n");
1411 log_verbose("ucnv_getType ibm-949 ok\n");
1413 if(myConverter
!=NULL
)
1414 ucnv_getStarters(myConverter
, mystarters
, &err
);
1416 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1417 log_err("Failed ucnv_getStarters for ksc\n");
1419 log_verbose("ucnv_getStarters ok\n");*/
1422 ucnv_close(myConverter
);
1424 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1425 TestConverterType("ibm-878", UCNV_SBCS
);
1428 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1430 TestConverterType("ibm-1208", UCNV_UTF8
);
1432 TestConverterType("utf-8", UCNV_UTF8
);
1433 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1434 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1435 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1436 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1438 #if !UCONFIG_NO_LEGACY_CONVERSION
1440 #if defined(U_ENABLE_GENERIC_ISO_2022)
1441 TestConverterType("iso-2022", UCNV_ISO_2022
);
1444 TestConverterType("hz", UCNV_HZ
);
1447 TestConverterType("scsu", UCNV_SCSU
);
1449 #if !UCONFIG_NO_LEGACY_CONVERSION
1450 TestConverterType("x-iscii-de", UCNV_ISCII
);
1453 TestConverterType("ascii", UCNV_US_ASCII
);
1454 TestConverterType("utf-7", UCNV_UTF7
);
1455 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1456 TestConverterType("bocu-1", UCNV_BOCU1
);
1460 TestAmbiguousConverter(UConverter
*cnv
) {
1461 static const char inBytes
[3]={ 0x61, 0x5B, 0x5c };
1462 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1466 UErrorCode errorCode
;
1469 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1470 errorCode
=U_ZERO_ERROR
;
1473 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+3, NULL
, TRUE
, &errorCode
);
1474 if(U_FAILURE(errorCode
)) {
1475 /* we do not care about general failures in this test; the input may just not be mappable */
1479 if(outUnicode
[0]!=0x61 || outUnicode
[1]!=0x5B || outUnicode
[2]==0xfffd) {
1480 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1481 /* There are some encodings that are partially ASCII based,
1482 like the ISO-7 and GSM series of codepages, which we ignore. */
1486 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1488 /* check that outUnicode[2]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1489 if((outUnicode
[2]!=0x5c)!=isAmbiguous
) {
1490 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1491 ucnv_getName(cnv
, &errorCode
), outUnicode
[2]!=0x5c, isAmbiguous
);
1495 if(outUnicode
[2]!=0x5c) {
1496 /* needs fixup, fix it */
1497 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1498 if(outUnicode
[2]!=0x5c) {
1499 /* the fix failed */
1500 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
1506 static void TestAmbiguous()
1508 UErrorCode status
= U_ZERO_ERROR
;
1509 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1510 static const char target
[] = {
1511 /* "\\usr\\local\\share\\data\\icutest.txt" */
1512 0x5c, 0x75, 0x73, 0x72,
1513 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1514 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1515 0x5c, 0x64, 0x61, 0x74, 0x61,
1516 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1519 UChar asciiResult
[200], sjisResult
[200];
1520 int32_t /*asciiLength = 0,*/ sjisLength
= 0, i
;
1523 /* enumerate all converters */
1524 status
=U_ZERO_ERROR
;
1525 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1526 cnv
=ucnv_open(name
, &status
);
1527 if(U_SUCCESS(status
)) {
1528 TestAmbiguousConverter(cnv
);
1531 log_err("error: unable to open available converter \"%s\"\n", name
);
1532 status
=U_ZERO_ERROR
;
1536 #if !UCONFIG_NO_LEGACY_CONVERSION
1537 sjis_cnv
= ucnv_open("ibm-943", &status
);
1538 if (U_FAILURE(status
))
1540 log_data_err("Failed to create a SJIS converter\n");
1543 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1544 if (U_FAILURE(status
))
1546 log_data_err("Failed to create a LATIN-1 converter\n");
1547 ucnv_close(sjis_cnv
);
1550 /* convert target from SJIS to Unicode */
1551 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, UPRV_LENGTHOF(sjisResult
), target
, (int32_t)strlen(target
), &status
);
1552 if (U_FAILURE(status
))
1554 log_err("Failed to convert the SJIS string.\n");
1555 ucnv_close(sjis_cnv
);
1556 ucnv_close(ascii_cnv
);
1559 /* convert target from Latin-1 to Unicode */
1560 /*asciiLength =*/ ucnv_toUChars(ascii_cnv
, asciiResult
, UPRV_LENGTHOF(asciiResult
), target
, (int32_t)strlen(target
), &status
);
1561 if (U_FAILURE(status
))
1563 log_err("Failed to convert the Latin-1 string.\n");
1564 ucnv_close(sjis_cnv
);
1565 ucnv_close(ascii_cnv
);
1568 if (!ucnv_isAmbiguous(sjis_cnv
))
1570 log_err("SJIS converter should contain ambiguous character mappings.\n");
1571 ucnv_close(sjis_cnv
);
1572 ucnv_close(ascii_cnv
);
1575 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1577 log_err("File separators for SJIS don't need to be fixed.\n");
1579 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1580 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1582 log_err("Fixing file separator for SJIS failed.\n");
1584 ucnv_close(sjis_cnv
);
1585 ucnv_close(ascii_cnv
);
1590 TestSignatureDetection(){
1591 /* with null terminated strings */
1593 static const char* data
[] = {
1594 "\xFE\xFF\x00\x00", /* UTF-16BE */
1595 "\xFF\xFE\x00\x00", /* UTF-16LE */
1596 "\xEF\xBB\xBF\x00", /* UTF-8 */
1597 "\x0E\xFE\xFF\x00", /* SCSU */
1599 "\xFE\xFF", /* UTF-16BE */
1600 "\xFF\xFE", /* UTF-16LE */
1601 "\xEF\xBB\xBF", /* UTF-8 */
1602 "\x0E\xFE\xFF", /* SCSU */
1604 "\xFE\xFF\x41\x42", /* UTF-16BE */
1605 "\xFF\xFE\x41\x41", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x41", /* UTF-8 */
1607 "\x0E\xFE\xFF\x41", /* SCSU */
1609 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1610 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1611 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1612 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1613 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1615 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1617 static const char* expected
[] = {
1640 static const int32_t expectedLength
[] ={
1665 int32_t signatureLength
= -1;
1666 const char* source
= NULL
;
1667 const char* enc
= NULL
;
1668 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1671 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1676 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1677 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1680 if(signatureLength
!= expectedLength
[i
]){
1681 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1686 static const char* data
[] = {
1687 "\xFE\xFF\x00", /* UTF-16BE */
1688 "\xFF\xFE\x00", /* UTF-16LE */
1689 "\xEF\xBB\xBF\x00", /* UTF-8 */
1690 "\x0E\xFE\xFF\x00", /* SCSU */
1691 "\x00\x00\xFE\xFF", /* UTF-32BE */
1692 "\xFF\xFE\x00\x00", /* UTF-32LE */
1693 "\xFE\xFF", /* UTF-16BE */
1694 "\xFF\xFE", /* UTF-16LE */
1695 "\xEF\xBB\xBF", /* UTF-8 */
1696 "\x0E\xFE\xFF", /* SCSU */
1697 "\x00\x00\xFE\xFF", /* UTF-32BE */
1698 "\xFF\xFE\x00\x00", /* UTF-32LE */
1699 "\xFE\xFF\x41\x42", /* UTF-16BE */
1700 "\xFF\xFE\x41\x41", /* UTF-16LE */
1701 "\xEF\xBB\xBF\x41", /* UTF-8 */
1702 "\x0E\xFE\xFF\x41", /* SCSU */
1703 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1704 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1705 "\xFB\xEE\x28", /* BOCU-1 */
1706 "\xFF\x41\x42" /* NULL */
1708 static const int len
[] = {
1731 static const char* expected
[] = {
1753 static const int32_t expectedLength
[] ={
1777 int32_t signatureLength
= -1;
1778 int32_t sourceLength
=-1;
1779 const char* source
= NULL
;
1780 const char* enc
= NULL
;
1781 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1784 sourceLength
= len
[i
];
1785 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1787 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1790 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1791 if(expected
[i
] !=NULL
){
1792 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1796 if(signatureLength
!= expectedLength
[i
]){
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1803 static void TestUTF7() {
1805 static const uint8_t in
[]={
1806 /* H - +Jjo- - ! +- +2AHcAQ */
1809 0x2b, 0x4a, 0x6a, 0x6f,
1813 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1816 /* expected test results */
1817 static const int32_t results
[]={
1818 /* number of bytes read, code point */
1821 4, 0x263a, /* <WHITE SMILING FACE> */
1828 const char *cnvName
;
1829 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1830 UErrorCode errorCode
=U_ZERO_ERROR
;
1831 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1832 if(U_FAILURE(errorCode
)) {
1833 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
));
1836 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1837 /* Test the condition when source >= sourceLimit */
1838 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1839 cnvName
= ucnv_getName(cnv
, &errorCode
);
1840 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1841 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1846 static void TestIMAP() {
1848 static const uint8_t in
[]={
1849 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1852 0x26, 0x4a, 0x6a, 0x6f,
1856 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1859 /* expected test results */
1860 static const int32_t results
[]={
1861 /* number of bytes read, code point */
1864 4, 0x263a, /* <WHITE SMILING FACE> */
1871 const char *cnvName
;
1872 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1873 UErrorCode errorCode
=U_ZERO_ERROR
;
1874 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1875 if(U_FAILURE(errorCode
)) {
1876 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
));
1879 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1880 /* Test the condition when source >= sourceLimit */
1881 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1882 cnvName
= ucnv_getName(cnv
, &errorCode
);
1883 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1884 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1889 static void TestUTF8() {
1891 static const uint8_t in
[]={
1895 0xf0, 0x90, 0x80, 0x80,
1896 0xf4, 0x84, 0x8c, 0xa1,
1897 0xf0, 0x90, 0x90, 0x81
1900 /* expected test results */
1901 static const int32_t results
[]={
1902 /* number of bytes read, code point */
1911 /* error test input */
1912 static const uint8_t in2
[]={
1914 0xc0, 0x80, /* illegal non-shortest form */
1915 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1916 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1917 0xc0, 0xc0, /* illegal trail byte */
1918 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1919 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1920 0xfe, /* illegal byte altogether */
1924 /* expected error test results */
1925 static const int32_t results2
[]={
1926 /* number of bytes read, code point */
1931 UConverterToUCallback cb
;
1934 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1935 UErrorCode errorCode
=U_ZERO_ERROR
;
1936 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1937 if(U_FAILURE(errorCode
)) {
1938 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1941 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1942 /* Test the condition when source >= sourceLimit */
1943 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1945 /* test error behavior with a skip callback */
1946 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1947 source
=(const char *)in2
;
1948 limit
=(const char *)(in2
+sizeof(in2
));
1949 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
1954 static void TestCESU8() {
1956 static const uint8_t in
[]={
1960 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1961 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1962 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1966 /* expected test results */
1967 static const int32_t results
[]={
1968 /* number of bytes read, code point */
1974 -1,0xd802, /* may read 3 or 6 bytes */
1975 -1,0x10ffff,/* may read 0 or 3 bytes */
1979 /* error test input */
1980 static const uint8_t in2
[]={
1982 0xc0, 0x80, /* illegal non-shortest form */
1983 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1984 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1985 0xc0, 0xc0, /* illegal trail byte */
1986 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1987 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1988 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1989 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1990 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1991 0xfe, /* illegal byte altogether */
1995 /* expected error test results */
1996 static const int32_t results2
[]={
1997 /* number of bytes read, code point */
2002 UConverterToUCallback cb
;
2005 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
2006 UErrorCode errorCode
=U_ZERO_ERROR
;
2007 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
2008 if(U_FAILURE(errorCode
)) {
2009 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
2012 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
2013 /* Test the condition when source >= sourceLimit */
2014 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2016 /* test error behavior with a skip callback */
2017 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2018 source
=(const char *)in2
;
2019 limit
=(const char *)(in2
+sizeof(in2
));
2020 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
2025 static void TestUTF16() {
2027 static const uint8_t in1
[]={
2028 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2030 static const uint8_t in2
[]={
2031 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2033 static const uint8_t in3
[]={
2034 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2037 /* expected test results */
2038 static const int32_t results1
[]={
2039 /* number of bytes read, code point */
2043 static const int32_t results2
[]={
2044 /* number of bytes read, code point */
2048 static const int32_t results3
[]={
2049 /* number of bytes read, code point */
2056 const char *source
, *limit
;
2058 UErrorCode errorCode
=U_ZERO_ERROR
;
2059 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2060 if(U_FAILURE(errorCode
)) {
2061 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
2065 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2066 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
2068 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2069 ucnv_resetToUnicode(cnv
);
2070 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
2072 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2073 ucnv_resetToUnicode(cnv
);
2074 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2076 /* Test the condition when source >= sourceLimit */
2077 ucnv_resetToUnicode(cnv
);
2078 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2083 static void TestUTF16BE() {
2085 static const uint8_t in
[]={
2091 0xd8, 0x01, 0xdc, 0x01
2094 /* expected test results */
2095 static const int32_t results
[]={
2096 /* number of bytes read, code point */
2105 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2106 UErrorCode errorCode
=U_ZERO_ERROR
;
2107 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2108 if(U_FAILURE(errorCode
)) {
2109 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2112 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2113 /* Test the condition when source >= sourceLimit */
2114 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2115 /*Test for the condition where there is an invalid character*/
2117 static const uint8_t source2
[]={0x61};
2118 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2119 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2123 * Test disabled because currently the UTF-16BE/LE converters are supposed
2124 * to not set errors for unpaired surrogates.
2125 * This may change with
2126 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2129 /*Test for the condition where there is a surrogate pair*/
2131 const uint8_t source2
[]={0xd8, 0x01};
2132 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2141 static const uint8_t in
[]={
2146 0x01, 0xd8, 0x01, 0xdc
2149 /* expected test results */
2150 static const int32_t results
[]={
2151 /* number of bytes read, code point */
2159 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2160 UErrorCode errorCode
=U_ZERO_ERROR
;
2161 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2162 if(U_FAILURE(errorCode
)) {
2163 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2166 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2167 /* Test the condition when source >= sourceLimit */
2168 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2169 /*Test for the condition where there is an invalid character*/
2171 static const uint8_t source2
[]={0x61};
2172 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2173 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2177 * Test disabled because currently the UTF-16BE/LE converters are supposed
2178 * to not set errors for unpaired surrogates.
2179 * This may change with
2180 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2183 /*Test for the condition where there is a surrogate character*/
2185 static const uint8_t source2
[]={0x01, 0xd8};
2186 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * TestUTF32: feeds three byte sequences to a converter opened as "UTF-32" and
 * checks them through the TestNextUChar() helper, then checks the
 * "no input left" error path through TestNextUCharError().
 *
 * Each resultsN table is laid out as pairs of (number of bytes read, code point),
 * as stated by the comments inside the tables.
 * The converter is reset with ucnv_resetToUnicode() before each new input.
 */
2193 static void TestUTF32() {
/* in1 starts with the bytes 00 00 FE FF (the big-endian UTF-32 byte order mark). */
2195 static const uint8_t in1
[]={
2196 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
/* in2 starts with the bytes FF FE 00 00 (the little-endian UTF-32 byte order mark). */
2198 static const uint8_t in2
[]={
2199 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
/* in3 starts with 00 00 FE FE, which is not a byte order mark, and ends with the
   surrogate values D840 and DC01 stored as separate 4-byte units. */
2201 static const uint8_t in3
[]={
2202 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2205 /* expected test results */
2206 static const int32_t results1
[]={
2207 /* number of bytes read, code point */
2211 static const int32_t results2
[]={
2212 /* number of bytes read, code point */
2216 static const int32_t results3
[]={
2217 /* number of bytes read, code point */
2220 4, 0xfffd, /* unmatched surrogate */
2221 4, 0xfffd /* unmatched surrogate */
2224 const char *source
, *limit
;
2226 UErrorCode errorCode
=U_ZERO_ERROR
;
2227 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
/* Opening the converter can fail; that is reported with log_data_err(). */
2228 if(U_FAILURE(errorCode
)) {
2229 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
/* First input: in1 checked against results1. */
2233 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2234 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
/* Second input: in2 checked against results2, after resetting the converter. */
2236 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2237 ucnv_resetToUnicode(cnv
);
2238 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
/* Third input: in3 checked against results3, after resetting the converter. */
2240 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2241 ucnv_resetToUnicode(cnv
);
2242 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2244 /* Test the condition when source >= sourceLimit */
2245 ucnv_resetToUnicode(cnv
);
/* source still points at in3 here; it is passed as both start and limit, so the
   range is empty and U_INDEX_OUTOFBOUNDS_ERROR is the expected status. */
2246 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2254 static const uint8_t in
[]={
2255 0x00, 0x00, 0x00, 0x61,
2256 0x00, 0x00, 0x30, 0x61,
2257 0x00, 0x00, 0xdc, 0x00,
2258 0x00, 0x00, 0xd8, 0x00,
2259 0x00, 0x00, 0xdf, 0xff,
2260 0x00, 0x00, 0xff, 0xfe,
2261 0x00, 0x10, 0xab, 0xcd,
2262 0x00, 0x10, 0xff, 0xff
2265 /* expected test results */
2266 static const int32_t results
[]={
2267 /* number of bytes read, code point */
2278 /* error test input */
2279 static const uint8_t in2
[]={
2280 0x00, 0x00, 0x00, 0x61,
2281 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2282 0x00, 0x00, 0x00, 0x62,
2283 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2284 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2285 0x00, 0x00, 0x01, 0x62,
2286 0x00, 0x00, 0x02, 0x62
2289 /* expected error test results */
2290 static const int32_t results2
[]={
2291 /* number of bytes read, code point */
2298 UConverterToUCallback cb
;
2301 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2302 UErrorCode errorCode
=U_ZERO_ERROR
;
2303 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2304 if(U_FAILURE(errorCode
)) {
2305 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2308 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2310 /* Test the condition when source >= sourceLimit */
2311 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2313 /* test error behavior with a skip callback */
2314 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2315 source
=(const char *)in2
;
2316 limit
=(const char *)(in2
+sizeof(in2
));
2317 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
2325 static const uint8_t in
[]={
2326 0x61, 0x00, 0x00, 0x00,
2327 0x61, 0x30, 0x00, 0x00,
2328 0x00, 0xdc, 0x00, 0x00,
2329 0x00, 0xd8, 0x00, 0x00,
2330 0xff, 0xdf, 0x00, 0x00,
2331 0xfe, 0xff, 0x00, 0x00,
2332 0xcd, 0xab, 0x10, 0x00,
2333 0xff, 0xff, 0x10, 0x00
2336 /* expected test results */
2337 static const int32_t results
[]={
2338 /* number of bytes read, code point */
2349 /* error test input */
2350 static const uint8_t in2
[]={
2351 0x61, 0x00, 0x00, 0x00,
2352 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2353 0x62, 0x00, 0x00, 0x00,
2354 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2355 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2356 0x62, 0x01, 0x00, 0x00,
2357 0x62, 0x02, 0x00, 0x00,
2360 /* expected error test results */
2361 static const int32_t results2
[]={
2362 /* number of bytes read, code point */
2369 UConverterToUCallback cb
;
2372 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2373 UErrorCode errorCode
=U_ZERO_ERROR
;
2374 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2375 if(U_FAILURE(errorCode
)) {
2376 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2379 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2381 /* Test the condition when source >= sourceLimit */
2382 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2384 /* test error behavior with a skip callback */
2385 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2386 source
=(const char *)in2
;
2387 limit
=(const char *)(in2
+sizeof(in2
));
2388 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
2396 static const uint8_t in
[]={
2405 /* expected test results */
2406 static const int32_t results
[]={
2407 /* number of bytes read, code point */
2415 static const uint16_t in1
[] = {
2416 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2417 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2418 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2419 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2420 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2421 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2422 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2423 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2424 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2425 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2426 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2429 static const uint8_t out1
[] = {
2430 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2431 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2432 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2433 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2434 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2435 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2436 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2437 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2438 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2439 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2440 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2443 static const uint16_t in2
[]={
2444 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2445 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2446 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2447 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2448 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2449 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2450 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2451 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2452 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2453 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2454 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2455 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2456 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2457 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2458 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2459 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2460 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2461 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2462 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2463 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2464 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2465 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2466 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2467 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2468 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2469 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2470 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2471 0x37, 0x20, 0x2A, 0x2F,
2473 static const unsigned char out2
[]={
2474 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2475 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2476 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2477 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2478 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2479 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2480 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2481 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2482 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2483 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2484 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2485 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2486 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2487 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2488 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2489 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2490 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2491 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2492 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2493 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2494 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2495 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2496 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2497 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2498 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2499 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2500 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2501 0x37, 0x20, 0x2A, 0x2F,
2503 const char *source
=(const char *)in
;
2504 const char *limit
=(const char *)in
+sizeof(in
);
2506 UErrorCode errorCode
=U_ZERO_ERROR
;
2507 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2508 if(U_FAILURE(errorCode
)) {
2509 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2512 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2513 /* Test the condition when source >= sourceLimit */
2514 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2515 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2516 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
2524 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2525 /* expected test results */
2526 static const int32_t results
[]={
2527 /* number of bytes read, code point */
2536 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2537 UErrorCode errorCode
=U_ZERO_ERROR
;
2538 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
2539 if(U_FAILURE(errorCode
)) {
2540 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2543 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2544 /* Test the condition when source >= sourceLimit */
2545 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2546 /*Test for Illegal character */ /*
2548 static const uint8_t input1[]={ 0xA1 };
2549 const char* illegalsource=(const char*)input1;
2550 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2559 static const uint8_t in
[]={
2568 /* expected test results */
2569 static const int32_t results
[]={
2570 /* number of bytes read, code point */
2578 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2579 UErrorCode errorCode
=U_ZERO_ERROR
;
2581 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2582 if(U_FAILURE(errorCode
)) {
2583 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2586 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2587 /* Test the condition when source >= sourceLimit */
2588 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2589 /*Test for the condition where there is an invalid character*/
2591 static const uint8_t source2
[]={0x1a, 0x1b};
2592 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2594 /*Test for the condition where we have a truncated char*/
2596 static const uint8_t source1
[]={0xc4};
2597 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2598 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2606 static const uint8_t in
[]={
2617 /* expected test results */
2618 static const int32_t results
[]={
2619 /* number of bytes read, code point */
2629 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2630 UErrorCode errorCode
=U_ZERO_ERROR
;
2632 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2633 if(U_FAILURE(errorCode
)) {
2634 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2637 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2638 /* Test the condition when source >= sourceLimit */
2639 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2640 /*Test for the condition where there is an invalid character*/
2642 static const uint8_t source2
[]={0xa1, 0x80};
2643 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2645 /*Test for the condition where we have a truncated char*/
2647 static const uint8_t source1
[]={0xc4};
2648 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2649 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2655 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2658 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2660 const char *cnvName
= "ibm-1363";
2661 UErrorCode status
= U_ZERO_ERROR
;
2662 const char sourceData
[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2663 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2664 const char *source
= sourceData
;
2665 const char *sourceLim
= sourceData
+sizeof(sourceData
);
2667 UConverter
*cnv
=ucnv_open(cnvName
, &status
);
2668 if(U_FAILURE(status
)) {
2669 log_data_err("Unable to open %s converter: %s\n", cnvName
, u_errorName(status
));
2675 UChar targetBuf
[256];
2676 UChar
*target
= targetBuf
;
2677 UChar
*targetLim
= target
+256;
2678 ucnv_toUnicode(cnv
, &target
, targetLim
, &source
, sourceLim
, NULL
, TRUE
, &status
);
2680 log_info("After convert: target@%d, source@%d, status%s\n",
2681 target
-targetBuf
, source
-sourceData
, u_errorName(status
));
2683 if(U_FAILURE(status
)) {
2684 log_err("Failed to convert: %s\n", u_errorName(status
));
2691 c1
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2692 log_verbose("c1: U+%04X, source@%d, status %s\n", c1
, source
-sourceData
, u_errorName(status
));
2694 c2
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2695 log_verbose("c2: U+%04X, source@%d, status %s\n", c2
, source
-sourceData
, u_errorName(status
));
2697 c3
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2698 log_verbose("c3: U+%04X, source@%d, status %s\n", c3
, source
-sourceData
, u_errorName(status
));
2700 if(status
==U_INDEX_OUTOFBOUNDS_ERROR
&& c3
==0xFFFF) {
2701 log_verbose("OK\n");
2703 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2711 #ifdef U_ENABLE_GENERIC_ISO_2022
2716 static const uint8_t in
[]={
2723 0xf0, 0x90, 0x80, 0x80
2728 /* expected test results */
2729 static const int32_t results
[]={
2730 /* number of bytes read, code point */
2731 4, 0x0031, /* 4 bytes including the escape sequence */
2739 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2740 UErrorCode errorCode
=U_ZERO_ERROR
;
2743 cnv
=ucnv_open("ISO_2022", &errorCode
);
2744 if(U_FAILURE(errorCode
)) {
2745 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2748 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2750 /* Test the condition when source >= sourceLimit */
2751 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2752 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2753 /*Test for the condition where we have a truncated char*/
2755 static const uint8_t source1
[]={0xc4};
2756 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2757 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2759 /*Test for the condition where there is an invalid character*/
2761 static const uint8_t source2
[]={0xa1, 0x01};
2762 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
2770 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2771 const UChar
* uSource
;
2772 const UChar
* uSourceLimit
;
2773 const char* cSource
;
2774 const char* cSourceLimit
;
2775 UChar
*uTargetLimit
=NULL
;
2778 const char *cTargetLimit
;
2780 UChar
*uBuf
; /*,*test;*/
2781 int32_t uBufSize
= 120;
2784 UErrorCode errorCode
=U_ZERO_ERROR
;
2785 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2786 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2789 uSource
= (UChar
*) source
;
2790 uSourceLimit
=(const UChar
*)sourceLimit
;
2794 cTargetLimit
= cBuf
;
2795 uTargetLimit
= uBuf
;
2799 cTargetLimit
= cTargetLimit
+ i
;
2800 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2801 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2802 errorCode
=U_ZERO_ERROR
;
2806 if(U_FAILURE(errorCode
)){
2807 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2811 }while (uSource
<uSourceLimit
);
2813 cSourceLimit
=cTarget
;
2815 uTargetLimit
=uTargetLimit
+i
;
2816 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2817 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2818 errorCode
=U_ZERO_ERROR
;
2821 if(U_FAILURE(errorCode
)){
2822 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2825 }while(cSource
<cSourceLimit
);
2829 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2830 if(uBuf
[len
]!=uSource
[len
]){
2831 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2838 /* Test for Jitterbug 778 */
2839 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2840 const UChar
* uSource
;
2841 const UChar
* uSourceLimit
;
2842 const char* cSource
;
2843 UChar
*uTargetLimit
=NULL
;
2846 const char *cTargetLimit
;
2849 int32_t uBufSize
= 120;
2850 int numCharsInTarget
=0;
2851 UErrorCode errorCode
=U_ZERO_ERROR
;
2852 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2853 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2855 uSourceLimit
=sourceLimit
;
2857 cTargetLimit
= cBuf
+uBufSize
*5;
2859 uTargetLimit
= uBuf
+ uBufSize
*5;
2861 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
2862 if(U_FAILURE(errorCode
)){
2863 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2868 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2869 if(U_FAILURE(errorCode
)){
2870 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2874 while(uSource
<uSourceLimit
){
2875 if(*test
!=*uSource
){
2877 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
2886 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2887 const UChar
* uSource
;
2888 const UChar
* uSourceLimit
;
2889 const char* cSource
;
2890 const char* cSourceLimit
;
2891 UChar
*uTargetLimit
=NULL
;
2894 const char *cTargetLimit
;
2896 UChar
*uBuf
; /*,*test;*/
2897 int32_t uBufSize
= 120;
2900 const UChar
*temp
= sourceLimit
;
2901 UErrorCode errorCode
=U_ZERO_ERROR
;
2902 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2903 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2907 uSource
= (UChar
*) source
;
2911 cTargetLimit
= cBuf
;
2912 uTargetLimit
= uBuf
+uBufSize
*5;
2913 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2914 uSourceLimit
=uSource
;
2917 if (uSourceLimit
< sourceLimit
) {
2918 uSourceLimit
= uSourceLimit
+1;
2920 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2921 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2922 errorCode
=U_ZERO_ERROR
;
2926 if(U_FAILURE(errorCode
)){
2927 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2931 }while (uSource
<temp
);
2935 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2936 cSourceLimit
= cSourceLimit
+1;
2938 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2939 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2940 errorCode
=U_ZERO_ERROR
;
2943 if(U_FAILURE(errorCode
)){
2944 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2947 }while(cSource
<cTarget
);
2951 for(;len
<(int)(source
- sourceLimit
);len
++){
2952 if(uBuf
[len
]!=uSource
[len
]){
2953 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2961 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2962 const uint16_t results
[], const char* message
){
2963 /* const char* s0; */
2964 const char* s
=(char*)source
;
2965 const uint16_t *r
=results
;
2966 UErrorCode errorCode
=U_ZERO_ERROR
;
2971 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2972 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2973 break; /* no more significant input */
2974 } else if(U_FAILURE(errorCode
)) {
2975 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
2978 if(U16_IS_LEAD(*r
)){
2980 U16_NEXT(r
, i
, len
, exC
);
2985 if(c
!=(uint32_t)(exC
))
2986 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * TestJitterbug930: regression test for Jitterbug 930, run against the
 * converter named by enc (opened with my_ucnv_open(), closed with ucnv_close()).
 *
 * It calls ucnv_fromUnicode() twice. After the first call it counts the entries
 * of off that are smaller than off[10] and reports a failure with log_err()
 * when that count differs from the number of bytes written (target-out).
 * After the second call a failure message with an expected value of -1 is
 * logged from inside the second scan of off.
 *
 * The function is declared to return int; the return statement is not among
 * the visible lines.
 */
2992 static int TestJitterbug930(const char* enc
){
2993 UErrorCode err
= U_ZERO_ERROR
;
2994 UConverter
*converter
;
2998 const UChar
*source
= in
;
3000 int32_t* offsets
= off
;
3001 int numOffWritten
=0;
3003 converter
= my_ucnv_open(enc
, &err
);
3005 in
[0] = 0x41; /* 0x4E00;*/
/* NOTE(review): off is filled with '*' bytes before each conversion and the
   scans below compare entries against off[10]; this presumably relies on
   off[10] never being overwritten by the converter, i.e. on fewer than 11
   offsets being produced - confirm against the buffer sizes passed in. */
3010 memset(off
, '*', sizeof(off
));
3012 ucnv_fromUnicode (converter
,
3021 /* writes three bytes into the output buffer: 41 1B 24
3022 * but offsets contains 0 1 1
3024 while(*offsets
< off
[10]){
3028 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
3029 if(numOffWritten
!= (int)(target
-out
)){
3030 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
3035 memset(off
,'*' , sizeof(off
));
3039 ucnv_fromUnicode (converter
,
3048 while(*offsets
< off
[10]){
3051 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
3056 /* writes 42 43 7A into output buffer,
3057 * offsets contains -1 -1 -1
3059 ucnv_close(converter
);
3066 static const uint16_t in
[]={
3067 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3068 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3069 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3070 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3071 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3072 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3073 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3074 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3075 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3076 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3077 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3078 0x005A, 0x005B, 0x005C, 0x000A
3080 const UChar
* uSource
;
3081 const UChar
* uSourceLimit
;
3082 const char* cSource
;
3083 const char* cSourceLimit
;
3084 UChar
*uTargetLimit
=NULL
;
3087 const char *cTargetLimit
;
3090 int32_t uBufSize
= 120;
3091 UErrorCode errorCode
=U_ZERO_ERROR
;
3093 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3094 int32_t* myOff
= offsets
;
3095 cnv
=ucnv_open("HZ", &errorCode
);
3096 if(U_FAILURE(errorCode
)) {
3097 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3101 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3102 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3103 uSource
= (const UChar
*)in
;
3104 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3106 cTargetLimit
= cBuf
+uBufSize
*5;
3108 uTargetLimit
= uBuf
+ uBufSize
*5;
3109 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3110 if(U_FAILURE(errorCode
)){
3111 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3115 cSourceLimit
=cTarget
;
3118 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3119 if(U_FAILURE(errorCode
)){
3120 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3123 uSource
= (const UChar
*)in
;
3124 while(uSource
<uSourceLimit
){
3125 if(*test
!=*uSource
){
3127 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3132 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3133 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3134 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3135 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3136 TestJitterbug930("csISO2022JP");
3146 static const uint16_t in
[]={
3147 /* test full range of Devanagari */
3148 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3149 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3150 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3151 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3152 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3153 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3154 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3155 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3156 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3157 0x096D,0x096E,0x096F,
3158 /* test Soft halant*/
3159 0x0915,0x094d, 0x200D,
3160 /* test explicit halant */
3161 0x0915,0x094d, 0x200c,
3162 /* test double danda */
3165 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3166 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3167 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3168 /* tests from Lotus */
3169 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3170 0x0930,0x094D,0x200D,
3171 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3172 0x0915,0x0921,0x002B,0x095F,
3174 0x0B86, 0xB87, 0xB88,
3176 0x0C05, 0x0C02, 0x0C03,0x0c31,
3178 0x0C85, 0xC82, 0x0C83,
3179 /* test Abbr sign and Anudatta */
3189 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3190 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3193 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3194 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3195 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3196 0x093D /* Avagraha 0xEA, 0xE9*/,
3204 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3206 static const unsigned char byteArr
[]={
3208 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3209 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3210 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3211 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3212 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3213 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3214 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3215 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3216 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3218 /* test soft halant */
3220 /* test explicit halant */
3222 /* test double danda */
3225 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3226 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3227 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3230 /* tests from Lotus */
3231 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3232 0xEF,0x42,0xCF,0xE8,0xD9,
3233 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3234 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3236 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3238 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3240 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3241 /* anudatta and abbreviation sign */
3242 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3245 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3247 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3249 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3251 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3253 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3255 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3257 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3259 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3261 0xB3, 0xE9, /* Ka + NUKTA */
3263 0xB4, 0xE9, /* Kha + NUKTA */
3265 0xB5, 0xE9, /* Ga + NUKTA */
3277 /* just consume unhandled codepoints */
3281 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,UPRV_LENGTHOF(in
),"x-iscii-de",NULL
,TRUE
);
3282 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
3289 static const uint16_t in
[]={
3290 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3291 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3292 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3293 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3294 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3295 0x201D, 0x3014, 0x000D, 0x000A,
3296 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3297 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3299 const UChar
* uSource
;
3300 const UChar
* uSourceLimit
;
3301 const char* cSource
;
3302 const char* cSourceLimit
;
3303 UChar
*uTargetLimit
=NULL
;
3306 const char *cTargetLimit
;
3309 int32_t uBufSize
= 120;
3310 UErrorCode errorCode
=U_ZERO_ERROR
;
3312 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3313 int32_t* myOff
= offsets
;
3314 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3315 if(U_FAILURE(errorCode
)) {
3316 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3320 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3321 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3322 uSource
= (const UChar
*)in
;
3323 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3325 cTargetLimit
= cBuf
+uBufSize
*5;
3327 uTargetLimit
= uBuf
+ uBufSize
*5;
3328 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3329 if(U_FAILURE(errorCode
)){
3330 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3334 cSourceLimit
=cTarget
;
3337 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3338 if(U_FAILURE(errorCode
)){
3339 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3343 uSource
= (const UChar
*)in
;
3344 while(uSource
<uSourceLimit
){
3345 if(*test
!=*uSource
){
3347 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3353 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3354 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3355 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3356 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3357 TestJitterbug930("csISO2022JP");
3364 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3365 const UChar
* uSource
;
3366 const UChar
* uSourceLimit
;
3367 const char* cSource
;
3368 const char* cSourceLimit
;
3369 UChar
*uTargetLimit
=NULL
;
3372 const char *cTargetLimit
;
3375 int32_t uBufSize
= 120*10;
3376 UErrorCode errorCode
=U_ZERO_ERROR
;
3378 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3379 int32_t* myOff
= offsets
;
3380 cnv
=my_ucnv_open(conv
, &errorCode
);
3381 if(U_FAILURE(errorCode
)) {
3382 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3386 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3387 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3388 uSource
= (const UChar
*)in
;
3389 uSourceLimit
=uSource
+len
;
3391 cTargetLimit
= cBuf
+uBufSize
;
3393 uTargetLimit
= uBuf
+ uBufSize
;
3394 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3395 if(U_FAILURE(errorCode
)){
3396 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3399 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3401 cSourceLimit
=cTarget
;
3404 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3405 if(U_FAILURE(errorCode
)){
3406 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
3410 uSource
= (const UChar
*)in
;
3411 while(uSource
<uSourceLimit
){
3412 if(*test
!=*uSource
){
3413 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
3418 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3419 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3420 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
3421 if(byteArr
&& byteArrLen
!=0){
3422 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3423 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
3426 cSourceLimit
= cSource
+byteArrLen
;
3429 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3430 if(U_FAILURE(errorCode
)){
3431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3435 uSource
= (const UChar
*)in
;
3436 while(uSource
<uSourceLimit
){
3437 if(*test
!=*uSource
){
3438 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3451 static UChar U_CALLCONV
3452 _charAt(int32_t offset
, void *context
) {
3453 return ((char*)context
)[offset
];
3457 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3460 if(U_FAILURE(*status
)){
3463 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3464 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3468 srcLen
= (int32_t)uprv_strlen(src
);
3471 for (; srcIndex
<srcLen
; ) {
3472 UChar32 c
= src
[srcIndex
++];
3473 if (c
== 0x005C /*'\\'*/) {
3474 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3475 if (c
== (UChar32
)0xFFFFFFFF) {
3476 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3477 break; /* invalid escape sequence */
3480 if(dstIndex
< dstLen
){
3482 dst
[dstIndex
++] = U16_LEAD(c
);
3483 if(dstIndex
<dstLen
){
3484 dst
[dstIndex
]=U16_TRAIL(c
);
3486 *status
=U_BUFFER_OVERFLOW_ERROR
;
3489 dst
[dstIndex
]=(UChar
)c
;
3493 *status
= U_BUFFER_OVERFLOW_ERROR
;
3495 dstIndex
++; /* for preflighting */
/*
 * TestFullRoundtrip: calls TestConv() with the converter name `cp` on short
 * UTF-16 buffers (usource, nsrc) for successive values of i, looping while
 * i <= 0x10FFFF.  For each value TestConv() is run on the value alone, on the
 * buffer after the value has been appended once and then twice more, and on
 * nsrc.
 * NOTE(review): this listing omits several statements of the function (the
 * declarations and initial values of i, len and ulen, the updates of ulen,
 * the stores into nsrc and any handling of the surrogate range) -- confirm
 * those against the complete source before changing anything here.
 */
3501 TestFullRoundtrip(const char* cp
){
3502 UChar usource
[10] ={0};
3503 UChar nsrc
[10] = {0};
3507 /* Test codepoint 0 */
/* usource is still all zeros here, so these two calls convert one and two
 * U+0000 code units. */
3508 TestConv(usource
,1,cp
,"",NULL
,0);
3509 TestConv(usource
,2,cp
,"",NULL
,0);
3511 TestConv(nsrc
,3,cp
,"",NULL
,0);
3513 for(;i
<=0x10FFFF;i
++){
/* Value stored as a single UTF-16 code unit -- presumably the branch for
 * i <= 0xFFFF; the condition itself is not visible in this listing. */
3519 usource
[0] =(UChar
) i
;
/* Value stored as a UTF-16 lead/trail surrogate pair. */
3522 usource
[0]=U16_LEAD(i
);
3523 usource
[1]=U16_TRAIL(i
);
3530 /* Test only single code points */
3531 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3532 /* Test codepoint repeated twice */
/* Append a copy of the first two code units at offset ulen. */
3533 usource
[ulen
]=usource
[0];
3534 usource
[ulen
+1]=usource
[1];
3536 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3537 /* Test codepoint repeated 3 times */
3538 usource
[ulen
]=usource
[0];
3539 usource
[ulen
+1]=usource
[1];
3541 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3542 /* Test codepoint in between 2 codepoints */
3546 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* Clear all ten code units of usource before the next iteration. */
3547 uprv_memset(usource
,0,sizeof(UChar
)*10);
3552 TestRoundTrippingAllUTF(void){
3553 if(!getTestOption(QUICK_OPTION
)){
3554 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3555 TestFullRoundtrip("BOCU-1");
3556 log_verbose("Running exhaustive round trip test for SCSU\n");
3557 TestFullRoundtrip("SCSU");
3558 log_verbose("Running exhaustive round trip test for UTF-8\n");
3559 TestFullRoundtrip("UTF-8");
3560 log_verbose("Running exhaustive round trip test for CESU-8\n");
3561 TestFullRoundtrip("CESU-8");
3562 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3563 TestFullRoundtrip("UTF-16BE");
3564 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3565 TestFullRoundtrip("UTF-16LE");
3566 log_verbose("Running exhaustive round trip test for UTF-16\n");
3567 TestFullRoundtrip("UTF-16");
3568 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3569 TestFullRoundtrip("UTF-32BE");
3570 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3571 TestFullRoundtrip("UTF-32LE");
3572 log_verbose("Running exhaustive round trip test for UTF-32\n");
3573 TestFullRoundtrip("UTF-32");
3574 log_verbose("Running exhaustive round trip test for UTF-7\n");
3575 TestFullRoundtrip("UTF-7");
3576 log_verbose("Running exhaustive round trip test for UTF-7\n");
3577 TestFullRoundtrip("UTF-7,version=1");
3578 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3579 TestFullRoundtrip("IMAP-mailbox-name");
3582 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3583 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3584 * The old mappings remain as fallbacks.
3585 * This test may be reintroduced at a later time.
3590 log_verbose("Running exhaustive round trip test for GB18030\n");
3591 TestFullRoundtrip("GB18030");
3599 static const uint16_t germanUTF16
[]={
3600 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3603 static const uint8_t germanSCSU
[]={
3604 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3607 static const uint16_t russianUTF16
[]={
3608 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3611 static const uint8_t russianSCSU
[]={
3612 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3615 static const uint16_t japaneseUTF16
[]={
3616 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3617 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3618 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3619 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3620 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3621 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3622 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3623 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3624 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3625 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3626 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3627 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3628 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3629 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3630 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3633 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3634 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3635 static const uint8_t japaneseSCSU
[]={
3636 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3637 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3638 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3639 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3640 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3641 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3642 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3643 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3644 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3645 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3646 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3650 static const uint16_t allFeaturesUTF16
[]={
3651 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3652 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3653 0x01df, 0xf000, 0xdbff, 0xdfff
3656 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3657 * result here (34B vs. 35B)
3659 static const uint8_t allFeaturesSCSU
[]={
3660 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3661 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3662 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3663 0xdf, 0x14, 0x80, 0x15, 0xff
3665 static const uint16_t monkeyIn
[]={
3666 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3667 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3668 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3669 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3670 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3671 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3672 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3673 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3674 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3675 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3676 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3677 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3678 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3679 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3680 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3681 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3682 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3683 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3684 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3685 /* test non-BMP code points */
3686 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3687 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3688 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3689 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3690 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3691 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3692 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3693 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3694 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3695 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3696 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3699 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3700 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3701 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3702 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3703 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3705 static const char *fTestCases
[] = {
3706 "\\ud800\\udc00", /* smallest surrogate*/
3708 "\\udBff\\udFff", /* largest surrogate pair*/
3711 "Hello \\u9292 \\u9192 World!",
3712 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3713 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3715 "\\u0648\\u06c8", /* catch missing reset*/
3718 "\\u4444\\uE001", /* lowest quotable*/
3719 "\\u4444\\uf2FF", /* highest quotable*/
3720 "\\u4444\\uf188\\u4444",
3721 "\\u4444\\uf188\\uf288",
3722 "\\u4444\\uf188abc\\u0429\\uf288",
3724 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3725 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3726 "Hello World!123456",
3727 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3729 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3730 "abc\\u4411d", /* uses SQU*/
3731 "abc\\u4411\\u4412d",/* uses SCU*/
3732 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3735 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3736 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3737 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3739 "", /* empty input*/
3740 "\\u0000", /* smallest BMP character*/
3741 "\\uFFFF", /* largest BMP character*/
3743 /* regression tests*/
3744 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3745 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3746 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3747 "\\u0041\\u00df\\u0401\\u015f",
3748 "\\u9066\\u2123abc",
3749 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3750 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3753 for(;i
<UPRV_LENGTHOF(fTestCases
);i
++){
3754 const char* cSrc
= fTestCases
[i
];
3755 UErrorCode status
= U_ZERO_ERROR
;
3756 int32_t cSrcLen
,srcLen
;
3758 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3759 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3760 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3761 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3762 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3763 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3766 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3767 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3768 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3769 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3770 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3771 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3772 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3775 #if !UCONFIG_NO_LEGACY_CONVERSION
3776 static void TestJitterbug2346(){
3777 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3778 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3779 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3781 UChar uTarget
[500]={'\0'};
3782 UChar
* utarget
=uTarget
;
3783 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3785 char cTarget
[500]={'\0'};
3786 char* ctarget
=cTarget
;
3787 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3788 const char* csource
=source
;
3789 UChar
* temp
= expected
;
3790 UErrorCode err
=U_ZERO_ERROR
;
3792 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3793 if(U_FAILURE(err
)) {
3794 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3797 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3798 if(U_FAILURE(err
)) {
3799 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
3802 utargetLimit
=utarget
;
3804 while(utarget
<utargetLimit
){
3805 if(*temp
!=*utarget
){
3807 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3812 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3813 if(U_FAILURE(err
)) {
3814 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3817 ctargetLimit
=ctarget
;
3825 TestISO_2022_JP_1() {
3827 static const uint16_t in
[]={
3828 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3829 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3830 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3831 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3832 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3833 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3834 0x201D, 0x000D, 0x000A,
3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3836 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3837 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3838 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3839 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3840 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3842 const UChar
* uSource
;
3843 const UChar
* uSourceLimit
;
3844 const char* cSource
;
3845 const char* cSourceLimit
;
3846 UChar
*uTargetLimit
=NULL
;
3849 const char *cTargetLimit
;
3852 int32_t uBufSize
= 120;
3853 UErrorCode errorCode
=U_ZERO_ERROR
;
3856 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3857 if(U_FAILURE(errorCode
)) {
3858 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3862 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3863 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3864 uSource
= (const UChar
*)in
;
3865 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3867 cTargetLimit
= cBuf
+uBufSize
*5;
3869 uTargetLimit
= uBuf
+ uBufSize
*5;
3870 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3871 if(U_FAILURE(errorCode
)){
3872 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3876 cSourceLimit
=cTarget
;
3878 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3879 if(U_FAILURE(errorCode
)){
3880 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3883 uSource
= (const UChar
*)in
;
3884 while(uSource
<uSourceLimit
){
3885 if(*test
!=*uSource
){
3887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3893 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3894 /*Test for the condition where there is an invalid character*/
3897 static const uint8_t source2
[]={0x0e,0x24,0x053};
3898 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3900 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3901 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3908 TestISO_2022_JP_2() {
3910 static const uint16_t in
[]={
3911 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3912 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3913 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3914 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3915 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3916 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3917 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3918 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3919 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3920 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3921 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3922 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3923 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3924 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3925 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3926 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3927 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3928 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3929 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3931 const UChar
* uSource
;
3932 const UChar
* uSourceLimit
;
3933 const char* cSource
;
3934 const char* cSourceLimit
;
3935 UChar
*uTargetLimit
=NULL
;
3938 const char *cTargetLimit
;
3941 int32_t uBufSize
= 120;
3942 UErrorCode errorCode
=U_ZERO_ERROR
;
3944 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3945 int32_t* myOff
= offsets
;
3946 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3947 if(U_FAILURE(errorCode
)) {
3948 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3952 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3953 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3954 uSource
= (const UChar
*)in
;
3955 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3957 cTargetLimit
= cBuf
+uBufSize
*5;
3959 uTargetLimit
= uBuf
+ uBufSize
*5;
3960 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3961 if(U_FAILURE(errorCode
)){
3962 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3966 cSourceLimit
=cTarget
;
3969 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3970 if(U_FAILURE(errorCode
)){
3971 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3974 uSource
= (const UChar
*)in
;
3975 while(uSource
<uSourceLimit
){
3976 if(*test
!=*uSource
){
3978 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3983 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3984 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3985 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3986 /*Test for the condition where there is an invalid character*/
3989 static const uint8_t source2
[]={0x0e,0x24,0x053};
3990 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
4001 static const uint16_t in
[]={
4002 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4003 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4004 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4005 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4006 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4007 ,0x53E3,0x53E4,0x000A,0x000D};
4008 const UChar
* uSource
;
4009 const UChar
* uSourceLimit
;
4010 const char* cSource
;
4011 const char* cSourceLimit
;
4012 UChar
*uTargetLimit
=NULL
;
4015 const char *cTargetLimit
;
4018 int32_t uBufSize
= 120;
4019 UErrorCode errorCode
=U_ZERO_ERROR
;
4021 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4022 int32_t* myOff
= offsets
;
4023 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
4024 if(U_FAILURE(errorCode
)) {
4025 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4029 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4030 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4031 uSource
= (const UChar
*)in
;
4032 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4034 cTargetLimit
= cBuf
+uBufSize
*5;
4036 uTargetLimit
= uBuf
+ uBufSize
*5;
4037 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4038 if(U_FAILURE(errorCode
)){
4039 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4043 cSourceLimit
=cTarget
;
4046 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4047 if(U_FAILURE(errorCode
)){
4048 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4051 uSource
= (const UChar
*)in
;
4052 while(uSource
<uSourceLimit
){
4053 if(*test
!=*uSource
){
4054 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4059 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4060 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4061 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4062 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4063 TestJitterbug930("csISO2022KR");
4064 /*Test for the condition where there is an invalid character*/
4067 static const uint8_t source2
[]={0x1b,0x24,0x053};
4068 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4069 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4078 TestISO_2022_KR_1() {
4080 static const uint16_t in
[]={
4081 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4082 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4083 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4084 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4085 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4086 ,0x53E3,0x53E4,0x000A,0x000D};
4087 const UChar
* uSource
;
4088 const UChar
* uSourceLimit
;
4089 const char* cSource
;
4090 const char* cSourceLimit
;
4091 UChar
*uTargetLimit
=NULL
;
4094 const char *cTargetLimit
;
4097 int32_t uBufSize
= 120;
4098 UErrorCode errorCode
=U_ZERO_ERROR
;
4100 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4101 int32_t* myOff
= offsets
;
4102 cnv
=ucnv_open("ibm-25546", &errorCode
);
4103 if(U_FAILURE(errorCode
)) {
4104 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4108 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4109 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4110 uSource
= (const UChar
*)in
;
4111 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4113 cTargetLimit
= cBuf
+uBufSize
*5;
4115 uTargetLimit
= uBuf
+ uBufSize
*5;
4116 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4117 if(U_FAILURE(errorCode
)){
4118 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4122 cSourceLimit
=cTarget
;
4125 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4126 if(U_FAILURE(errorCode
)){
4127 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4130 uSource
= (const UChar
*)in
;
4131 while(uSource
<uSourceLimit
){
4132 if(*test
!=*uSource
){
4133 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4139 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4140 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4141 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4143 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4144 /*Test for the condition where there is an invalid character*/
4147 static const uint8_t source2
[]={0x1b,0x24,0x053};
4148 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4149 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4157 static void TestJitterbug2411(){
4158 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4159 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4160 UConverter
* kr
=NULL
, *kr1
=NULL
;
4161 UErrorCode errorCode
= U_ZERO_ERROR
;
4162 UChar tgt
[100]={'\0'};
4163 UChar
* target
= tgt
;
4164 UChar
* targetLimit
= target
+100;
4165 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4166 if(U_FAILURE(errorCode
)) {
4167 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
4170 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4171 if(U_FAILURE(errorCode
)) {
4172 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4175 kr1
= ucnv_open("ibm-25546", &errorCode
);
4176 if(U_FAILURE(errorCode
)) {
4177 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4181 targetLimit
= target
+100;
4182 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4184 if(U_FAILURE(errorCode
)) {
4185 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4196 /* From Unicode moved to testdata/conversion.txt */
4199 static const uint8_t sampleTextJIS
[] = {
4200 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4201 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4204 static const uint16_t expectedISO2022JIS
[] = {
4209 static const int32_t toISO2022JISOffs
[]={
4215 static const uint8_t sampleTextJIS7
[] = {
4216 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4217 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4218 0x1b,0x24,0x42,0x21,0x21,
4219 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4221 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4223 static const uint16_t expectedISO2022JIS7
[] = {
4231 static const int32_t toISO2022JIS7Offs
[]={
4238 static const uint8_t sampleTextJIS8
[] = {
4239 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4240 0xa1,0xc8,0xd9,/*Katakana Set*/
4243 0xb1,0xc3, /*Katakana Set*/
4244 0x1b,0x24,0x42,0x21,0x21
4246 static const uint16_t expectedISO2022JIS8
[] = {
4248 0xff61, 0xff88, 0xff99,
4253 static const int32_t toISO2022JIS8Offs
[]={
4259 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4260 UPRV_LENGTHOF(expectedISO2022JIS
),"JIS", toISO2022JISOffs
,TRUE
);
4261 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4262 UPRV_LENGTHOF(expectedISO2022JIS7
),"JIS7", toISO2022JIS7Offs
,TRUE
);
4263 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4264 UPRV_LENGTHOF(expectedISO2022JIS8
),"JIS8", toISO2022JIS8Offs
,TRUE
);
4271 ICU
4.4 (ticket
#7314) removes mappings for CNS 11643 planes 3..7
4273 static void TestJitterbug915(){
4274 /* tests for roundtripping of the below sequence
4275 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4276 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4277 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4278 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4279 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4280 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4281 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4283 static const char cSource
[]={
4284 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4285 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4286 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4287 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4288 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4289 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4290 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4291 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4292 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4293 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4294 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4295 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4296 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4297 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4298 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4299 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4300 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4301 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4302 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4303 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4304 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4305 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4306 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4307 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4308 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4309 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4310 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4311 0x37, 0x20, 0x2A, 0x2F
4313 UChar uTarget
[500]={'\0'};
4314 UChar
* utarget
=uTarget
;
4315 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4317 char cTarget
[500]={'\0'};
4318 char* ctarget
=cTarget
;
4319 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4320 const char* csource
=cSource
;
4321 const char* tempSrc
= cSource
;
4322 UErrorCode err
=U_ZERO_ERROR
;
4324 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4325 if(U_FAILURE(err
)) {
4326 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4329 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4330 if(U_FAILURE(err
)) {
4331 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4334 utargetLimit
=utarget
;
4336 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4337 if(U_FAILURE(err
)) {
4338 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4341 ctargetLimit
=ctarget
;
/* Compare the bytes produced by the toUnicode -> fromUnicode round trip (ctarget)
 * with the original input bytes (tempSrc, initialized to cSource). */
4343 while(ctarget
<ctargetLimit
){
4344 if(*ctarget
!= *tempSrc
){
/* NOTE(review): the labels look swapped -- *ctarget (the converted output) is printed
 * under "Expected" and *tempSrc (the original input byte) under "Got". Confirm before
 * relying on this message when diagnosing a failure. */
4345 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
4355 TestISO_2022_CN_EXT() {
4357 static const uint16_t in
[]={
4358 /* test Non-BMP code points */
4359 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4360 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4361 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4362 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4363 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4364 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4365 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4366 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4367 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4370 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4371 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4372 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4373 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4374 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4375 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4376 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4377 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4378 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4379 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4380 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4381 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4382 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4383 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4384 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4385 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4386 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4387 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4389 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4393 const UChar
* uSource
;
4394 const UChar
* uSourceLimit
;
4395 const char* cSource
;
4396 const char* cSourceLimit
;
4397 UChar
*uTargetLimit
=NULL
;
4400 const char *cTargetLimit
;
4403 int32_t uBufSize
= 180;
4404 UErrorCode errorCode
=U_ZERO_ERROR
;
4406 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4407 int32_t* myOff
= offsets
;
4408 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4409 if(U_FAILURE(errorCode
)) {
4410 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4414 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4415 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4416 uSource
= (const UChar
*)in
;
4417 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4419 cTargetLimit
= cBuf
+uBufSize
*5;
4421 uTargetLimit
= uBuf
+ uBufSize
*5;
4422 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4423 if(U_FAILURE(errorCode
)){
4424 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4428 cSourceLimit
=cTarget
;
4431 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4432 if(U_FAILURE(errorCode
)){
4433 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4436 uSource
= (const UChar
*)in
;
4437 while(uSource
<uSourceLimit
){
4438 if(*test
!=*uSource
){
4439 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4442 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4447 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4448 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4449 /*Test for the condition where there is an invalid character*/
4452 static const uint8_t source2
[]={0x0e,0x24,0x053};
4453 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4465 static const uint16_t in
[]={
4467 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4468 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4469 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4470 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4471 0x0020, 0x0045, 0x004e, 0x0044,
4473 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4474 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4475 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4476 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4477 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4478 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4479 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4480 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4481 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4482 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4483 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4484 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4485 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4486 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4487 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4488 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4489 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4492 const UChar
* uSource
;
4493 const UChar
* uSourceLimit
;
4494 const char* cSource
;
4495 const char* cSourceLimit
;
4496 UChar
*uTargetLimit
=NULL
;
4499 const char *cTargetLimit
;
4502 int32_t uBufSize
= 180;
4503 UErrorCode errorCode
=U_ZERO_ERROR
;
4505 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4506 int32_t* myOff
= offsets
;
4507 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4508 if(U_FAILURE(errorCode
)) {
4509 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4513 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4514 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4515 uSource
= (const UChar
*)in
;
4516 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4518 cTargetLimit
= cBuf
+uBufSize
*5;
4520 uTargetLimit
= uBuf
+ uBufSize
*5;
4521 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4522 if(U_FAILURE(errorCode
)){
4523 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4527 cSourceLimit
=cTarget
;
4530 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4531 if(U_FAILURE(errorCode
)){
4532 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4535 uSource
= (const UChar
*)in
;
4536 while(uSource
<uSourceLimit
){
4537 if(*test
!=*uSource
){
4538 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4541 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4546 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4547 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4548 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4549 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4550 TestJitterbug930("csISO2022CN");
4551 /*Test for the condition where there is an invalid character*/
4554 static const uint8_t source2
[]={0x0e,0x24,0x053};
4555 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4564 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4566 const char * converterName
;
4567 const char * inputText
;
4568 int inputTextLength
;
4571 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
/*
 * To-Unicode error callback.
 *   toArgs - converter arguments, forwarded to ucnv_cbToUWriteSub().
 *   reason - why the converter invoked the callback; compared against UCNV_IRREGULAR.
 *   err    - in/out error code; reset to U_ZERO_ERROR before the substitution is written.
 * context, codeUnits and length are not referenced in the statements visible here.
 */
4572 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context
, UConverterToUnicodeArgs
*toArgs
, const char* codeUnits
,
4573 int32_t length
, UConverterCallbackReason reason
, UErrorCode
* err
) {
/* Reason codes numerically above UCNV_IRREGULAR take this branch.
 * NOTE(review): presumably these are the reset/close/clone notifications rather than
 * conversion errors -- confirm against the UConverterCallbackReason enum. */
4574 if (reason
> UCNV_IRREGULAR
) {
/* Any remaining reason other than UCNV_IRREGULAR is reported as a test failure. */
4577 if (reason
!= UCNV_IRREGULAR
) {
4578 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4580 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4581 *err
= U_ZERO_ERROR
;
4582 ucnv_cbToUWriteSub(toArgs
,0,err
);
4585 enum { kEmptySegmentToUCharsMax
= 64 };
4586 static void TestJitterbug6175(void) {
/* "ab" ESC $ B ESC ( B "cd" CR LF -- the ESC $ B designation is followed directly by ESC ( B,
 * with no character bytes in between. */
4587 static const char iso2022jp_a
[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
/* ESC $ ) C "a" SO SI "b" CR LF -- SO (0x0E) is followed directly by SI (0x0F). */
4588 static const char iso2022kr_a
[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
/* "a" ESC $ ) A "b" SO SI ESC $ * H ESC N 0x6A 0x65 "c" CR LF -- SO is followed directly by SI. */
4589 static const char iso2022cn_a
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
/* "a" ESC $ ) A "b" SO ESC $ ) G 0x68 0x64 SI "c" CR LF -- after SO, a new ESC $ ) G designation
 * arrives before any character bytes (presumably the empty segment under test -- confirm). */
4590 static const char iso2022cn_b
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
/* "ab~{~}cd" -- the "~{" pair is followed directly by "~}". */
4591 static const char hzGB2312_a
[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4592 static const EmptySegmentTest emptySegmentTests
[] = {
4593 /* converterName inputText inputTextLength */
4594 { "ISO-2022-JP", iso2022jp_a
, sizeof(iso2022jp_a
) },
4595 { "ISO-2022-KR", iso2022kr_a
, sizeof(iso2022kr_a
) },
4596 { "ISO-2022-CN", iso2022cn_a
, sizeof(iso2022cn_a
) },
4597 { "ISO-2022-CN", iso2022cn_b
, sizeof(iso2022cn_b
) },
4598 { "HZ-GB-2312", hzGB2312_a
, sizeof(hzGB2312_a
) },
4602 const EmptySegmentTest
* testPtr
;
4603 for (testPtr
= emptySegmentTests
; testPtr
->converterName
!= NULL
; ++testPtr
) {
4604 UErrorCode err
= U_ZERO_ERROR
;
4605 UConverter
* cnv
= ucnv_open(testPtr
->converterName
, &err
);
4606 if (U_FAILURE(err
)) {
4607 log_data_err("Unable to open %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4610 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_EMPTYSEGMENT
, NULL
, NULL
, NULL
, &err
);
4611 if (U_FAILURE(err
)) {
4612 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4617 UChar toUChars
[kEmptySegmentToUCharsMax
];
4618 UChar
* toUCharsPtr
= toUChars
;
4619 const UChar
* toUCharsLimit
= toUCharsPtr
+ kEmptySegmentToUCharsMax
;
4620 const char * inCharsPtr
= testPtr
->inputText
;
4621 const char * inCharsLimit
= inCharsPtr
+ testPtr
->inputTextLength
;
4622 ucnv_toUnicode(cnv
, &toUCharsPtr
, toUCharsLimit
, &inCharsPtr
, inCharsLimit
, NULL
, TRUE
, &err
);
4629 TestEBCDIC_STATEFUL() {
4631 static const uint8_t in
[]={
4640 /* expected test results */
4641 static const int32_t results
[]={
4642 /* number of bytes read, code point */
4651 static const uint8_t in2
[]={
4657 /* expected test results */
4658 static const int32_t results2
[]={
4659 /* number of bytes read, code point */
4664 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4665 UErrorCode errorCode
=U_ZERO_ERROR
;
4666 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4667 if(U_FAILURE(errorCode
)) {
4668 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4671 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4673 /* Test the condition when source >= sourceLimit */
4674 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4676 /*Test for the condition where source > sourcelimit after consuming the shift character */
4678 static const uint8_t source1
[]={0x0f};
4679 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4681 /*Test for the condition where there is an invalid character*/
4684 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4685 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
4688 source
=(const char*)in2
;
4689 limit
=(const char*)in2
+sizeof(in2
);
4690 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4698 static const uint8_t in
[]={
4701 0x81, 0x30, 0x81, 0x30,
4705 0x82, 0x35, 0x8f, 0x33,
4706 0x84, 0x31, 0xa4, 0x39,
4707 0x90, 0x30, 0x81, 0x30,
4708 0xe3, 0x32, 0x9a, 0x35
4711 * Feature removed markus 2000-oct-26
4712 * Only some codepages must match surrogate pairs into supplementary code points -
4713 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4714 * GB 18030 provides direct encodings for supplementary code points, therefore
4715 * it must not combine two single-encoded surrogates into one code point.
4717 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4721 /* expected test results */
4722 static const int32_t results
[]={
4723 /* number of bytes read, code point */
4735 /* Feature removed. See comment above. */
4740 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4741 UErrorCode errorCode
=U_ZERO_ERROR
;
4742 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4743 if(U_FAILURE(errorCode
)) {
4744 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4747 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4753 /* LMBCS-1 string */
4754 static const uint8_t pszLMBCS
[]={
4763 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4767 /* Unicode UChar32 equivalents */
4768 static const UChar32 pszUnicode32
[]={
4778 0x00023456, /* code point for surrogate pair */
4782 /* Unicode UChar equivalents */
4783 static const UChar pszUnicode
[]={
4793 0xD84D, /* high (lead) surrogate of U+23456 */
4794 0xDC56, /* low (trail) surrogate of U+23456 */
4798 /* expected test results */
/* NOTE(review): the getNextUChar loop passes off[1] - off[0] as the byte length of the next
 * character, so these entries appear to be cumulative byte offsets into pszLMBCS (one per
 * code point) rather than "bytes read, code point" pairs -- confirm against the table. */
4799 static const int offsets32
[]={
4800 /* number of bytes read, code point */
4814 /* expected test results */
/* NOTE(review): compared against (pSource - pszLMBCS) and memcmp'd with the offsets reported by
 * ucnv_toUnicode, so these also appear to be byte offsets into pszLMBCS (one per UChar), with
 * the final entry holding the total size -- confirm against the table. */
4815 static const int offsets
[]={
4816 /* number of bytes read, code point */
4834 #define NAME_LMBCS_1 "LMBCS-1"
4835 #define NAME_LMBCS_2 "LMBCS-2"
4838 /* Some basic open/close/property tests on some LMBCS converters */
4841 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4842 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4843 char get_subchars
[1];
4844 const char * get_name
;
4848 int8_t len
= sizeof(get_subchars
);
4850 UErrorCode errorCode
=U_ZERO_ERROR
;
4853 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4854 if(U_FAILURE(errorCode
)) {
4855 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4858 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4859 if(U_FAILURE(errorCode
)) {
4860 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4865 get_name
= ucnv_getName (cnv1
, &errorCode
);
4866 if (strcmp(NAME_LMBCS_1
,get_name
)){
4867 log_err("Unexpected converter name: %s\n", get_name
);
4869 get_name
= ucnv_getName (cnv2
, &errorCode
);
4870 if (strcmp(NAME_LMBCS_2
,get_name
)){
4871 log_err("Unexpected converter name: %s\n", get_name
);
4874 /* substitution chars */
4875 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4876 if(U_FAILURE(errorCode
)) {
4877 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4880 log_err("Unexpected length of sub chars\n");
4882 if (get_subchars
[0] != expected_subchars
[0]){
4883 log_err("Unexpected value of sub chars\n");
4885 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4886 if(U_FAILURE(errorCode
)) {
4887 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4889 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4890 if(U_FAILURE(errorCode
)) {
4891 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4894 log_err("Unexpected length of sub chars\n");
4896 if (get_subchars
[0] != new_subchars
[0]){
4897 log_err("Unexpected value of sub chars\n");
4904 /* LMBCS to Unicode - offsets */
4906 UErrorCode errorCode
=U_ZERO_ERROR
;
4908 const char * pSource
= (const char *)pszLMBCS
;
4909 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
/* NOTE(review): sizeof() yields a byte count, not an element count, so Out and off below are
 * dimensioned larger than pszUnicode/offsets have elements. This only over-allocates; the
 * usable target range is still bounded by OutLimit, which uses UPRV_LENGTHOF(). */
4911 UChar Out
[sizeof(pszUnicode
) + 1];
4913 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
4915 int32_t off
[sizeof(offsets
)];
4917 /* last 'offset' in expected results is just the final size.
4918 (Makes other tests easier). Compensate here: */
4920 off
[UPRV_LENGTHOF(offsets
)-1] = sizeof(pszLMBCS
);
4924 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4925 if(U_FAILURE(errorCode
)) {
4926 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4932 ucnv_toUnicode (cnv
,
4942 if (memcmp(off
,offsets
,sizeof(offsets
)))
4944 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4946 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4948 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4953 /* LMBCS to Unicode - getNextUChar */
4954 const char * sourceStart
;
4955 const char *source
=(const char *)pszLMBCS
;
4956 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4957 const UChar32
*results
= pszUnicode32
;
4958 const int *off
= offsets32
;
4960 UErrorCode errorCode
=U_ZERO_ERROR
;
4963 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4964 if(U_FAILURE(errorCode
)) {
4965 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4971 while(source
<limit
) {
4973 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4974 if(U_FAILURE(errorCode
)) {
4975 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4977 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
/* NOTE(review): format/argument mismatches in this message:
 *  - %lx is given *results, a UChar32 (and uniChar, presumably also UChar32); UChar32 is a
 *    32-bit type in ICU, which is not `long` on LP64 platforms -- confirm and cast.
 *  - the first %d is given (source-sourceStart), a pointer difference, not an int.
 *  - the last %d prints *off (an offset), while the check above compares the consumed length
 *    against off[1] - off[0], so the "should have been ... from %d bytes" figure is not
 *    the expected byte count. */
4978 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4979 uniChar
, (source
-sourceStart
), *results
, *off
);
4988 { /* test locale & optimization group operations: Unicode to LMBCS */
4990 UErrorCode errorCode
=U_ZERO_ERROR
;
4991 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4992 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4993 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4994 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4995 const UChar
* pUniOut
= uniString
;
4996 UChar
* pUniIn
= uniString
;
4997 uint8_t lmbcsString
[4];
4998 const char * pLMBCSOut
= (const char *)lmbcsString
;
4999 char * pLMBCSIn
= (char *)lmbcsString
;
5001 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5002 ucnv_fromUnicode (cnv16he
,
5003 &pLMBCSIn
, (pLMBCSIn
+ UPRV_LENGTHOF(lmbcsString
)),
5004 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5005 NULL
, 1, &errorCode
);
5007 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
5009 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5012 pLMBCSIn
= (char *)lmbcsString
;
5013 pUniOut
= uniString
;
5014 ucnv_fromUnicode (cnv01us
,
5015 &pLMBCSIn
, (const char *)(lmbcsString
+ UPRV_LENGTHOF(lmbcsString
)),
5016 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5017 NULL
, 1, &errorCode
);
5019 if (lmbcsString
[0] != 0x9F)
5021 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5024 /* single byte char from mbcs char set */
5025 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5026 pLMBCSOut
= (const char *)lmbcsString
;
5028 ucnv_toUnicode (cnv16jp
,
5029 &pUniIn
, pUniIn
+ 1,
5030 &pLMBCSOut
, (pLMBCSOut
+ 1),
5031 NULL
, 1, &errorCode
);
5032 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5034 log_err("Unexpected results from LMBCS-16 single byte char\n");
5036 /* convert to group 1: should be 3 bytes */
5037 pLMBCSIn
= (char *)lmbcsString
;
5038 pUniOut
= uniString
;
5039 ucnv_fromUnicode (cnv01us
,
5040 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
5041 &pUniOut
, pUniOut
+ 1,
5042 NULL
, 1, &errorCode
);
5043 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
5044 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
5046 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5048 pLMBCSOut
= (const char *)lmbcsString
;
5050 ucnv_toUnicode (cnv01us
,
5051 &pUniIn
, pUniIn
+ 1,
5052 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
5053 NULL
, 1, &errorCode
);
5054 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5056 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5058 pLMBCSIn
= (char *)lmbcsString
;
5059 pUniOut
= uniString
;
5060 ucnv_fromUnicode (cnv16jp
,
5061 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
5062 &pUniOut
, pUniOut
+ 1,
5063 NULL
, 1, &errorCode
);
5064 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
5066 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5068 ucnv_close(cnv16he
);
5069 ucnv_close(cnv16jp
);
5070 ucnv_close(cnv01us
);
5073 /* Small source buffer testing, LMBCS -> Unicode */
5075 UErrorCode errorCode
=U_ZERO_ERROR
;
5077 const char * pSource
= (const char *)pszLMBCS
;
5078 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
5079 int codepointCount
= 0;
5081 UChar Out
[sizeof(pszUnicode
) + 1];
5083 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
5086 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
5087 if(U_FAILURE(errorCode
)) {
5088 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5093 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
5095 ucnv_toUnicode (cnv
,
5099 (pSource
+1), /* claim that this is a 1- byte buffer */
5101 FALSE
, /* FALSE means there might be more chars in the next buffer */
5104 if (U_SUCCESS (errorCode
))
5106 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
5108 /* we are on to the next code point: check value */
5110 if (Out
[0] != pszUnicode
[codepointCount
]){
/* NOTE(review): %lx expects an unsigned long, but Out[0] and pszUnicode[codepointCount] are
 * UChar values (promoted to int when passed through varargs) -- the specifier and the
 * arguments do not match; confirm and cast or change the format. */
5111 log_err("LMBCS->Uni result %lx should have been %lx \n",
5112 Out
[0], pszUnicode
[codepointCount
]);
5115 pOut
= Out
; /* reset for accumulating next code point */
5121 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
5125 /* limits & surrogate error testing */
5126 char LIn
[sizeof(pszLMBCS
)];
5127 const char * pLIn
= LIn
;
5129 char LOut
[sizeof(pszLMBCS
)];
5130 char * pLOut
= LOut
;
5132 UChar UOut
[sizeof(pszUnicode
)];
5133 UChar
* pUOut
= UOut
;
5135 UChar UIn
[sizeof(pszUnicode
)];
5136 const UChar
* pUIn
= UIn
;
5138 int32_t off
[sizeof(offsets
)];
5141 errorCode
=U_ZERO_ERROR
;
5143 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5145 ucnv_fromUnicode(cnv
, &pLOut
, pLOut
+1, &pUIn
, pUIn
-1, off
, FALSE
, &errorCode
);
5146 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5148 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
5152 errorCode
=U_ZERO_ERROR
;
5153 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
5154 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5156 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
5158 errorCode
=U_ZERO_ERROR
;
5160 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
5161 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5163 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
5165 errorCode
=U_ZERO_ERROR
;
5167 /* 0 byte source request - no error, no pointer movement */
5168 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
5169 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
5170 if(U_FAILURE(errorCode
)) {
5171 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
5173 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5175 log_err("Unexpected pointer move in 0 byte source request \n");
5177 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5178 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5179 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5181 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5183 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5185 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5187 errorCode
= U_ZERO_ERROR
;
5189 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5192 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+UPRV_LENGTHOF(pszUnicode
),off
,FALSE
, &errorCode
);
5193 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5195 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5198 errorCode
= U_ZERO_ERROR
;
5200 pLIn
= (const char *)pszLMBCS
;
5201 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5202 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5204 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5207 /* unpaired or chopped LMBCS surrogates */
5209 /* OK high surrogate, Low surrogate is chopped */
5210 LIn
[0] = (char)0x14;
5211 LIn
[1] = (char)0xD8;
5212 LIn
[2] = (char)0x01;
5213 LIn
[3] = (char)0x14;
5214 LIn
[4] = (char)0xDC;
5216 errorCode
= U_ZERO_ERROR
;
5219 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5220 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5221 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5223 log_err("Unexpected results on chopped low surrogate\n");
5226 /* chopped at surrogate boundary */
5227 LIn
[0] = (char)0x14;
5228 LIn
[1] = (char)0xD8;
5229 LIn
[2] = (char)0x01;
5231 errorCode
= U_ZERO_ERROR
;
5234 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5235 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5237 log_err("Unexpected results on chopped at surrogate boundary \n");
5240 /* unpaired surrogate plus valid Unichar */
5241 LIn
[0] = (char)0x14;
5242 LIn
[1] = (char)0xD8;
5243 LIn
[2] = (char)0x01;
5244 LIn
[3] = (char)0x14;
5245 LIn
[4] = (char)0xC9;
5246 LIn
[5] = (char)0xD0;
5248 errorCode
= U_ZERO_ERROR
;
5251 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5252 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5254 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5257 /* unpaired surrogate plus chopped Unichar */
5258 LIn
[0] = (char)0x14;
5259 LIn
[1] = (char)0xD8;
5260 LIn
[2] = (char)0x01;
5261 LIn
[3] = (char)0x14;
5262 LIn
[4] = (char)0xC9;
5265 errorCode
= U_ZERO_ERROR
;
5268 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5269 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5271 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5274 /* unpaired surrogate plus valid non-Unichar */
5275 LIn
[0] = (char)0x14;
5276 LIn
[1] = (char)0xD8;
5277 LIn
[2] = (char)0x01;
5278 LIn
[3] = (char)0x0F;
5279 LIn
[4] = (char)0x3B;
5282 errorCode
= U_ZERO_ERROR
;
5285 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5286 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5288 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5291 /* unpaired surrogate plus chopped non-Unichar */
5292 LIn
[0] = (char)0x14;
5293 LIn
[1] = (char)0xD8;
5294 LIn
[2] = (char)0x01;
5295 LIn
[3] = (char)0x0F;
5298 errorCode
= U_ZERO_ERROR
;
5301 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5303 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5305 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5309 ucnv_close(cnv
); /* final cleanup */
/*
 * TestJitterbug255: pushes a fixed byte sequence through a "shift-jis"
 * converter one code point at a time with ucnv_getNextUChar() and logs an
 * error if a call leaves a failure status.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line (braces and some statements are absent); the comments describe only
 * what is visible here.
 */
5313 static void TestJitterbug255()
/* Input bytes. The array includes a trailing 0x00, and testEnd below is
 * computed with sizeof(testBytes), so that byte is part of the input. */
5315 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
/* Read cursor; its address is handed to ucnv_getNextUChar() in the loop. */
5316 const char *testBuffer
= (const char *)testBytes
;
/* One-past-the-end pointer used as the loop's stop condition. */
5317 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5318 UErrorCode status
= U_ZERO_ERROR
;
5320 UConverter
*cnv
= 0;
5322 cnv
= ucnv_open("shift-jis", &status
);
/* Reported through log_data_err rather than log_err when the open fails. */
5323 if (U_FAILURE(status
) || cnv
== 0) {
5324 log_data_err("Failed to open the converter for SJIS.\n");
/* Consume the input until the cursor reaches testEnd. */
5327 while (testBuffer
!= testEnd
)
/* The returned code point is deliberately ignored; only status is checked. */
5329 /*result = */ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5330 if (U_FAILURE(status
))
5332 log_err("Failed to convert the next UChar for SJIS.\n");
/*
 * TestEBCDICUS4XML: round-trip check of line-ending bytes with the
 * "ebcdic-xml-us" converter.
 *   - to Unicode:   bytes   {0x25, 0x15, 0x0D}       are expected to give
 *                   UChars  {0x000A, 0x000A, 0x000D}
 *   - from Unicode: UChars  {0x000A, 0x000A, 0x000D} are expected to give
 *                   bytes   {0x25, 0x25, 0x0D}
 * Only the first three elements of each four-element table are converted
 * and compared.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line; the comments describe only what is visible here.
 */
5339 static void TestEBCDICUS4XML()
/* Output buffer for the to-Unicode direction. */
5341 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
/* Expected to-Unicode result; also the input of the from-Unicode direction. */
5342 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
/* Expected from-Unicode result. */
5343 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
/* Input bytes of the to-Unicode direction. */
5344 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
/* Output buffer for the from-Unicode direction. */
5345 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
/* Cursor pointers: the converter calls below receive these by address,
 * while the comparisons use the *_x arrays directly. */
5346 UChar
*unicodes
= unicodes_x
;
5347 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5348 char *target
= target_x
;
5349 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5350 UErrorCode status
= U_ZERO_ERROR
;
5351 UConverter
*cnv
= 0;
5353 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5354 if (U_FAILURE(status
) || cnv
== 0) {
5355 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
/* Bytes -> Unicode: three input bytes, room for three UChars, no offsets. */
5358 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
/* Compare the start of the output array against the expected UChars. */
5359 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5360 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5361 u_errorName(status
));
/* Dump actual, then expected. */
5362 printUSeqErr(unicodes_x
, 3);
5363 printUSeqErr(toUnicodeMaps
, 3);
/* Clear status before the reverse direction. */
5365 status
= U_ZERO_ERROR
;
/* Unicode -> bytes: the expected UChars from above are the input here. */
5366 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
/* Compare the start of the output array against the expected bytes. */
5367 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5368 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5369 u_errorName(status
));
/* Dump actual, then expected. */
5370 printSeqErr((const unsigned char*)target_x
, 3);
5371 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5375 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5377 #if !UCONFIG_NO_COLLATION
/*
 * TestJitterbug981: converts the rule string of the "zh" collator to UTF-8
 * with ucnv_fromUChars(), repeating the call with a growing target capacity
 * for as long as the status is U_BUFFER_OVERFLOW_ERROR, and checks that the
 * required length reported by ucnv_fromUChars() is the same on every pass.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line. The declarations of `rules`, `buff` and `numNeeded`, the start of
 * the loop closed by the `while` at the end, and the initial value of
 * `target_cap` are not visible here.
 */
5379 static void TestJitterbug981(){
5381 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5382 UErrorCode status
= U_ZERO_ERROR
;
5383 UConverter
*utf8cnv
;
5384 UCollator
* myCollator
;
5387 utf8cnv
= ucnv_open ("utf8", &status
);
5388 if(U_FAILURE(status
)){
5389 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status
));
5392 myCollator
= ucol_open("zh", &status
);
/* Collator failure: logged as a data error, and the converter is closed. */
5393 if(U_FAILURE(status
)){
5394 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status
));
5395 ucnv_close(utf8cnv
);
5399 rules
= ucol_getRules(myCollator
, &rules_length
);
/* An empty rule string is logged as a data error; both objects are closed. */
5400 if(rules_length
== 0) {
5401 log_data_err("missing zh tailoring rule string\n");
5402 ucol_close(myCollator
);
5403 ucnv_close(utf8cnv
);
/* Buffer size: rule length times the converter's maximum bytes per char. */
5406 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
/* NOTE(review): no NULL check on the malloc result and no matching free()
 * are visible in this listing - confirm against the full source. */
5407 buff
= malloc(buff_size
);
/* Each pass starts from a reset converter and a cleared status. */
5411 ucnv_reset(utf8cnv
);
5412 status
= U_ZERO_ERROR
;
/* The requested capacity must stay below the allocated buffer size. */
5413 if(target_cap
>= buff_size
) {
5414 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5417 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5418 rules
, rules_length
, &status
);
/* Next capacity: the reported need if it exceeds the current capacity,
 * otherwise the current capacity plus one. */
5419 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* The reported length must match the one remembered from the previous pass
 * (numNeeded == 0 means nothing has been remembered yet). */
5420 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5421 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5424 numNeeded
= bytes_needed
;
/* Repeat while the target was too small. */
5425 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5426 ucol_close(myCollator
);
5427 ucnv_close(utf8cnv
);
5433 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestJitterbug1293: converts a short UTF-16 string to "shift-jis" with
 * ucnv_fromUChars(), repeating while the status is U_BUFFER_OVERFLOW_ERROR,
 * and checks that the reported required length is the same on every pass.
 * Any failure status left after the loop is logged.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line. The declaration of `target`, the start of the loop closed by the
 * `while` below, and the initial value of `target_cap` are not visible here.
 */
5434 static void TestJitterbug1293(){
/* Source text, terminated by 0x000 so that u_strlen() can measure it. */
5435 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5437 UErrorCode status
= U_ZERO_ERROR
;
5438 UConverter
* conv
=NULL
;
5439 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5440 conv
= ucnv_open("shift-jis",&status
);
5441 if(U_FAILURE(status
)){
5442 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
/* NOTE(review): the capacity argument is the literal 256, not target_cap,
 * so target_cap does not influence this call - confirm that is intended and
 * that `target` holds at least 256 bytes. */
5448 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
/* Next capacity: the reported need if it exceeds the current capacity,
 * otherwise the current capacity plus one. */
5449 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* The reported length must match the one remembered from the previous pass
 * (numNeeded == 0 means nothing has been remembered yet). */
5450 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5451 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5453 numNeeded
= bytes_needed
;
5454 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
/* Anything other than success after the loop is a test failure. */
5455 if(U_FAILURE(status
)){
5456 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));
/*
 * TestJB5275_1: converts an ISCII byte string to Unicode with the
 * "iscii-gur" converter and compares the output with `expected`. Per the
 * log message and the data comments, the case under test is that the
 * converter switches back to its default script after a new line.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line (for example the comparison inside the final loop and any pointer
 * updates around it); the comments describe only what is visible here.
 */
5463 static void TestJB5275_1(){
/* Input bytes; the trailing comment on each line names the case. */
5465 static const char* data
= "\x3B\xB3\x0A" /* Easy characters */
5466 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5467 /* Switch script: */
5468 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5469 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5470 "\xEF\x40\x3B\xB3\x0A";
/* Expected UTF-16 output, one row per input line above. */
5471 static const UChar expected
[] ={
5472 0x003b, 0x0a15, 0x000a, /* Easy characters */
5473 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5474 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5475 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5476 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5479 UErrorCode status
= U_ZERO_ERROR
;
5480 UConverter
* conv
= ucnv_open("iscii-gur", &status
);
/* Output buffer of 100 UChars with write cursor and limit. */
5481 UChar dest
[100] = {'\0'};
5482 UChar
* target
= dest
;
5483 UChar
* targetLimit
= dest
+100;
/* Input cursor and limit; the length is taken with strlen(data). */
5484 const char* source
= data
;
5485 const char* sourceLimit
= data
+strlen(data
);
/* Read cursor into the expected output. */
5486 const UChar
* exp
= expected
;
5488 if (U_FAILURE(status
)) {
5489 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status
));
5493 log_verbose("Testing switching back to default script when new line is encountered.\n");
/* Convert the whole input in a single call, without offsets. */
5494 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5495 if(U_FAILURE(status
)){
5496 log_err("conversion failed: %s \n", u_errorName(status
));
/* The write cursor after conversion becomes the end of the output. */
5498 targetLimit
= target
;
5500 printUSeq(target
, targetLimit
-target
);
/* Walk the output up to targetLimit; mismatches are reported below. */
5501 while(target
<targetLimit
){
5503 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
/*
 * TestJB5275: converts an ISCII byte string to Unicode with the "iscii"
 * converter and compares the output with `expected`. Every input line
 * starts with a two-byte \xEF\x4? sequence; the comment on each line names
 * the script it is expected to produce.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line (for example the comparison inside the final loop and any pointer
 * updates around it); the comments describe only what is visible here.
 */
5511 static void TestJB5275(){
5512 static const char* data
=
5513 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5514 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5515 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5516 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5517 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5518 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5519 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5520 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5521 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5522 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
/* Expected UTF-16 output, one row per active input line above. */
5523 static const UChar expected
[] ={
5524 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5525 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5526 0x0038, 0x0C95, 0x000A, /* Kannada test */
5527 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5528 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5529 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5532 UErrorCode status
= U_ZERO_ERROR
;
5533 UConverter
* conv
= ucnv_open("iscii", &status
);
/* Output buffer of 100 UChars with write cursor and limit. */
5534 UChar dest
[100] = {'\0'};
5535 UChar
* target
= dest
;
5536 UChar
* targetLimit
= dest
+100;
/* Input cursor and limit; the length is taken with strlen(data). */
5537 const char* source
= data
;
5538 const char* sourceLimit
= data
+strlen(data
);
/* Read cursor into the expected output. */
5539 const UChar
* exp
= expected
;
/* Convert the whole input in a single call, without offsets.
 * NOTE(review): no check of the ucnv_open status is visible before this
 * call - confirm against the full source. */
5540 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
/* A failure here is reported through log_data_err rather than log_err. */
5541 if(U_FAILURE(status
)){
5542 log_data_err("conversion failed: %s \n", u_errorName(status
));
/* The write cursor after conversion becomes the end of the output. */
5544 targetLimit
= target
;
5547 printUSeq(target
, targetLimit
-target
);
/* Walk the output up to targetLimit; mismatches are reported below. */
5549 while(target
<targetLimit
){
5551 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
/*
 * TestIsFixedWidth: opens each converter named in `fixedWidth` and logs an
 * error if ucnv_isFixedWidth() reports false for it, then opens each
 * converter named in `notFixedWidth` and logs an error if
 * ucnv_isFixedWidth() reports true for it. A converter that cannot be
 * opened is reported through log_data_err.
 *
 * NOTE(review): this listing is token-split and does not show every source
 * line. The return type, the declaration of `i`, most entries of the two
 * name tables and the end of the function are not visible here.
 */
5560 TestIsFixedWidth() {
5561 UErrorCode status
= U_ZERO_ERROR
;
5562 UConverter
*cnv
= NULL
;
/* Names of converters expected to be fixed width. */
5565 const char *fixedWidth
[] = {
5568 "ibm-5478_P100-1995"
/* Names of converters expected not to be fixed width. */
5571 const char *notFixedWidth
[] = {
/* Pass 1: every entry of fixedWidth must be reported as fixed width.
 * NOTE(review): no reset of `status` between iterations is visible. */
5578 for (i
= 0; i
< UPRV_LENGTHOF(fixedWidth
); i
++) {
5579 cnv
= ucnv_open(fixedWidth
[i
], &status
);
5580 if (cnv
== NULL
|| U_FAILURE(status
)) {
5581 log_data_err("Error open converter: %s - %s \n", fixedWidth
[i
], u_errorName(status
));
5585 if (!ucnv_isFixedWidth(cnv
, &status
)) {
5586 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth
[i
]);
/* Pass 2: no entry of notFixedWidth may be reported as fixed width. */
5591 for (i
= 0; i
< UPRV_LENGTHOF(notFixedWidth
); i
++) {
5592 cnv
= ucnv_open(notFixedWidth
[i
], &status
);
5593 if (cnv
== NULL
|| U_FAILURE(status
)) {
5594 log_data_err("Error open converter: %s - %s \n", notFixedWidth
[i
], u_errorName(status
));
5598 if (ucnv_isFixedWidth(cnv
, &status
)) {
5599 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth
[i
]);