1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
12 * Modification History:
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
31 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
32 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
39 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77 static void TestISO_2022_CN_EXT(void);
79 static void TestJIS(void);
80 static void TestHZ(void);
83 static void TestSCSU(void);
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95 static void TestJitterbug915(void);
97 static void TestISCII(void);
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
106 static void TestIsFixedWidth(void);
109 static void TestInBufSizes(void);
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in
[],
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
/* Capacity, in elements, of the scratch buffers declared by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Input and output chunk sizes used per converter call; both default to the full capacity. */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;

/* Label for the conversion currently under test; it is appended to log messages. */
static char gNuConvTestName[1024];

/*
 * Yield the smaller of x and y.
 * Every argument use is parenthesized so that operands containing
 * lower-precedence operators (&, |, ?:, ...) are compared as a whole.
 * Each argument may still be evaluated twice: do not pass expressions
 * with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
131 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
133 if(cnv
&& cnv
[0] == '@') {
134 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
136 return ucnv_open(cnv
, err
);
140 static void printSeq(const unsigned char* a
, int len
)
145 log_verbose("0x%02x ", a
[i
++]);
149 static void printUSeq(const UChar
* a
, int len
)
153 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * Write len bytes starting at a to stderr, each as "0x%02x " and the whole
 * list wrapped in braces, followed by a newline.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, "0x%02x ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
166 static void printUSeqErr(const UChar
* a
, int len
)
169 fprintf(stderr
, "{U+");
171 fprintf(stderr
, "0x%04x ", a
[i
++]);
172 fprintf(stderr
,"}\n");
176 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
179 const char* s
=(char*)source
;
180 const int32_t *r
=results
;
181 UErrorCode errorCode
=U_ZERO_ERROR
;
186 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
187 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode
)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
193 /* test the expected number of input bytes only if >=0 */
194 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message
, c
, (s
-s0
), *(r
+1), *r
);
206 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
208 const char* s
=(char*)source
;
209 UErrorCode errorCode
=U_ZERO_ERROR
;
211 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
212 if(errorCode
!= expected
){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
215 if(c
!= 0xFFFD && c
!= 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
221 static void TestInBufSizes(void)
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
236 static void TestOutBufSizes(void)
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
250 void addTestNewConvert(TestNode
** root
)
252 #if !UCONFIG_NO_FILE_IO
253 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
256 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
260 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
261 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
265 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
266 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
269 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
272 #if !UCONFIG_NO_LEGACY_CONVERSION
273 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
276 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
278 #if !UCONFIG_NO_LEGACY_CONVERSION
279 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
280 #if !UCONFIG_NO_FILE_IO
281 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
282 addTest(root
, &TestICCRunout
, "tsconv/nucnvtst/TestICCRunout");
284 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
286 #ifdef U_ENABLE_GENERIC_ISO_2022
287 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
290 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
292 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
302 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
305 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
307 #if !UCONFIG_NO_LEGACY_CONVERSION
308 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
310 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
313 addTest(root
, &TestJB5275
, "tsconv/nucnvtst/TestJB5275");
314 addTest(root
, &TestJB5275_1
, "tsconv/nucnvtst/TestJB5275_1");
315 #if !UCONFIG_NO_COLLATION
316 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
319 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
327 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
329 #if !UCONFIG_NO_LEGACY_CONVERSION
330 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
332 addTest(root
, &TestJitterbug6175
, "tsconv/nucnvtst/TestJitterbug6175");
334 addTest(root
, &TestIsFixedWidth
, "tsconv/nucnvtst/TestIsFixedWidth");
339 /* Note that this test already makes use of statics, so it's not really
341 This convenience function lets us make the error messages actually useful.
344 static void setNuConvTestName(const char *codepage
, const char *direction
)
346 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
350 (int)gOutBufferSize
);
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum {
    /* test was OK */
    TC_OK = 0,
    /* Match failed - err was printed */
    TC_MISMATCH = 1,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL = 2
} ETestConvertResult;
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
363 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
365 UErrorCode status
= U_ZERO_ERROR
;
366 UConverter
*conv
= 0;
367 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
368 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
375 int32_t realBufferSize
;
377 const UChar
*realSourceEnd
;
378 const UChar
*sourceLimit
;
379 UBool checkOffsets
= TRUE
;
382 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
383 junkout
[i
] = (char)0xF0;
384 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
387 setNuConvTestName(codepage
, "FROM");
389 log_verbose("\n========= %s\n", gNuConvTestName
);
391 conv
= my_ucnv_open(codepage
, &status
);
393 if(U_FAILURE(status
))
395 log_data_err("Couldn't open converter %s\n",codepage
);
399 ucnv_setFallback(conv
,useFallback
);
402 log_verbose("Converter opened..\n");
408 realBufferSize
= UPRV_LENGTHOF(junkout
);
409 realBufferEnd
= junkout
+ realBufferSize
;
410 realSourceEnd
= source
+ sourceLen
;
412 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
413 checkOffsets
= FALSE
;
417 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
418 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
420 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
422 if(targ
== realBufferEnd
) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
429 status
= U_ZERO_ERROR
;
431 ucnv_fromUnicode (conv
,
436 checkOffsets
? offs
: NULL
,
437 doFlush
, /* flush if we're at the end of the input data */
439 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
441 if(U_FAILURE(status
)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen
, targ
-junkout
);
449 if(getTestOption(VERBOSITY_OPTION
))
452 char offset_str
[9999];
457 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
458 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
459 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
463 printSeq((const uint8_t *)expect
, expectLen
);
464 if ( checkOffsets
) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str
);
472 if(expectLen
!= targ
-junkout
) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
475 fprintf(stderr
, "Got:\n");
476 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
477 fprintf(stderr
, "Expected:\n");
478 printSeqErr((const unsigned char*)expect
, expectLen
);
482 if (checkOffsets
&& (expectOffsets
!= 0) ) {
483 log_verbose("comparing %d offsets..\n", targ
-junkout
);
484 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
486 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
489 for(p
=junkout
;p
<targ
;p
++) {
490 log_err("%d,", junokout
[p
-junkout
]);
493 log_err("Expected: ");
494 for(i
=0; i
<(targ
-junkout
); i
++) {
495 log_err("%d,", expectOffsets
[i
]);
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout
, expect
, expectLen
)) {
503 log_verbose("Matches!\n");
506 log_err("String does not match u->%s\n", gNuConvTestName
);
507 printUSeqErr(source
, sourceLen
);
508 fprintf(stderr
, "Got:\n");
509 printSeqErr((const unsigned char *)junkout
, expectLen
);
510 fprintf(stderr
, "Expected:\n");
511 printSeqErr((const unsigned char *)expect
, expectLen
);
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
520 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
522 UErrorCode status
= U_ZERO_ERROR
;
523 UConverter
*conv
= 0;
524 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
525 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
527 const char *realSourceEnd
;
528 const char *srcLimit
;
534 UBool checkOffsets
= TRUE
;
536 int32_t realBufferSize
;
537 UChar
*realBufferEnd
;
540 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
543 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
546 setNuConvTestName(codepage
, "TO");
548 log_verbose("\n========= %s\n", gNuConvTestName
);
550 conv
= my_ucnv_open(codepage
, &status
);
552 if(U_FAILURE(status
))
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
558 ucnv_setFallback(conv
,useFallback
);
560 log_verbose("Converter opened..\n");
562 src
= (const char *)source
;
566 realBufferSize
= UPRV_LENGTHOF(junkout
);
567 realBufferEnd
= junkout
+ realBufferSize
;
568 realSourceEnd
= src
+ sourcelen
;
570 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
571 checkOffsets
= FALSE
;
575 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
576 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
578 if(targ
== realBufferEnd
)
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
585 /* oldTarg = targ; */
587 status
= U_ZERO_ERROR
;
589 ucnv_toUnicode (conv
,
594 checkOffsets
? offs
: NULL
,
595 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of hte source data */
598 /* offs += (targ-oldTarg); */
600 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
602 if(U_FAILURE(status
))
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen
, targ
-junkout
);
610 if(getTestOption(VERBOSITY_OPTION
))
613 char offset_str
[9999];
619 for(ptr
= junkout
;ptr
<targ
;ptr
++)
621 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
622 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
626 printUSeq(expect
, expectlen
);
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str
);
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
638 if (checkOffsets
&& (expectOffsets
!= 0))
640 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
643 for(p
=junkout
;p
<targ
;p
++) {
644 log_err("%d,", junokout
[p
-junkout
]);
647 log_err("Expected: ");
648 for(i
=0; i
<(targ
-junkout
); i
++) {
649 log_err("%d,", expectOffsets
[i
]);
653 for(i
=0; i
<(targ
-junkout
); i
++) {
654 log_err("%X,", junkout
[i
]);
658 for(i
=0; i
<(src
-(const char *)source
); i
++) {
659 log_err("%X,", (unsigned char)source
[i
]);
665 if(!memcmp(junkout
, expect
, expectlen
*2))
667 log_verbose("Matches!\n");
672 log_err("String does not match. %s\n", gNuConvTestName
);
673 log_verbose("String does not match. %s\n", gNuConvTestName
);
675 printUSeqErr(junkout
, expectlen
);
676 printf("\nExpected:");
677 printUSeqErr(expect
, expectlen
);
683 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText
[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable
[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
693 static const uint8_t expectedUTF8
[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs
[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs
[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceding */
702 static const const uint8_t expectedISO2022
[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs
[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs
[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930
[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs
[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs
[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943
[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs
[] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs
[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027
[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs
[] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920
[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs
[] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593
[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs
[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1
[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs
[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
753 static const uint8_t expectedUTF16BE
[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs
[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs
[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
760 static const uint8_t expectedUTF16LE
[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs
[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs
[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
767 static const uint8_t expectedUTF32BE
[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs
[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs
[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
791 static const uint8_t expectedUTF32LE
[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs
[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs
[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
818 /** Test chars #2 **/
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
825 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
831 gInBufferSize
= insize
;
832 gOutBufferSize
= outsize
;
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
838 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
839 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
841 log_verbose("Test surrogate behaviour for UTF8\n");
843 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
848 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput
, UPRV_LENGTHOF(testinput
),
850 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
857 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
858 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
862 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
863 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
865 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
866 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
868 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
869 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
871 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
872 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
875 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
876 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
878 #if !UCONFIG_NO_LEGACY_CONVERSION
880 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
881 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
883 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
884 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
888 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
889 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
891 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
892 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
894 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
895 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
897 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
898 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
905 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
906 sampleText
, UPRV_LENGTHOF(sampleText
), "utf8", fmUTF8Offs
,FALSE
);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
909 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
910 sampleText
, UPRV_LENGTHOF(sampleText
), "ISO_2022", fmISO2022Offs
,FALSE
);
914 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
915 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
917 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
918 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16be", fmUTF16BEOffs
,FALSE
);
920 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
921 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32le", fmUTF32LEOffs
,FALSE
);
923 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
924 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32be", fmUTF32BEOffs
,FALSE
);
926 #if !UCONFIG_NO_LEGACY_CONVERSION
928 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
), sampleTextRoundTripUnmappable
,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-930", fmIBM930Offs
,FALSE
);
931 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),sampleTextRoundTripUnmappable
,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-943", fmIBM943Offs
,FALSE
);
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
937 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
941 malteseUChars
, UPRV_LENGTHOF(malteseUChars
), "latin3", NULL
,FALSE
);
943 testConvertFromU(malteseUChars
, UPRV_LENGTHOF(malteseUChars
),
944 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
947 testConvertFromU(LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
),
948 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
949 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
950 LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
), "LMBCS-1", fmLMBCSOffs
,FALSE
);
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
955 /* encode directly set D and set O */
956 static const uint8_t utf7
[] = {
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
968 static const UChar unicode
[] = {
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
973 [Japanese word "nihongo"]
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
978 0x65e5, 0x672c, 0x8a9e
980 static const int32_t toUnicodeOffsets
[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
986 static const int32_t fromUnicodeOffsets
[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted
[] = {
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1006 static const int32_t toUnicodeOffsetsR
[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1012 static const int32_t fromUnicodeOffsetsR
[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1019 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
1021 testConvertToU(utf7
, sizeof(utf7
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7", toUnicodeOffsets
,FALSE
);
1023 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
1025 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1034 static const uint8_t imap
[] = {
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1055 static const UChar unicode
[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1059 [Japanese word "nihongo"]
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1069 0x65e5, 0x672c, 0x8a9e,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1076 static const int32_t toUnicodeOffsets
[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1082 38, 39, 40, 41, 42, 43,
1087 static const int32_t fromUnicodeOffsets
[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1093 20, 21, 22, 23, 24, 25,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1099 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1101 testConvertToU(imap
, sizeof(imap
), unicode
, UPRV_LENGTHOF(unicode
), "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1104 /* Test UTF-8 bad data handling*/
1106 static const uint8_t utf8
[]={
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1120 static const uint16_t utf8Expected
[]={
1122 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1134 static const int32_t utf8Offsets
[]={
1147 testConvertToU(utf8
, sizeof(utf8
),
1148 utf8Expected
, UPRV_LENGTHOF(utf8Expected
), "utf-8", utf8Offsets
,FALSE
);
1152 /* Test UTF-32BE bad data handling*/
1154 static const uint8_t utf32
[]={
1155 0x00, 0x00, 0x00, 0x61,
1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1158 0x00, 0x00, 0x00, 0x62,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1161 0x00, 0x00, 0x01, 0x62,
1162 0x00, 0x00, 0x02, 0x62
1164 static const uint16_t utf32Expected
[]={
1166 0xfffd, /* 0x110000 out of range */
1167 0xDBFF, /* 0x10FFFF in range */
1170 0xfffd, /* 0xffffffff out of range */
1171 0xfffd, /* 0x7fffffff out of range */
1175 static const int32_t utf32Offsets
[]={
1176 0, 4, 8, 8, 12, 16, 20, 24, 28
1178 static const uint8_t utf32ExpectedBack
[]={
1179 0x00, 0x00, 0x00, 0x61,
1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1182 0x00, 0x00, 0x00, 0x62,
1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1185 0x00, 0x00, 0x01, 0x62,
1186 0x00, 0x00, 0x02, 0x62
1188 static const int32_t utf32OffsetsBack
[]={
1199 testConvertToU(utf32
, sizeof(utf32
),
1200 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32be", utf32Offsets
,FALSE
);
1201 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1202 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1205 /* Test UTF-32LE bad data handling*/
1207 static const uint8_t utf32
[]={
1208 0x61, 0x00, 0x00, 0x00,
1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1211 0x62, 0x00, 0x00, 0x00,
1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1214 0x62, 0x01, 0x00, 0x00,
1215 0x62, 0x02, 0x00, 0x00,
1218 static const uint16_t utf32Expected
[]={
1220 0xfffd, /* 0x110000 out of range */
1221 0xDBFF, /* 0x10FFFF in range */
1224 0xfffd, /* 0xffffffff out of range */
1225 0xfffd, /* 0x7fffffff out of range */
1229 static const int32_t utf32Offsets
[]={
1230 0, 4, 8, 8, 12, 16, 20, 24, 28
1232 static const uint8_t utf32ExpectedBack
[]={
1233 0x61, 0x00, 0x00, 0x00,
1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1236 0x62, 0x00, 0x00, 0x00,
1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1239 0x62, 0x01, 0x00, 0x00,
1240 0x62, 0x02, 0x00, 0x00
1242 static const int32_t utf32OffsetsBack
[]={
1252 testConvertToU(utf32
, sizeof(utf32
),
1253 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32le", utf32Offsets
,FALSE
);
1254 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1255 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
/*
 * Extra MBCS coverage test.
 * The lines shown here save the current ICU data directory, set the data
 * directory to tdpath (a copy of the loadTestData() result; lines not shown
 * in this excerpt may adjust it), run testConvertFromU()/testConvertToU()
 * against the test converters "@test1", "@test3" and "@test4" with fixed
 * input, expected-output and offset tables, and finally restore the saved
 * data directory.
 * NOTE(review): in the lines shown, status from loadTestData() and both
 * malloc() results are used without a check - confirm against the full file.
 */
1259 static void TestCoverageMBCS(){
1261 UErrorCode status
= U_ZERO_ERROR
;
1262 const char *directory
= loadTestData(&status
);
1263 char* tdpath
= NULL
;
1264 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265 int len
= strlen(directory
);
1268 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1269 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1270 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1271 uprv_strcpy(tdpath
,directory
);
1272 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
/* Compares the position of the last file separator with the last character of tdpath. */
1274 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1277 u_setDataDirectory(tdpath
);
1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1281 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1282 which is test file for MBCS conversion with single-byte codepage data.*/
1285 /* MBCS with single byte codepage data test1.ucm*/
1286 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1291 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1292 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1295 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1296 which is test file for MBCS conversion with three-byte codepage data.*/
1299 /* MBCS with three byte codepage data test3.ucm*/
1300 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1302 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1304 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1309 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1310 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
1313 testConvertToU(test3input
, sizeof(test3input
),
1314 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test3", fromtest3Offs
,FALSE
);
1318 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1319 which is test file for MBCS conversion with four-byte codepage data.*/
1322 /* MBCS with four byte codepage data test4.ucm*/
1323 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1325 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1327 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1332 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1333 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
1336 testConvertToU(test4input
, sizeof(test4input
),
1337 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test4", fromtest4Offs
,FALSE
);
1342 /* restore the original data directory */
1343 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1344 u_setDataDirectory(saveDirectory
);
1345 free(saveDirectory
);
/*
 * Opens the converter named convName with my_ucnv_open() and checks that
 * ucnv_getType() reports convType.
 *   convName - converter name to open
 *   convType - UConverterType the converter is expected to report
 * An open failure is reported with log_data_err(); a type mismatch is
 * reported with log_err(); a match is reported with log_verbose().
 * The converter is closed with ucnv_close() in the last visible statement.
 */
1350 static void TestConverterType(const char *convName
, UConverterType convType
) {
1351 UConverter
* myConverter
;
1352 UErrorCode err
= U_ZERO_ERROR
;
1354 myConverter
= my_ucnv_open(convName
, &err
);
1356 if (U_FAILURE(err
)) {
1357 log_data_err("Failed to create an %s converter\n", convName
);
1362 if (ucnv_getType(myConverter
)!=convType
) {
1363 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
/* Print the type the converter actually reported ("Got ..."), not the expected convType. */
1364 convName
, ucnv_getType(myConverter
));
1367 log_verbose("ucnv_getType %s ok\n", convName
);
1370 ucnv_close(myConverter
);
/*
 * Converter type checks.
 * When legacy conversion is enabled, the visible code opens "ksc", expects
 * ucnv_getType() to return UCNV_MBCS, and calls ucnv_getStarters() on it
 * (the comparison against expectedKSCstarters is commented out).
 * It then calls TestConverterType() for a list of converter names with the
 * UConverterType each one is expected to report. Some of those calls sit
 * behind UCONFIG_NO_LEGACY_CONVERSION / U_ENABLE_GENERIC_ISO_2022 guards
 * whose closing directives are not shown in this excerpt.
 */
1373 static void TestConverterTypesAndStarters()
1375 #if !UCONFIG_NO_LEGACY_CONVERSION
1376 UConverter
* myConverter
;
1377 UErrorCode err
= U_ZERO_ERROR
;
1378 UBool mystarters
[256];
1380 /* const UBool expectedKSCstarters[256] = {
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1411 myConverter
= ucnv_open("ksc", &err
);
1412 if (U_FAILURE(err
)) {
1413 log_data_err("Failed to create an ibm-ksc converter\n");
1418 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1419 log_err("ucnv_getType Failed for ibm-949\n");
1421 log_verbose("ucnv_getType ibm-949 ok\n");
/* Starters are only queried when a converter was actually opened. */
1423 if(myConverter
!=NULL
)
1424 ucnv_getStarters(myConverter
, mystarters
, &err
);
1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427 log_err("Failed ucnv_getStarters for ksc\n");
1429 log_verbose("ucnv_getStarters ok\n");*/
1432 ucnv_close(myConverter
);
1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1435 TestConverterType("ibm-878", UCNV_SBCS
);
1438 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1440 TestConverterType("ibm-1208", UCNV_UTF8
);
1442 TestConverterType("utf-8", UCNV_UTF8
);
1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1448 #if !UCONFIG_NO_LEGACY_CONVERSION
1450 #if defined(U_ENABLE_GENERIC_ISO_2022)
1451 TestConverterType("iso-2022", UCNV_ISO_2022
);
1454 TestConverterType("hz", UCNV_HZ
);
1457 TestConverterType("scsu", UCNV_SCSU
);
1459 #if !UCONFIG_NO_LEGACY_CONVERSION
1460 TestConverterType("x-iscii-de", UCNV_ISCII
);
1463 TestConverterType("ascii", UCNV_US_ASCII
);
1464 TestConverterType("utf-7", UCNV_UTF7
);
1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1466 TestConverterType("bocu-1", UCNV_BOCU1
);
/*
 * Checks one converter for consistency between its actual mapping of byte
 * 0x5c and ucnv_isAmbiguous().
 * The bytes 0x61, 0x5B, 0x5c are converted with ucnv_toUnicode(). If the
 * first two do not come out as U+0061 U+005B, or the third comes out as
 * U+FFFD, the converter is treated as not applicable. Otherwise
 * (outUnicode[2]!=0x5c) must equal ucnv_isAmbiguous(cnv), and when the
 * third unit is not 0x5c, ucnv_fixFileSeparator() must turn it into 0x5c.
 *   cnv - an open converter to examine
 */
1470 TestAmbiguousConverter(UConverter
*cnv
) {
1471 static const char inBytes
[3]={ 0x61, 0x5B, 0x5c };
1472 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1476 UErrorCode errorCode
;
1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480 errorCode
=U_ZERO_ERROR
;
1483 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+3, NULL
, TRUE
, &errorCode
);
1484 if(U_FAILURE(errorCode
)) {
1485 /* we do not care about general failures in this test; the input may just not be mappable */
1489 if(outUnicode
[0]!=0x61 || outUnicode
[1]!=0x5B || outUnicode
[2]==0xfffd) {
1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491 /* There are some encodings that are partially ASCII based,
1492 like the ISO-7 and GSM series of codepages, which we ignore. */
1496 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1498 /* check that outUnicode[2]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499 if((outUnicode
[2]!=0x5c)!=isAmbiguous
) {
1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501 ucnv_getName(cnv
, &errorCode
), outUnicode
[2]!=0x5c, isAmbiguous
);
1505 if(outUnicode
[2]!=0x5c) {
1506 /* needs fixup, fix it */
1507 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1508 if(outUnicode
[2]!=0x5c) {
1509 /* the fix failed */
1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
/*
 * Ambiguous-mapping test.
 * First pass: opens every name returned by ucnv_getAvailableName() and hands
 * each successfully opened converter to TestAmbiguousConverter(); a failed
 * open is reported with log_err().
 * Second pass (legacy conversion enabled): converts the same backslash-
 * separated path bytes with "ibm-943" and with "LATIN-1", expects
 * ucnv_isAmbiguous() to be true for the ibm-943 converter, expects the two
 * results to differ before ucnv_fixFileSeparator() and to compare equal
 * (u_strcmp) after it.
 */
1516 static void TestAmbiguous()
1518 UErrorCode status
= U_ZERO_ERROR
;
1519 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1520 static const char target
[] = {
1521 /* "\\usr\\local\\share\\data\\icutest.txt" */
1522 0x5c, 0x75, 0x73, 0x72,
1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525 0x5c, 0x64, 0x61, 0x74, 0x61,
1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1529 UChar asciiResult
[200], sjisResult
[200];
1530 int32_t /*asciiLength = 0,*/ sjisLength
= 0, i
;
1533 /* enumerate all converters */
1534 status
=U_ZERO_ERROR
;
1535 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1536 cnv
=ucnv_open(name
, &status
);
1537 if(U_SUCCESS(status
)) {
1538 TestAmbiguousConverter(cnv
);
1541 log_err("error: unable to open available converter \"%s\"\n", name
);
1542 status
=U_ZERO_ERROR
;
1546 #if !UCONFIG_NO_LEGACY_CONVERSION
1547 sjis_cnv
= ucnv_open("ibm-943", &status
);
1548 if (U_FAILURE(status
))
1550 log_data_err("Failed to create a SJIS converter\n");
1553 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1554 if (U_FAILURE(status
))
1556 log_data_err("Failed to create a LATIN-1 converter\n");
1557 ucnv_close(sjis_cnv
);
1560 /* convert target from SJIS to Unicode */
1561 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, UPRV_LENGTHOF(sjisResult
), target
, (int32_t)strlen(target
), &status
);
1562 if (U_FAILURE(status
))
1564 log_err("Failed to convert the SJIS string.\n");
1565 ucnv_close(sjis_cnv
);
1566 ucnv_close(ascii_cnv
);
1569 /* convert target from Latin-1 to Unicode */
1570 /*asciiLength =*/ ucnv_toUChars(ascii_cnv
, asciiResult
, UPRV_LENGTHOF(asciiResult
), target
, (int32_t)strlen(target
), &status
);
1571 if (U_FAILURE(status
))
1573 log_err("Failed to convert the Latin-1 string.\n");
1574 ucnv_close(sjis_cnv
);
1575 ucnv_close(ascii_cnv
);
1578 if (!ucnv_isAmbiguous(sjis_cnv
))
1580 log_err("SJIS converter should contain ambiguous character mappings.\n");
1581 ucnv_close(sjis_cnv
);
1582 ucnv_close(ascii_cnv
);
/* Before the fix-up the two conversions are expected to differ. */
1585 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1587 log_err("File separators for SJIS don't need to be fixed.\n");
/* After ucnv_fixFileSeparator() the two results are expected to compare equal. */
1589 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1590 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1592 log_err("Fixing file separator for SJIS failed.\n");
1594 ucnv_close(sjis_cnv
);
1595 ucnv_close(ascii_cnv
);
/*
 * Tests ucnv_detectUnicodeSignature() in two passes.
 * Pass 1 passes each entry of data[] with length -1 (NUL-terminated input).
 * Pass 2 passes each entry of the second data[] with an explicit length
 * taken from len[].
 * In both passes the returned encoding name is compared with expected[i]
 * and the reported signature length with expectedLength[i]; mismatches are
 * reported with log_err().
 * The length-mismatch messages print expectedLength[i] under
 * "Expected Length" and signatureLength under "Got length".
 */
1600 TestSignatureDetection(){
1601 /* with null terminated strings */
1603 static const char* data
[] = {
1604 "\xFE\xFF\x00\x00", /* UTF-16BE */
1605 "\xFF\xFE\x00\x00", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x00", /* UTF-8 */
1607 "\x0E\xFE\xFF\x00", /* SCSU */
1609 "\xFE\xFF", /* UTF-16BE */
1610 "\xFF\xFE", /* UTF-16LE */
1611 "\xEF\xBB\xBF", /* UTF-8 */
1612 "\x0E\xFE\xFF", /* SCSU */
1614 "\xFE\xFF\x41\x42", /* UTF-16BE */
1615 "\xFF\xFE\x41\x41", /* UTF-16LE */
1616 "\xEF\xBB\xBF\x41", /* UTF-8 */
1617 "\x0E\xFE\xFF\x41", /* SCSU */
1619 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1620 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1621 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1622 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1623 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1625 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1627 static const char* expected
[] = {
1650 static const int32_t expectedLength
[] ={
1675 int32_t signatureLength
= -1;
1676 const char* source
= NULL
;
1677 const char* enc
= NULL
;
1678 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1681 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1683 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1686 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1687 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1690 if(signatureLength
!= expectedLength
[i
]){
/* Argument order follows the message: expected length first, detected length second. */
1691 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,expectedLength
[i
],signatureLength
);
1696 static const char* data
[] = {
1697 "\xFE\xFF\x00", /* UTF-16BE */
1698 "\xFF\xFE\x00", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x00", /* UTF-8 */
1700 "\x0E\xFE\xFF\x00", /* SCSU */
1701 "\x00\x00\xFE\xFF", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00", /* UTF-32LE */
1703 "\xFE\xFF", /* UTF-16BE */
1704 "\xFF\xFE", /* UTF-16LE */
1705 "\xEF\xBB\xBF", /* UTF-8 */
1706 "\x0E\xFE\xFF", /* SCSU */
1707 "\x00\x00\xFE\xFF", /* UTF-32BE */
1708 "\xFF\xFE\x00\x00", /* UTF-32LE */
1709 "\xFE\xFF\x41\x42", /* UTF-16BE */
1710 "\xFF\xFE\x41\x41", /* UTF-16LE */
1711 "\xEF\xBB\xBF\x41", /* UTF-8 */
1712 "\x0E\xFE\xFF\x41", /* SCSU */
1713 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1714 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1715 "\xFB\xEE\x28", /* BOCU-1 */
1716 "\xFF\x41\x42" /* NULL */
1718 static const int len
[] = {
1741 static const char* expected
[] = {
1763 static const int32_t expectedLength
[] ={
1787 int32_t signatureLength
= -1;
1788 int32_t sourceLength
=-1;
1789 const char* source
= NULL
;
1790 const char* enc
= NULL
;
1791 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1794 sourceLength
= len
[i
];
1795 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1800 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1801 if(expected
[i
] !=NULL
){
1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1806 if(signatureLength
!= expectedLength
[i
]){
/* Argument order follows the message: expected length first, detected length second. */
1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,expectedLength
[i
],signatureLength
);
/*
 * UTF-7 test.
 * Opens a "UTF-7" converter, runs TestNextUChar() over the bytes in in[]
 * against the (bytes read, code point) pairs in results[], checks that
 * source==limit yields U_INDEX_OUTOFBOUNDS_ERROR via TestNextUCharError(),
 * and verifies that ucnv_getName() returns "UTF-7".
 * An open failure is reported with log_data_err().
 */
1813 static void TestUTF7() {
1815 static const uint8_t in
[]={
1816 /* H - +Jjo- - ! +- +2AHcAQ */
1819 0x2b, 0x4a, 0x6a, 0x6f,
1823 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1826 /* expected test results */
1827 static const int32_t results
[]={
1828 /* number of bytes read, code point */
1831 4, 0x263a, /* <WHITE SMILING FACE> */
1838 const char *cnvName
;
1839 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1840 UErrorCode errorCode
=U_ZERO_ERROR
;
1841 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1842 if(U_FAILURE(errorCode
)) {
1843 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
));
1846 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1847 /* Test the condition when source >= sourceLimit */
1848 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1849 cnvName
= ucnv_getName(cnv
, &errorCode
);
1850 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1851 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * IMAP-mailbox-name test.
 * Opens an "IMAP-mailbox-name" converter, runs TestNextUChar() over the
 * bytes in in[] against the (bytes read, code point) pairs in results[],
 * checks that source==limit yields U_INDEX_OUTOFBOUNDS_ERROR via
 * TestNextUCharError(), and verifies that ucnv_getName() returns
 * "IMAP-mailbox-name".
 * An open failure is reported with log_data_err().
 */
1856 static void TestIMAP() {
1858 static const uint8_t in
[]={
1859 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1862 0x26, 0x4a, 0x6a, 0x6f,
1866 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1869 /* expected test results */
1870 static const int32_t results
[]={
1871 /* number of bytes read, code point */
1874 4, 0x263a, /* <WHITE SMILING FACE> */
1881 const char *cnvName
;
1882 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1883 UErrorCode errorCode
=U_ZERO_ERROR
;
1884 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1885 if(U_FAILURE(errorCode
)) {
1886 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
));
1889 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1890 /* Test the condition when source >= sourceLimit */
1891 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1892 cnvName
= ucnv_getName(cnv
, &errorCode
);
1893 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1894 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * UTF-8 test.
 * Opens a "UTF-8" converter and runs TestNextUChar() over the well-formed
 * bytes in in[] against results[], then checks that source==limit yields
 * U_INDEX_OUTOFBOUNDS_ERROR via TestNextUCharError().
 * Afterwards it installs UCNV_TO_U_CALLBACK_SKIP (saving the previous
 * callback and context in cb and p) and runs TestNextUChar() over the
 * error input in2[] against results2[].
 * An open failure is reported with log_err().
 */
1899 static void TestUTF8() {
1901 static const uint8_t in
[]={
1905 0xf0, 0x90, 0x80, 0x80,
1906 0xf4, 0x84, 0x8c, 0xa1,
1907 0xf0, 0x90, 0x90, 0x81
1910 /* expected test results */
1911 static const int32_t results
[]={
1912 /* number of bytes read, code point */
1921 /* error test input */
1922 static const uint8_t in2
[]={
1924 0xc0, 0x80, /* illegal non-shortest form */
1925 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1926 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1927 0xc0, 0xc0, /* illegal trail byte */
1928 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1929 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1930 0xfe, /* illegal byte altogether */
1934 /* expected error test results */
1935 static const int32_t results2
[]={
1936 /* number of bytes read, code point */
1941 UConverterToUCallback cb
;
1944 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1945 UErrorCode errorCode
=U_ZERO_ERROR
;
1946 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1947 if(U_FAILURE(errorCode
)) {
1948 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1951 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1952 /* Test the condition when source >= sourceLimit */
1953 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1955 /* test error behavior with a skip callback */
1956 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1957 source
=(const char *)in2
;
1958 limit
=(const char *)(in2
+sizeof(in2
));
1959 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
/*
 * CESU-8 test.
 * Opens a "CESU-8" converter and runs TestNextUChar() over in[] against
 * results[] (a byte count of -1 in results[] marks entries whose consumed
 * length may vary, per the inline comments), then checks that source==limit
 * yields U_INDEX_OUTOFBOUNDS_ERROR via TestNextUCharError().
 * Afterwards it installs UCNV_TO_U_CALLBACK_SKIP (saving the previous
 * callback and context in cb and p) and runs TestNextUChar() over the
 * error input in2[] against results2[].
 * An open failure is reported with log_data_err().
 */
1964 static void TestCESU8() {
1966 static const uint8_t in
[]={
1970 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1971 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1972 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1976 /* expected test results */
1977 static const int32_t results
[]={
1978 /* number of bytes read, code point */
1984 -1,0xd802, /* may read 3 or 6 bytes */
1985 -1,0x10ffff,/* may read 0 or 3 bytes */
1989 /* error test input */
1990 static const uint8_t in2
[]={
1992 0xc0, 0x80, /* illegal non-shortest form */
1993 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1994 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1995 0xc0, 0xc0, /* illegal trail byte */
1996 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1997 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1998 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1999 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2000 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2001 0xfe, /* illegal byte altogether */
2005 /* expected error test results */
2006 static const int32_t results2
[]={
2007 /* number of bytes read, code point */
2012 UConverterToUCallback cb
;
2015 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
2016 UErrorCode errorCode
=U_ZERO_ERROR
;
2017 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
2018 if(U_FAILURE(errorCode
)) {
2019 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
2022 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
2023 /* Test the condition when source >= sourceLimit */
2024 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2026 /* test error behavior with a skip callback */
2027 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2028 source
=(const char *)in2
;
2029 limit
=(const char *)(in2
+sizeof(in2
));
2030 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
/*
 * UTF-16 test.
 * Opens a "UTF-16" converter and runs TestNextUChar() over three inputs:
 * in1 (starts with bytes FE FF), in2 (starts with FF FE) and in3 (starts
 * with FE FE), each against its own results table. The converter is reset
 * with ucnv_resetToUnicode() between inputs. Finally it checks that
 * source==limit yields U_INDEX_OUTOFBOUNDS_ERROR via TestNextUCharError().
 * An open failure is reported with log_err().
 */
2035 static void TestUTF16() {
2037 static const uint8_t in1
[]={
2038 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2040 static const uint8_t in2
[]={
2041 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2043 static const uint8_t in3
[]={
2044 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2047 /* expected test results */
2048 static const int32_t results1
[]={
2049 /* number of bytes read, code point */
2053 static const int32_t results2
[]={
2054 /* number of bytes read, code point */
2058 static const int32_t results3
[]={
2059 /* number of bytes read, code point */
2066 const char *source
, *limit
;
2068 UErrorCode errorCode
=U_ZERO_ERROR
;
2069 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2070 if(U_FAILURE(errorCode
)) {
2071 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
2075 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2076 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
2078 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2079 ucnv_resetToUnicode(cnv
);
2080 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
2082 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2083 ucnv_resetToUnicode(cnv
);
2084 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2086 /* Test the condition when source >= sourceLimit */
2087 ucnv_resetToUnicode(cnv
);
2088 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * UTF-16BE test.
 * Opens a "utf-16be" converter, runs TestNextUChar() over in[] against
 * results[], and checks that source==limit yields
 * U_INDEX_OUTOFBOUNDS_ERROR via TestNextUCharError().
 * It then installs UCNV_TO_U_CALLBACK_STOP and expects
 * U_TRUNCATED_CHAR_FOUND for the single byte 0x61.
 * The trailing truncated-surrogate check is described by the comment text
 * below as disabled; the delimiters that disable it are not shown in this
 * excerpt.
 * An open failure is reported with log_err().
 */
2093 static void TestUTF16BE() {
2095 static const uint8_t in
[]={
2101 0xd8, 0x01, 0xdc, 0x01
2104 /* expected test results */
2105 static const int32_t results
[]={
2106 /* number of bytes read, code point */
2115 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2116 UErrorCode errorCode
=U_ZERO_ERROR
;
2117 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2118 if(U_FAILURE(errorCode
)) {
2119 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2122 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2123 /* Test the condition when source >= sourceLimit */
2124 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2125 /*Test for the condition where there is an invalid character*/
2127 static const uint8_t source2
[]={0x61};
2128 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2129 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2133 * Test disabled because currently the UTF-16BE/LE converters are supposed
2134 * to not set errors for unpaired surrogates.
2135 * This may change with
2136 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2139 /*Test for the condition where there is a surrogate pair*/
2141 const uint8_t source2
[]={0xd8, 0x01};
2142 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2151 static const uint8_t in
[]={
2156 0x01, 0xd8, 0x01, 0xdc
2159 /* expected test results */
2160 static const int32_t results
[]={
2161 /* number of bytes read, code point */
2169 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2170 UErrorCode errorCode
=U_ZERO_ERROR
;
2171 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2172 if(U_FAILURE(errorCode
)) {
2173 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2176 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2177 /* Test the condition when source >= sourceLimit */
2178 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2179 /*Test for the condition where there is an invalid character*/
2181 static const uint8_t source2
[]={0x61};
2182 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2183 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2187 * Test disabled because currently the UTF-16BE/LE converters are supposed
2188 * to not set errors for unpaired surrogates.
2189 * This may change with
2190 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2193 /*Test for the condition where there is a surrogate character*/
2195 static const uint8_t source2
[]={0x01, 0xd8};
2196 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * UTF-32 test.
 * Opens a "UTF-32" converter and runs TestNextUChar() over three inputs:
 * in1 (starts with bytes 00 00 FE FF), in2 (starts with FF FE 00 00) and
 * in3 (starts with 00 00 FE FE), each against its own results table. The
 * converter is reset with ucnv_resetToUnicode() between inputs. results3
 * expects 0xfffd for the two surrogate code units at the end of in3.
 * Finally it checks that source==limit yields U_INDEX_OUTOFBOUNDS_ERROR via
 * TestNextUCharError().
 * An open failure is reported with log_data_err().
 */
2203 static void TestUTF32() {
2205 static const uint8_t in1
[]={
2206 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2208 static const uint8_t in2
[]={
2209 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2211 static const uint8_t in3
[]={
2212 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2215 /* expected test results */
2216 static const int32_t results1
[]={
2217 /* number of bytes read, code point */
2221 static const int32_t results2
[]={
2222 /* number of bytes read, code point */
2226 static const int32_t results3
[]={
2227 /* number of bytes read, code point */
2230 4, 0xfffd, /* unmatched surrogate */
2231 4, 0xfffd /* unmatched surrogate */
2234 const char *source
, *limit
;
2236 UErrorCode errorCode
=U_ZERO_ERROR
;
2237 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2238 if(U_FAILURE(errorCode
)) {
2239 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
2243 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2244 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
2246 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2247 ucnv_resetToUnicode(cnv
);
2248 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
2250 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2251 ucnv_resetToUnicode(cnv
);
2252 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2254 /* Test the condition when source >= sourceLimit */
2255 ucnv_resetToUnicode(cnv
);
2256 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2264 static const uint8_t in
[]={
2265 0x00, 0x00, 0x00, 0x61,
2266 0x00, 0x00, 0x30, 0x61,
2267 0x00, 0x00, 0xdc, 0x00,
2268 0x00, 0x00, 0xd8, 0x00,
2269 0x00, 0x00, 0xdf, 0xff,
2270 0x00, 0x00, 0xff, 0xfe,
2271 0x00, 0x10, 0xab, 0xcd,
2272 0x00, 0x10, 0xff, 0xff
2275 /* expected test results */
2276 static const int32_t results
[]={
2277 /* number of bytes read, code point */
2288 /* error test input */
2289 static const uint8_t in2
[]={
2290 0x00, 0x00, 0x00, 0x61,
2291 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2292 0x00, 0x00, 0x00, 0x62,
2293 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2294 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2295 0x00, 0x00, 0x01, 0x62,
2296 0x00, 0x00, 0x02, 0x62
2299 /* expected error test results */
2300 static const int32_t results2
[]={
2301 /* number of bytes read, code point */
2308 UConverterToUCallback cb
;
2311 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2312 UErrorCode errorCode
=U_ZERO_ERROR
;
2313 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2314 if(U_FAILURE(errorCode
)) {
2315 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2318 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2320 /* Test the condition when source >= sourceLimit */
2321 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2323 /* test error behavior with a skip callback */
2324 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2325 source
=(const char *)in2
;
2326 limit
=(const char *)(in2
+sizeof(in2
));
2327 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
2335 static const uint8_t in
[]={
2336 0x61, 0x00, 0x00, 0x00,
2337 0x61, 0x30, 0x00, 0x00,
2338 0x00, 0xdc, 0x00, 0x00,
2339 0x00, 0xd8, 0x00, 0x00,
2340 0xff, 0xdf, 0x00, 0x00,
2341 0xfe, 0xff, 0x00, 0x00,
2342 0xcd, 0xab, 0x10, 0x00,
2343 0xff, 0xff, 0x10, 0x00
2346 /* expected test results */
2347 static const int32_t results
[]={
2348 /* number of bytes read, code point */
2359 /* error test input */
2360 static const uint8_t in2
[]={
2361 0x61, 0x00, 0x00, 0x00,
2362 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2363 0x62, 0x00, 0x00, 0x00,
2364 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2365 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2366 0x62, 0x01, 0x00, 0x00,
2367 0x62, 0x02, 0x00, 0x00,
2370 /* expected error test results */
2371 static const int32_t results2
[]={
2372 /* number of bytes read, code point */
2379 UConverterToUCallback cb
;
2382 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2383 UErrorCode errorCode
=U_ZERO_ERROR
;
2384 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2385 if(U_FAILURE(errorCode
)) {
2386 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2389 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2391 /* Test the condition when source >= sourceLimit */
2392 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2394 /* test error behavior with a skip callback */
2395 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2396 source
=(const char *)in2
;
2397 limit
=(const char *)(in2
+sizeof(in2
));
2398 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
2406 static const uint8_t in
[]={
2415 /* expected test results */
2416 static const int32_t results
[]={
2417 /* number of bytes read, code point */
2425 static const uint16_t in1
[] = {
2426 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2427 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2428 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2429 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2430 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2431 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2432 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2433 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2434 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2435 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2436 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2439 static const uint8_t out1
[] = {
2440 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2441 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2442 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2443 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2444 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2445 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2446 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2447 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2448 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2449 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2450 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2453 static const uint16_t in2
[]={
2454 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2455 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2456 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2457 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2458 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2459 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2460 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2461 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2462 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2463 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2464 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2465 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2466 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2467 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2468 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2471 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2472 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2473 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2474 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2475 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2476 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2477 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2478 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2479 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2480 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2481 0x37, 0x20, 0x2A, 0x2F,
2483 static const unsigned char out2
[]={
2484 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2485 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2486 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2487 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2488 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2489 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2490 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2491 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2492 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2493 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2494 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2495 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2496 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2497 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2498 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2499 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2500 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2501 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2502 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2503 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2504 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2505 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2506 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2507 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2508 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2509 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2510 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2511 0x37, 0x20, 0x2A, 0x2F,
2513 const char *source
=(const char *)in
;
2514 const char *limit
=(const char *)in
+sizeof(in
);
2516 UErrorCode errorCode
=U_ZERO_ERROR
;
2517 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2518 if(U_FAILURE(errorCode
)) {
2519 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2522 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2523 /* Test the condition when source >= sourceLimit */
2524 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2525 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2526 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
2534 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2535 /* expected test results */
2536 static const int32_t results
[]={
2537 /* number of bytes read, code point */
2546 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2547 UErrorCode errorCode
=U_ZERO_ERROR
;
2548 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
2549 if(U_FAILURE(errorCode
)) {
2550 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2553 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2554 /* Test the condition when source >= sourceLimit */
2555 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2556 /*Test for Illegal character */ /*
2558 static const uint8_t input1[]={ 0xA1 };
2559 const char* illegalsource=(const char*)input1;
2560 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2569 static const uint8_t in
[]={
2578 /* expected test results */
2579 static const int32_t results
[]={
2580 /* number of bytes read, code point */
2588 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2589 UErrorCode errorCode
=U_ZERO_ERROR
;
2591 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2592 if(U_FAILURE(errorCode
)) {
2593 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2596 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2597 /* Test the condition when source >= sourceLimit */
2598 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2599 /*Test for the condition where there is an invalid character*/
2601 static const uint8_t source2
[]={0x1a, 0x1b};
2602 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2604 /*Test for the condition where we have a truncated char*/
2606 static const uint8_t source1
[]={0xc4};
2607 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2608 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2616 static const uint8_t in
[]={
2627 /* expected test results */
2628 static const int32_t results
[]={
2629 /* number of bytes read, code point */
2639 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2640 UErrorCode errorCode
=U_ZERO_ERROR
;
2642 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2643 if(U_FAILURE(errorCode
)) {
2644 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2647 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2648 /* Test the condition when source >= sourceLimit */
2649 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2650 /*Test for the condition where there is an invalid character*/
2652 static const uint8_t source2
[]={0xa1, 0x80};
2653 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2655 /*Test for the condition where we have a truncated char*/
2657 static const uint8_t source1
[]={0xc4};
2658 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2659 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2665 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2668 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2670 const char *cnvName
= "ibm-1363";
2671 UErrorCode status
= U_ZERO_ERROR
;
2672 const char sourceData
[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2673 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2674 const char *source
= sourceData
;
2675 const char *sourceLim
= sourceData
+sizeof(sourceData
);
2677 UConverter
*cnv
=ucnv_open(cnvName
, &status
);
2678 if(U_FAILURE(status
)) {
2679 log_data_err("Unable to open %s converter: %s\n", cnvName
, u_errorName(status
));
2685 UChar targetBuf
[256];
2686 UChar
*target
= targetBuf
;
2687 UChar
*targetLim
= target
+256;
2688 ucnv_toUnicode(cnv
, &target
, targetLim
, &source
, sourceLim
, NULL
, TRUE
, &status
);
2690 log_info("After convert: target@%d, source@%d, status%s\n",
2691 target
-targetBuf
, source
-sourceData
, u_errorName(status
));
2693 if(U_FAILURE(status
)) {
2694 log_err("Failed to convert: %s\n", u_errorName(status
));
2701 c1
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2702 log_verbose("c1: U+%04X, source@%d, status %s\n", c1
, source
-sourceData
, u_errorName(status
));
2704 c2
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2705 log_verbose("c2: U+%04X, source@%d, status %s\n", c2
, source
-sourceData
, u_errorName(status
));
2707 c3
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2708 log_verbose("c3: U+%04X, source@%d, status %s\n", c3
, source
-sourceData
, u_errorName(status
));
2710 if(status
==U_INDEX_OUTOFBOUNDS_ERROR
&& c3
==0xFFFF) {
2711 log_verbose("OK\n");
2713 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2721 #ifdef U_ENABLE_GENERIC_ISO_2022
2726 static const uint8_t in
[]={
2733 0xf0, 0x90, 0x80, 0x80
2738 /* expected test results */
2739 static const int32_t results
[]={
2740 /* number of bytes read, code point */
2741 4, 0x0031, /* 4 bytes including the escape sequence */
2749 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2750 UErrorCode errorCode
=U_ZERO_ERROR
;
2753 cnv
=ucnv_open("ISO_2022", &errorCode
);
2754 if(U_FAILURE(errorCode
)) {
2755 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2758 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2760 /* Test the condition when source >= sourceLimit */
2761 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2762 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2763 /*Test for the condition where we have a truncated char*/
2765 static const uint8_t source1
[]={0xc4};
2766 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2767 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2769 /*Test for the condition where there is an invalid character*/
2771 static const uint8_t source2
[]={0xa1, 0x01};
2772 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
2780 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2781 const UChar
* uSource
;
2782 const UChar
* uSourceLimit
;
2783 const char* cSource
;
2784 const char* cSourceLimit
;
2785 UChar
*uTargetLimit
=NULL
;
2788 const char *cTargetLimit
;
2790 UChar
*uBuf
; /*,*test;*/
2791 int32_t uBufSize
= 120;
2794 UErrorCode errorCode
=U_ZERO_ERROR
;
2795 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2796 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2799 uSource
= (UChar
*) source
;
2800 uSourceLimit
=(const UChar
*)sourceLimit
;
2804 cTargetLimit
= cBuf
;
2805 uTargetLimit
= uBuf
;
2809 cTargetLimit
= cTargetLimit
+ i
;
2810 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2811 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2812 errorCode
=U_ZERO_ERROR
;
2816 if(U_FAILURE(errorCode
)){
2817 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2821 }while (uSource
<uSourceLimit
);
2823 cSourceLimit
=cTarget
;
2825 uTargetLimit
=uTargetLimit
+i
;
2826 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2827 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2828 errorCode
=U_ZERO_ERROR
;
2831 if(U_FAILURE(errorCode
)){
2832 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2835 }while(cSource
<cSourceLimit
);
2839 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2840 if(uBuf
[len
]!=uSource
[len
]){
2841 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2848 /* Test for Jitterbug 778 */
2849 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2850 const UChar
* uSource
;
2851 const UChar
* uSourceLimit
;
2852 const char* cSource
;
2853 UChar
*uTargetLimit
=NULL
;
2856 const char *cTargetLimit
;
2859 int32_t uBufSize
= 120;
2860 int numCharsInTarget
=0;
2861 UErrorCode errorCode
=U_ZERO_ERROR
;
2862 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2863 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2865 uSourceLimit
=sourceLimit
;
2867 cTargetLimit
= cBuf
+uBufSize
*5;
2869 uTargetLimit
= uBuf
+ uBufSize
*5;
2871 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
2872 if(U_FAILURE(errorCode
)){
2873 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2878 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2879 if(U_FAILURE(errorCode
)){
2880 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2884 while(uSource
<uSourceLimit
){
2885 if(*test
!=*uSource
){
2887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
2896 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2897 const UChar
* uSource
;
2898 const UChar
* uSourceLimit
;
2899 const char* cSource
;
2900 const char* cSourceLimit
;
2901 UChar
*uTargetLimit
=NULL
;
2904 const char *cTargetLimit
;
2906 UChar
*uBuf
; /*,*test;*/
2907 int32_t uBufSize
= 120;
2910 const UChar
*temp
= sourceLimit
;
2911 UErrorCode errorCode
=U_ZERO_ERROR
;
2912 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2913 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2917 uSource
= (UChar
*) source
;
2921 cTargetLimit
= cBuf
;
2922 uTargetLimit
= uBuf
+uBufSize
*5;
2923 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2924 uSourceLimit
=uSource
;
2927 if (uSourceLimit
< sourceLimit
) {
2928 uSourceLimit
= uSourceLimit
+1;
2930 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2931 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2932 errorCode
=U_ZERO_ERROR
;
2936 if(U_FAILURE(errorCode
)){
2937 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2941 }while (uSource
<temp
);
2945 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2946 cSourceLimit
= cSourceLimit
+1;
2948 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2949 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2950 errorCode
=U_ZERO_ERROR
;
2953 if(U_FAILURE(errorCode
)){
2954 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2957 }while(cSource
<cTarget
);
2961 for(;len
<(int)(source
- sourceLimit
);len
++){
2962 if(uBuf
[len
]!=uSource
[len
]){
2963 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
/*
 * TestGetNextUChar2022: reads code points from the bytes in [source, limit)
 * with ucnv_getNextUChar() and compares each one with the expected UTF-16
 * text in results[].
 *
 * cnv      converter used for decoding
 * source   first byte of the encoded input
 * limit    end of the encoded input
 * results  expected text as UTF-16 code units; when the current unit is a
 *          lead surrogate the expected code point is assembled with U16_NEXT
 * message  label that prefixes every log_err() line
 *
 * U_INDEX_OUTOFBOUNDS_ERROR is treated as "input exhausted", not as a
 * failure; any other failing status is reported with log_err().
 */
2971 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2972 const uint16_t results
[], const char* message
){
2973 /* const char* s0; */
2974 const char* s
=(char*)source
;
2975 const uint16_t *r
=results
;
2976 UErrorCode errorCode
=U_ZERO_ERROR
;
/* s is advanced by ucnv_getNextUChar(); r walks the expected units */
2981 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2982 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2983 break; /* no more significant input */
2984 } else if(U_FAILURE(errorCode
)) {
2985 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
/* expected value: a supplementary code point when *r is a lead surrogate */
2988 if(U16_IS_LEAD(*r
)){
2990 U16_NEXT(r
, i
, len
, exC
);
/* NOTE(review): the comparison uses exC, but the message below prints *r as the expected value; the two can differ when exC came from a surrogate pair - confirm this is intended */
2995 if(c
!=(uint32_t)(exC
))
2996 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * TestJitterbug930: checks the offsets array that ucnv_fromUnicode() fills in
 * for the converter named by enc.
 *
 * enc  converter name, passed to my_ucnv_open()
 *
 * The function calls ucnv_fromUnicode() twice. Before each call the offsets
 * storage off[] is filled with '*' bytes by memset(); afterwards offsets is
 * walked while *offsets < off[10]. After the first call the count
 * numOffWritten is compared with the number of bytes written (target-out),
 * and a mismatch is reported with log_err(). After the second call a
 * log_err() line whose expected value is -1 is emitted; the condition that
 * guards it is not among the lines shown here. The converter is closed with
 * ucnv_close() at the end.
 *
 * NOTE(review): off[10] presumably still holds the '*' fill pattern and
 * serves as an "unwritten" sentinel, which assumes fewer than 11 offsets are
 * written per call - confirm.
 * NOTE(review): err is not examined between my_ucnv_open() and the first
 * visible use of converter - confirm the open cannot fail here or add a
 * check.
 */
3002 static int TestJitterbug930(const char* enc
){
3003 UErrorCode err
= U_ZERO_ERROR
;
3004 UConverter
*converter
;
3008 const UChar
*source
= in
;
3010 int32_t* offsets
= off
;
3011 int numOffWritten
=0;
3013 converter
= my_ucnv_open(enc
, &err
);
3015 in
[0] = 0x41; /* 0x4E00;*/
/* pre-fill the offsets storage so that untouched entries can be recognised afterwards */
3020 memset(off
, '*', sizeof(off
));
3022 ucnv_fromUnicode (converter
,
3031 /* writes three bytes into the output buffer: 41 1B 24
3032 * but offsets contains 0 1 1
3034 while(*offsets
< off
[10]){
3038 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
3039 if(numOffWritten
!= (int)(target
-out
)){
3040 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
3045 memset(off
,'*' , sizeof(off
));
3049 ucnv_fromUnicode (converter
,
3058 while(*offsets
< off
[10]){
3061 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
3066 /* writes 42 43 7A into output buffer,
3067 * offsets contains -1 -1 -1
3069 ucnv_close(converter
);
3076 static const uint16_t in
[]={
3077 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3078 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3079 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3080 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3081 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3082 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3083 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3084 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3085 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3086 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3087 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3088 0x005A, 0x005B, 0x005C, 0x000A
3090 const UChar
* uSource
;
3091 const UChar
* uSourceLimit
;
3092 const char* cSource
;
3093 const char* cSourceLimit
;
3094 UChar
*uTargetLimit
=NULL
;
3097 const char *cTargetLimit
;
3100 int32_t uBufSize
= 120;
3101 UErrorCode errorCode
=U_ZERO_ERROR
;
3103 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3104 int32_t* myOff
= offsets
;
3105 cnv
=ucnv_open("HZ", &errorCode
);
3106 if(U_FAILURE(errorCode
)) {
3107 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3111 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3112 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3113 uSource
= (const UChar
*)in
;
3114 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3116 cTargetLimit
= cBuf
+uBufSize
*5;
3118 uTargetLimit
= uBuf
+ uBufSize
*5;
3119 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3120 if(U_FAILURE(errorCode
)){
3121 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3125 cSourceLimit
=cTarget
;
3128 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3129 if(U_FAILURE(errorCode
)){
3130 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3133 uSource
= (const UChar
*)in
;
3134 while(uSource
<uSourceLimit
){
3135 if(*test
!=*uSource
){
3137 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3142 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3143 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3144 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3145 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3146 TestJitterbug930("csISO2022JP");
3156 static const uint16_t in
[]={
3157 /* test full range of Devanagari */
3158 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3159 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3160 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3161 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3162 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3163 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3164 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3165 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3166 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3167 0x096D,0x096E,0x096F,
3168 /* test Soft halant*/
3169 0x0915,0x094d, 0x200D,
3170 /* test explicit halant */
3171 0x0915,0x094d, 0x200c,
3172 /* test double danda */
3175 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3176 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3177 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3178 /* tests from Lotus */
3179 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3180 0x0930,0x094D,0x200D,
3181 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3182 0x0915,0x0921,0x002B,0x095F,
3184 0x0B86, 0xB87, 0xB88,
3186 0x0C05, 0x0C02, 0x0C03,0x0c31,
3188 0x0C85, 0xC82, 0x0C83,
3189 /* test Abbr sign and Anudatta */
3199 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3200 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3203 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3204 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3205 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3206 0x093D /* Avagraha 0xEA, 0xE9*/,
3214 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3216 static const unsigned char byteArr
[]={
3218 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3219 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3220 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3221 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3222 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3223 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3224 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3225 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3226 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3228 /* test soft halant */
3230 /* test explicit halant */
3232 /* test double danda */
3235 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3236 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3237 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3240 /* tests from Lotus */
3241 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3242 0xEF,0x42,0xCF,0xE8,0xD9,
3243 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3244 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3246 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3248 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3250 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3251 /* anudatta and abbreviation sign */
3252 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3255 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3257 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3259 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3261 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3263 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3265 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3267 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3269 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3271 0xB3, 0xE9, /* Ka + NUKTA */
3273 0xB4, 0xE9, /* Kha + NUKTA */
3275 0xB5, 0xE9, /* Ga + NUKTA */
3287 /* just consume unhandled codepoints */
3291 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,UPRV_LENGTHOF(in
),"x-iscii-de",NULL
,TRUE
);
3292 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
3299 static const uint16_t in
[]={
3300 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3301 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3302 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3303 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3304 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3305 0x201D, 0x3014, 0x000D, 0x000A,
3306 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3307 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3309 const UChar
* uSource
;
3310 const UChar
* uSourceLimit
;
3311 const char* cSource
;
3312 const char* cSourceLimit
;
3313 UChar
*uTargetLimit
=NULL
;
3316 const char *cTargetLimit
;
3319 int32_t uBufSize
= 120;
3320 UErrorCode errorCode
=U_ZERO_ERROR
;
3322 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3323 int32_t* myOff
= offsets
;
3324 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3325 if(U_FAILURE(errorCode
)) {
3326 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3330 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3331 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3332 uSource
= (const UChar
*)in
;
3333 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3335 cTargetLimit
= cBuf
+uBufSize
*5;
3337 uTargetLimit
= uBuf
+ uBufSize
*5;
3338 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3339 if(U_FAILURE(errorCode
)){
3340 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3344 cSourceLimit
=cTarget
;
3347 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3348 if(U_FAILURE(errorCode
)){
3349 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3353 uSource
= (const UChar
*)in
;
3354 while(uSource
<uSourceLimit
){
3355 if(*test
!=*uSource
){
3357 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3363 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3364 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3365 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3366 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3367 TestJitterbug930("csISO2022JP");
3374 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3375 const UChar
* uSource
;
3376 const UChar
* uSourceLimit
;
3377 const char* cSource
;
3378 const char* cSourceLimit
;
3379 UChar
*uTargetLimit
=NULL
;
3382 const char *cTargetLimit
;
3385 int32_t uBufSize
= 120*10;
3386 UErrorCode errorCode
=U_ZERO_ERROR
;
3388 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3389 int32_t* myOff
= offsets
;
3390 cnv
=my_ucnv_open(conv
, &errorCode
);
3391 if(U_FAILURE(errorCode
)) {
3392 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3396 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3397 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3398 uSource
= (const UChar
*)in
;
3399 uSourceLimit
=uSource
+len
;
3401 cTargetLimit
= cBuf
+uBufSize
;
3403 uTargetLimit
= uBuf
+ uBufSize
;
3404 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3405 if(U_FAILURE(errorCode
)){
3406 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3409 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3411 cSourceLimit
=cTarget
;
3414 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3415 if(U_FAILURE(errorCode
)){
3416 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
3420 uSource
= (const UChar
*)in
;
3421 while(uSource
<uSourceLimit
){
3422 if(*test
!=*uSource
){
3423 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
3428 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3429 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3430 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
3431 if(byteArr
&& byteArrLen
!=0){
3432 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3433 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
3436 cSourceLimit
= cSource
+byteArrLen
;
3439 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3440 if(U_FAILURE(errorCode
)){
3441 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3445 uSource
= (const UChar
*)in
;
3446 while(uSource
<uSourceLimit
){
3447 if(*test
!=*uSource
){
3448 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3461 static UChar U_CALLCONV
3462 _charAt(int32_t offset
, void *context
) {
3463 return ((char*)context
)[offset
];
3467 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3470 if(U_FAILURE(*status
)){
3473 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3474 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3478 srcLen
= (int32_t)uprv_strlen(src
);
3481 for (; srcIndex
<srcLen
; ) {
3482 UChar32 c
= src
[srcIndex
++];
3483 if (c
== 0x005C /*'\\'*/) {
3484 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3485 if (c
== (UChar32
)0xFFFFFFFF) {
3486 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3487 break; /* invalid escape sequence */
3490 if(dstIndex
< dstLen
){
3492 dst
[dstIndex
++] = U16_LEAD(c
);
3493 if(dstIndex
<dstLen
){
3494 dst
[dstIndex
]=U16_TRAIL(c
);
3496 *status
=U_BUFFER_OVERFLOW_ERROR
;
3499 dst
[dstIndex
]=(UChar
)c
;
3503 *status
= U_BUFFER_OVERFLOW_ERROR
;
3505 dstIndex
++; /* for preflighting */
3511 TestFullRoundtrip(const char* cp
){
3512 UChar usource
[10] ={0};
3513 UChar nsrc
[10] = {0};
3517 /* Test codepoint 0 */
3518 TestConv(usource
,1,cp
,"",NULL
,0);
3519 TestConv(usource
,2,cp
,"",NULL
,0);
3521 TestConv(nsrc
,3,cp
,"",NULL
,0);
3523 for(;i
<=0x10FFFF;i
++){
3529 usource
[0] =(UChar
) i
;
3532 usource
[0]=U16_LEAD(i
);
3533 usource
[1]=U16_TRAIL(i
);
3540 /* Test only single code points */
3541 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3542 /* Test codepoint repeated twice */
3543 usource
[ulen
]=usource
[0];
3544 usource
[ulen
+1]=usource
[1];
3546 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3547 /* Test codepoint repeated 3 times */
3548 usource
[ulen
]=usource
[0];
3549 usource
[ulen
+1]=usource
[1];
3551 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3552 /* Test codepoint in between 2 codepoints */
3556 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
3557 uprv_memset(usource
,0,sizeof(UChar
)*10);
3562 TestRoundTrippingAllUTF(void){
3563 if(!getTestOption(QUICK_OPTION
)){
3564 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3565 TestFullRoundtrip("BOCU-1");
3566 log_verbose("Running exhaustive round trip test for SCSU\n");
3567 TestFullRoundtrip("SCSU");
3568 log_verbose("Running exhaustive round trip test for UTF-8\n");
3569 TestFullRoundtrip("UTF-8");
3570 log_verbose("Running exhaustive round trip test for CESU-8\n");
3571 TestFullRoundtrip("CESU-8");
3572 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3573 TestFullRoundtrip("UTF-16BE");
3574 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3575 TestFullRoundtrip("UTF-16LE");
3576 log_verbose("Running exhaustive round trip test for UTF-16\n");
3577 TestFullRoundtrip("UTF-16");
3578 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3579 TestFullRoundtrip("UTF-32BE");
3580 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3581 TestFullRoundtrip("UTF-32LE");
3582 log_verbose("Running exhaustive round trip test for UTF-32\n");
3583 TestFullRoundtrip("UTF-32");
3584 log_verbose("Running exhaustive round trip test for UTF-7\n");
3585 TestFullRoundtrip("UTF-7");
3586 log_verbose("Running exhaustive round trip test for UTF-7\n");
3587 TestFullRoundtrip("UTF-7,version=1");
3588 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3589 TestFullRoundtrip("IMAP-mailbox-name");
3592 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3593 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3594 * The old mappings remain as fallbacks.
3595 * This test may be reintroduced at a later time.
3600 log_verbose("Running exhaustive round trip test for GB18030\n");
3601 TestFullRoundtrip("GB18030");
3609 static const uint16_t germanUTF16
[]={
3610 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3613 static const uint8_t germanSCSU
[]={
3614 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3617 static const uint16_t russianUTF16
[]={
3618 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3621 static const uint8_t russianSCSU
[]={
3622 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3625 static const uint16_t japaneseUTF16
[]={
3626 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3627 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3628 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3629 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3630 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3631 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3632 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3633 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3634 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3635 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3636 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3637 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3638 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3639 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3640 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3643 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3644 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3645 static const uint8_t japaneseSCSU
[]={
3646 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3647 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3648 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3649 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3650 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3651 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3652 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3653 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3654 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3655 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3656 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3660 static const uint16_t allFeaturesUTF16
[]={
3661 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3662 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3663 0x01df, 0xf000, 0xdbff, 0xdfff
3666 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3667 * result here (34B vs. 35B)
3669 static const uint8_t allFeaturesSCSU
[]={
3670 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3671 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3672 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3673 0xdf, 0x14, 0x80, 0x15, 0xff
3675 static const uint16_t monkeyIn
[]={
3676 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3677 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3678 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3679 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3680 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3681 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3682 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3683 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3684 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3685 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3686 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3687 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3688 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3689 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3690 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3691 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3692 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3693 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3694 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3695 /* test non-BMP code points */
3696 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3697 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3698 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3699 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3700 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3701 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3702 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3703 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3704 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3705 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3706 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3709 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3710 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3711 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3712 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3713 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3715 static const char *fTestCases
[] = {
3716 "\\ud800\\udc00", /* smallest surrogate*/
3718 "\\udBff\\udFff", /* largest surrogate pair*/
3721 "Hello \\u9292 \\u9192 World!",
3722 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3725 "\\u0648\\u06c8", /* catch missing reset*/
3728 "\\u4444\\uE001", /* lowest quotable*/
3729 "\\u4444\\uf2FF", /* highest quotable*/
3730 "\\u4444\\uf188\\u4444",
3731 "\\u4444\\uf188\\uf288",
3732 "\\u4444\\uf188abc\\u0429\\uf288",
3734 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3735 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3736 "Hello World!123456",
3737 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3739 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3740 "abc\\u4411d", /* uses SQU*/
3741 "abc\\u4411\\u4412d",/* uses SCU*/
3742 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3743 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3745 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3746 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3747 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3749 "", /* empty input*/
3750 "\\u0000", /* smallest BMP character*/
3751 "\\uFFFF", /* largest BMP character*/
3753 /* regression tests*/
3754 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3755 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3756 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3757 "\\u0041\\u00df\\u0401\\u015f",
3758 "\\u9066\\u2123abc",
3759 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3760 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3763 for(;i
<UPRV_LENGTHOF(fTestCases
);i
++){
3764 const char* cSrc
= fTestCases
[i
];
3765 UErrorCode status
= U_ZERO_ERROR
;
3766 int32_t cSrcLen
,srcLen
;
3768 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3769 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3770 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3771 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3772 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3773 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3776 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3777 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3778 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3779 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3780 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3781 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3782 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3785 #if !UCONFIG_NO_LEGACY_CONVERSION
3786 static void TestJitterbug2346(){
3787 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3788 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3789 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3791 UChar uTarget
[500]={'\0'};
3792 UChar
* utarget
=uTarget
;
3793 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3795 char cTarget
[500]={'\0'};
3796 char* ctarget
=cTarget
;
3797 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3798 const char* csource
=source
;
3799 UChar
* temp
= expected
;
3800 UErrorCode err
=U_ZERO_ERROR
;
3802 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3803 if(U_FAILURE(err
)) {
3804 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3807 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3808 if(U_FAILURE(err
)) {
3809 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
3812 utargetLimit
=utarget
;
3814 while(utarget
<utargetLimit
){
3815 if(*temp
!=*utarget
){
3817 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget
,(int)*temp
) ;
3822 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3823 if(U_FAILURE(err
)) {
3824 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3827 ctargetLimit
=ctarget
;
3835 TestISO_2022_JP_1() {
3837 static const uint16_t in
[]={
3838 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3839 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3840 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3841 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3842 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3843 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3844 0x201D, 0x000D, 0x000A,
3845 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3846 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3847 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3848 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3849 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3850 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3852 const UChar
* uSource
;
3853 const UChar
* uSourceLimit
;
3854 const char* cSource
;
3855 const char* cSourceLimit
;
3856 UChar
*uTargetLimit
=NULL
;
3859 const char *cTargetLimit
;
3862 int32_t uBufSize
= 120;
3863 UErrorCode errorCode
=U_ZERO_ERROR
;
3866 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3867 if(U_FAILURE(errorCode
)) {
3868 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3872 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3873 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3874 uSource
= (const UChar
*)in
;
3875 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3877 cTargetLimit
= cBuf
+uBufSize
*5;
3879 uTargetLimit
= uBuf
+ uBufSize
*5;
3880 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3881 if(U_FAILURE(errorCode
)){
3882 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3886 cSourceLimit
=cTarget
;
3888 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3889 if(U_FAILURE(errorCode
)){
3890 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3893 uSource
= (const UChar
*)in
;
3894 while(uSource
<uSourceLimit
){
3895 if(*test
!=*uSource
){
3897 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3903 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3904 /*Test for the condition where there is an invalid character*/
3907 static const uint8_t source2
[]={0x0e,0x24,0x053};
3908 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3910 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3911 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3918 TestISO_2022_JP_2() {
3920 static const uint16_t in
[]={
3921 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3922 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3923 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3924 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3925 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3926 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3927 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3928 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3929 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3930 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3931 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3932 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3933 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3934 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3935 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3936 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3937 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3938 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3939 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3941 const UChar
* uSource
;
3942 const UChar
* uSourceLimit
;
3943 const char* cSource
;
3944 const char* cSourceLimit
;
3945 UChar
*uTargetLimit
=NULL
;
3948 const char *cTargetLimit
;
3951 int32_t uBufSize
= 120;
3952 UErrorCode errorCode
=U_ZERO_ERROR
;
3954 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3955 int32_t* myOff
= offsets
;
3956 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3957 if(U_FAILURE(errorCode
)) {
3958 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3962 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3963 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3964 uSource
= (const UChar
*)in
;
3965 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3967 cTargetLimit
= cBuf
+uBufSize
*5;
3969 uTargetLimit
= uBuf
+ uBufSize
*5;
3970 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3971 if(U_FAILURE(errorCode
)){
3972 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3976 cSourceLimit
=cTarget
;
3979 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3980 if(U_FAILURE(errorCode
)){
3981 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3984 uSource
= (const UChar
*)in
;
3985 while(uSource
<uSourceLimit
){
3986 if(*test
!=*uSource
){
3988 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3993 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3994 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3995 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3996 /*Test for the condition where there is an invalid character*/
3999 static const uint8_t source2
[]={0x0e,0x24,0x053};
4000 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
4011 static const uint16_t in
[]={
4012 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4013 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4014 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4015 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4016 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4017 ,0x53E3,0x53E4,0x000A,0x000D};
4018 const UChar
* uSource
;
4019 const UChar
* uSourceLimit
;
4020 const char* cSource
;
4021 const char* cSourceLimit
;
4022 UChar
*uTargetLimit
=NULL
;
4025 const char *cTargetLimit
;
4028 int32_t uBufSize
= 120;
4029 UErrorCode errorCode
=U_ZERO_ERROR
;
4031 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4032 int32_t* myOff
= offsets
;
4033 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
4034 if(U_FAILURE(errorCode
)) {
4035 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4039 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4040 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4041 uSource
= (const UChar
*)in
;
4042 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4044 cTargetLimit
= cBuf
+uBufSize
*5;
4046 uTargetLimit
= uBuf
+ uBufSize
*5;
4047 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4048 if(U_FAILURE(errorCode
)){
4049 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4053 cSourceLimit
=cTarget
;
4056 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4057 if(U_FAILURE(errorCode
)){
4058 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4061 uSource
= (const UChar
*)in
;
4062 while(uSource
<uSourceLimit
){
4063 if(*test
!=*uSource
){
4064 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4069 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4070 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4071 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4072 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4073 TestJitterbug930("csISO2022KR");
4074 /*Test for the condition where there is an invalid character*/
4077 static const uint8_t source2
[]={0x1b,0x24,0x053};
4078 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4079 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4088 TestISO_2022_KR_1() {
4090 static const uint16_t in
[]={
4091 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4092 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4093 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4094 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4095 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4096 ,0x53E3,0x53E4,0x000A,0x000D};
4097 const UChar
* uSource
;
4098 const UChar
* uSourceLimit
;
4099 const char* cSource
;
4100 const char* cSourceLimit
;
4101 UChar
*uTargetLimit
=NULL
;
4104 const char *cTargetLimit
;
4107 int32_t uBufSize
= 120;
4108 UErrorCode errorCode
=U_ZERO_ERROR
;
4110 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4111 int32_t* myOff
= offsets
;
4112 cnv
=ucnv_open("ibm-25546", &errorCode
);
4113 if(U_FAILURE(errorCode
)) {
4114 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4118 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4119 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4120 uSource
= (const UChar
*)in
;
4121 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4123 cTargetLimit
= cBuf
+uBufSize
*5;
4125 uTargetLimit
= uBuf
+ uBufSize
*5;
4126 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4127 if(U_FAILURE(errorCode
)){
4128 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4132 cSourceLimit
=cTarget
;
4135 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4136 if(U_FAILURE(errorCode
)){
4137 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4140 uSource
= (const UChar
*)in
;
4141 while(uSource
<uSourceLimit
){
4142 if(*test
!=*uSource
){
4143 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4149 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4150 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4151 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4153 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4154 /*Test for the condition where there is an invalid character*/
4157 static const uint8_t source2
[]={0x1b,0x24,0x053};
4158 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4159 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
4167 static void TestJitterbug2411(){
4168 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4169 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4170 UConverter
* kr
=NULL
, *kr1
=NULL
;
4171 UErrorCode errorCode
= U_ZERO_ERROR
;
4172 UChar tgt
[100]={'\0'};
4173 UChar
* target
= tgt
;
4174 UChar
* targetLimit
= target
+100;
4175 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4176 if(U_FAILURE(errorCode
)) {
4177 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
4180 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4181 if(U_FAILURE(errorCode
)) {
4182 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4185 kr1
= ucnv_open("ibm-25546", &errorCode
);
4186 if(U_FAILURE(errorCode
)) {
4187 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4191 targetLimit
= target
+100;
4192 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4194 if(U_FAILURE(errorCode
)) {
4195 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4206 /* From Unicode moved to testdata/conversion.txt */
4209 static const uint8_t sampleTextJIS
[] = {
4210 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4211 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4212 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4214 static const uint16_t expectedISO2022JIS
[] = {
4219 static const int32_t toISO2022JISOffs
[]={
4225 static const uint8_t sampleTextJIS7
[] = {
4226 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4227 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4228 0x1b,0x24,0x42,0x21,0x21,
4229 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4231 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4233 static const uint16_t expectedISO2022JIS7
[] = {
4241 static const int32_t toISO2022JIS7Offs
[]={
4248 static const uint8_t sampleTextJIS8
[] = {
4249 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4250 0xa1,0xc8,0xd9,/*Katakana Set*/
4253 0xb1,0xc3, /*Katakana Set*/
4254 0x1b,0x24,0x42,0x21,0x21
4256 static const uint16_t expectedISO2022JIS8
[] = {
4258 0xff61, 0xff88, 0xff99,
4263 static const int32_t toISO2022JIS8Offs
[]={
4269 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4270 UPRV_LENGTHOF(expectedISO2022JIS
),"JIS", toISO2022JISOffs
,TRUE
);
4271 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4272 UPRV_LENGTHOF(expectedISO2022JIS7
),"JIS7", toISO2022JIS7Offs
,TRUE
);
4273 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4274 UPRV_LENGTHOF(expectedISO2022JIS8
),"JIS8", toISO2022JIS8Offs
,TRUE
);
4281 ICU
4.4 (ticket
#7314) removes mappings for CNS 11643 planes 3..7
4283 static void TestJitterbug915(){
4284 /* tests for roundtripping of the below sequence
4285 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4286 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4287 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4288 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4289 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4290 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4291 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4293 static const char cSource
[]={
4294 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4295 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4296 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4297 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4298 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4299 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4300 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4301 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4302 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4303 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4304 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4305 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4306 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4307 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4308 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4309 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4310 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4311 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4312 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4313 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4314 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4315 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4316 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4317 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4318 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4319 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4320 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4321 0x37, 0x20, 0x2A, 0x2F
4323 UChar uTarget
[500]={'\0'};
4324 UChar
* utarget
=uTarget
;
4325 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4327 char cTarget
[500]={'\0'};
4328 char* ctarget
=cTarget
;
4329 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4330 const char* csource
=cSource
;
4331 const char* tempSrc
= cSource
;
4332 UErrorCode err
=U_ZERO_ERROR
;
4334 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4335 if(U_FAILURE(err
)) {
4336 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4339 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4340 if(U_FAILURE(err
)) {
4341 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4344 utargetLimit
=utarget
;
4346 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4347 if(U_FAILURE(err
)) {
4348 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4351 ctargetLimit
=ctarget
;
4353 while(ctarget
<ctargetLimit
){
4354 if(*ctarget
!= *tempSrc
){
4355 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
4365 TestISO_2022_CN_EXT() {
4367 static const uint16_t in
[]={
4368 /* test Non-BMP code points */
4369 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4370 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4371 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4372 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4373 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4374 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4375 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4376 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4377 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4380 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4381 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4382 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4383 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4384 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4385 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4386 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4387 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4388 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4389 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4390 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4391 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4392 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4393 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4394 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4395 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4396 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4397 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4399 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4403 const UChar
* uSource
;
4404 const UChar
* uSourceLimit
;
4405 const char* cSource
;
4406 const char* cSourceLimit
;
4407 UChar
*uTargetLimit
=NULL
;
4410 const char *cTargetLimit
;
4413 int32_t uBufSize
= 180;
4414 UErrorCode errorCode
=U_ZERO_ERROR
;
4416 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4417 int32_t* myOff
= offsets
;
4418 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4419 if(U_FAILURE(errorCode
)) {
4420 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4424 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4425 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4426 uSource
= (const UChar
*)in
;
4427 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4429 cTargetLimit
= cBuf
+uBufSize
*5;
4431 uTargetLimit
= uBuf
+ uBufSize
*5;
4432 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4433 if(U_FAILURE(errorCode
)){
4434 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4438 cSourceLimit
=cTarget
;
4441 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4442 if(U_FAILURE(errorCode
)){
4443 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4446 uSource
= (const UChar
*)in
;
4447 while(uSource
<uSourceLimit
){
4448 if(*test
!=*uSource
){
4449 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4452 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4457 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4458 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4459 /*Test for the condition where there is an invalid character*/
4462 static const uint8_t source2
[]={0x0e,0x24,0x053};
4463 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4475 static const uint16_t in
[]={
4477 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4478 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4479 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4480 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4481 0x0020, 0x0045, 0x004e, 0x0044,
4483 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4484 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4485 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4486 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4487 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4488 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4489 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4490 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4491 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4492 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4493 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4494 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4495 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4496 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4497 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4498 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4499 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4502 const UChar
* uSource
;
4503 const UChar
* uSourceLimit
;
4504 const char* cSource
;
4505 const char* cSourceLimit
;
4506 UChar
*uTargetLimit
=NULL
;
4509 const char *cTargetLimit
;
4512 int32_t uBufSize
= 180;
4513 UErrorCode errorCode
=U_ZERO_ERROR
;
4515 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4516 int32_t* myOff
= offsets
;
4517 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4518 if(U_FAILURE(errorCode
)) {
4519 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4523 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4524 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4525 uSource
= (const UChar
*)in
;
4526 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4528 cTargetLimit
= cBuf
+uBufSize
*5;
4530 uTargetLimit
= uBuf
+ uBufSize
*5;
4531 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4532 if(U_FAILURE(errorCode
)){
4533 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4537 cSourceLimit
=cTarget
;
4540 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4541 if(U_FAILURE(errorCode
)){
4542 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4545 uSource
= (const UChar
*)in
;
4546 while(uSource
<uSourceLimit
){
4547 if(*test
!=*uSource
){
4548 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4551 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4556 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4557 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4558 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4559 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4560 TestJitterbug930("csISO2022CN");
4561 /*Test for the condition where there is an invalid character*/
4564 static const uint8_t source2
[]={0x0e,0x24,0x053};
4565 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4574 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4576 const char * converterName
;
4577 const char * inputText
;
4578 int inputTextLength
;
4581 /*
 * Callback for TestJitterbug6175; it should only get called for empty segment errors.
 *
 * TestJitterbug6175 installs this function on each converter with ucnv_setToUCallBack().
 * The lines visible here
 *   - compare reason against UCNV_IRREGULAR and report a test failure through log_err()
 *     when the callback is entered with a different reason, then
 *   - clear *err and call ucnv_cbToUWriteSub(toArgs, 0, err), as noted below this mirrors
 *     UCNV_TO_U_CALLBACK_SUBSTITUTE.
 *
 * toArgs  converter arguments, handed through to ucnv_cbToUWriteSub()
 * reason  why the converter invoked the callback
 * err     in/out error code; reset to U_ZERO_ERROR before the substitution is written
 * context, codeUnits and length are not referenced in the lines shown here.
 */
4582 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context
, UConverterToUnicodeArgs
*toArgs
, const char* codeUnits
,
4583 int32_t length
, UConverterCallbackReason reason
, UErrorCode
* err
) {
/* NOTE(review): the body of this branch is not visible in this view; presumably it
 * returns early for reason codes above UCNV_IRREGULAR - confirm. */
4584 if (reason
> UCNV_IRREGULAR
) {
/* Any reason other than UCNV_IRREGULAR reaching this point is reported as a test failure. */
4587 if (reason
!= UCNV_IRREGULAR
) {
4588 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4590 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4591 *err
= U_ZERO_ERROR
;
4592 ucnv_cbToUWriteSub(toArgs
,0,err
);
4595 enum { kEmptySegmentToUCharsMax
= 64 };
4596 static void TestJitterbug6175(void) {
4597 static const char iso2022jp_a
[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4598 static const char iso2022kr_a
[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4599 static const char iso2022cn_a
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4600 static const char iso2022cn_b
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4601 static const char hzGB2312_a
[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4602 static const EmptySegmentTest emptySegmentTests
[] = {
4603 /* converterName inputText inputTextLength */
4604 { "ISO-2022-JP", iso2022jp_a
, sizeof(iso2022jp_a
) },
4605 { "ISO-2022-KR", iso2022kr_a
, sizeof(iso2022kr_a
) },
4606 { "ISO-2022-CN", iso2022cn_a
, sizeof(iso2022cn_a
) },
4607 { "ISO-2022-CN", iso2022cn_b
, sizeof(iso2022cn_b
) },
4608 { "HZ-GB-2312", hzGB2312_a
, sizeof(hzGB2312_a
) },
4612 const EmptySegmentTest
* testPtr
;
4613 for (testPtr
= emptySegmentTests
; testPtr
->converterName
!= NULL
; ++testPtr
) {
4614 UErrorCode err
= U_ZERO_ERROR
;
4615 UConverter
* cnv
= ucnv_open(testPtr
->converterName
, &err
);
4616 if (U_FAILURE(err
)) {
4617 log_data_err("Unable to open %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4620 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_EMPTYSEGMENT
, NULL
, NULL
, NULL
, &err
);
4621 if (U_FAILURE(err
)) {
4622 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
4627 UChar toUChars
[kEmptySegmentToUCharsMax
];
4628 UChar
* toUCharsPtr
= toUChars
;
4629 const UChar
* toUCharsLimit
= toUCharsPtr
+ kEmptySegmentToUCharsMax
;
4630 const char * inCharsPtr
= testPtr
->inputText
;
4631 const char * inCharsLimit
= inCharsPtr
+ testPtr
->inputTextLength
;
4632 ucnv_toUnicode(cnv
, &toUCharsPtr
, toUCharsLimit
, &inCharsPtr
, inCharsLimit
, NULL
, TRUE
, &err
);
4639 TestEBCDIC_STATEFUL() {
4641 static const uint8_t in
[]={
4650 /* expected test results */
4651 static const int32_t results
[]={
4652 /* number of bytes read, code point */
4661 static const uint8_t in2
[]={
4667 /* expected test results */
4668 static const int32_t results2
[]={
4669 /* number of bytes read, code point */
4674 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4675 UErrorCode errorCode
=U_ZERO_ERROR
;
4676 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4677 if(U_FAILURE(errorCode
)) {
4678 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4681 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4683 /* Test the condition when source >= sourceLimit */
4684 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4686 /*Test for the condition where source > sourceLimit after consuming the shift character */
4688 static const uint8_t source1
[]={0x0f};
4689 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4691 /*Test for the condition where there is an invalid character*/
4694 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4695 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
4698 source
=(const char*)in2
;
4699 limit
=(const char*)in2
+sizeof(in2
);
4700 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4708 static const uint8_t in
[]={
4711 0x81, 0x30, 0x81, 0x30,
4715 0x82, 0x35, 0x8f, 0x33,
4716 0x84, 0x31, 0xa4, 0x39,
4717 0x90, 0x30, 0x81, 0x30,
4718 0xe3, 0x32, 0x9a, 0x35
4721 * Feature removed markus 2000-oct-26
4722 * Only some codepages must match surrogate pairs into supplementary code points -
4723 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4724 * GB 18030 provides direct encodings for supplementary code points, therefore
4725 * it must not combine two single-encoded surrogates into one code point.
4727 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4731 /* expected test results */
4732 static const int32_t results
[]={
4733 /* number of bytes read, code point */
4745 /* Feature removed. See comment above. */
4750 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4751 UErrorCode errorCode
=U_ZERO_ERROR
;
4752 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4753 if(U_FAILURE(errorCode
)) {
4754 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4757 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4763 /* LMBCS-1 string */
4764 static const uint8_t pszLMBCS
[]={
4773 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4777 /* Unicode UChar32 equivalents */
4778 static const UChar32 pszUnicode32
[]={
4788 0x00023456, /* code point for surrogate pair */
4792 /* Unicode UChar equivalents */
4793 static const UChar pszUnicode
[] = {
4803 0xD84D, /* high (lead) surrogate of U+23456 */
4804 0xDC56, /* low (trail) surrogate of U+23456 */
4808 /* expected test results */
4809 static const int offsets32
[]={
4810 /* number of bytes read, code point */
4824 /* expected test results */
4825 static const int offsets
[]={
4826 /* number of bytes read, code point */
4844 #define NAME_LMBCS_1 "LMBCS-1"
4845 #define NAME_LMBCS_2 "LMBCS-2"
4848 /* Some basic open/close/property tests on some LMBCS converters */
4851 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4852 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4853 char get_subchars
[1];
4854 const char * get_name
;
4858 int8_t len
= sizeof(get_subchars
);
4860 UErrorCode errorCode
=U_ZERO_ERROR
;
4863 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4864 if(U_FAILURE(errorCode
)) {
4865 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4868 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4869 if(U_FAILURE(errorCode
)) {
4870 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4875 get_name
= ucnv_getName (cnv1
, &errorCode
);
4876 if (strcmp(NAME_LMBCS_1
,get_name
)){
4877 log_err("Unexpected converter name: %s\n", get_name
);
4879 get_name
= ucnv_getName (cnv2
, &errorCode
);
4880 if (strcmp(NAME_LMBCS_2
,get_name
)){
4881 log_err("Unexpected converter name: %s\n", get_name
);
4884 /* substitution chars */
4885 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4886 if(U_FAILURE(errorCode
)) {
4887 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4890 log_err("Unexpected length of sub chars\n");
4892 if (get_subchars
[0] != expected_subchars
[0]){
4893 log_err("Unexpected value of sub chars\n");
4895 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4896 if(U_FAILURE(errorCode
)) {
4897 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4899 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4900 if(U_FAILURE(errorCode
)) {
4901 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4904 log_err("Unexpected length of sub chars\n");
4906 if (get_subchars
[0] != new_subchars
[0]){
4907 log_err("Unexpected value of sub chars\n");
4914 /* LMBCS to Unicode - offsets */
4916 UErrorCode errorCode
=U_ZERO_ERROR
;
4918 const char * pSource
= (const char *)pszLMBCS
;
4919 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4921 UChar Out
[sizeof(pszUnicode
) + 1];
4923 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
4925 int32_t off
[sizeof(offsets
)];
4927 /* last 'offset' in expected results is just the final size.
4928 (Makes other tests easier). Compensate here: */
4930 off
[UPRV_LENGTHOF(offsets
)-1] = sizeof(pszLMBCS
);
4934 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4935 if(U_FAILURE(errorCode
)) {
4936 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4942 ucnv_toUnicode (cnv
,
4952 if (memcmp(off
,offsets
,sizeof(offsets
)))
4954 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4956 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4958 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4963 /* LMBCS to Unicode - getNextUChar */
4964 const char * sourceStart
;
4965 const char *source
=(const char *)pszLMBCS
;
4966 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4967 const UChar32
*results
= pszUnicode32
;
4968 const int *off
= offsets32
;
4970 UErrorCode errorCode
=U_ZERO_ERROR
;
4973 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4974 if(U_FAILURE(errorCode
)) {
4975 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4981 while(source
<limit
) {
4983 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4984 if(U_FAILURE(errorCode
)) {
4985 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4987 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
4988 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4989 uniChar
, (source
-sourceStart
), *results
, *off
);
4998 { /* test locale & optimization group operations: Unicode to LMBCS */
5000 UErrorCode errorCode
=U_ZERO_ERROR
;
5001 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
5002 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
5003 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
5004 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
5005 const UChar
* pUniOut
= uniString
;
5006 UChar
* pUniIn
= uniString
;
5007 uint8_t lmbcsString
[4];
5008 const char * pLMBCSOut
= (const char *)lmbcsString
;
5009 char * pLMBCSIn
= (char *)lmbcsString
;
5011 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5012 ucnv_fromUnicode (cnv16he
,
5013 &pLMBCSIn
, (pLMBCSIn
+ UPRV_LENGTHOF(lmbcsString
)),
5014 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5015 NULL
, 1, &errorCode
);
5017 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
5019 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5022 pLMBCSIn
= (char *)lmbcsString
;
5023 pUniOut
= uniString
;
5024 ucnv_fromUnicode (cnv01us
,
5025 &pLMBCSIn
, (const char *)(lmbcsString
+ UPRV_LENGTHOF(lmbcsString
)),
5026 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5027 NULL
, 1, &errorCode
);
5029 if (lmbcsString
[0] != 0x9F)
5031 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5034 /* single byte char from mbcs char set */
5035 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5036 pLMBCSOut
= (const char *)lmbcsString
;
5038 ucnv_toUnicode (cnv16jp
,
5039 &pUniIn
, pUniIn
+ 1,
5040 &pLMBCSOut
, (pLMBCSOut
+ 1),
5041 NULL
, 1, &errorCode
);
5042 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5044 log_err("Unexpected results from LMBCS-16 single byte char\n");
5046 /* convert to group 1: should be 3 bytes */
5047 pLMBCSIn
= (char *)lmbcsString
;
5048 pUniOut
= uniString
;
5049 ucnv_fromUnicode (cnv01us
,
5050 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
5051 &pUniOut
, pUniOut
+ 1,
5052 NULL
, 1, &errorCode
);
5053 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
5054 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
5056 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5058 pLMBCSOut
= (const char *)lmbcsString
;
5060 ucnv_toUnicode (cnv01us
,
5061 &pUniIn
, pUniIn
+ 1,
5062 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
5063 NULL
, 1, &errorCode
);
5064 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5066 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5068 pLMBCSIn
= (char *)lmbcsString
;
5069 pUniOut
= uniString
;
5070 ucnv_fromUnicode (cnv16jp
,
5071 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
5072 &pUniOut
, pUniOut
+ 1,
5073 NULL
, 1, &errorCode
);
5074 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
5076 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5078 ucnv_close(cnv16he
);
5079 ucnv_close(cnv16jp
);
5080 ucnv_close(cnv01us
);
5083 /* Small source buffer testing, LMBCS -> Unicode */
5085 UErrorCode errorCode
=U_ZERO_ERROR
;
5087 const char * pSource
= (const char *)pszLMBCS
;
5088 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
5089 int codepointCount
= 0;
5091 UChar Out
[sizeof(pszUnicode
) + 1];
5093 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
5096 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
5097 if(U_FAILURE(errorCode
)) {
5098 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5103 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
5105 ucnv_toUnicode (cnv
,
5109 (pSource
+1), /* claim that this is a 1- byte buffer */
5111 FALSE
, /* FALSE means there might be more chars in the next buffer */
5114 if (U_SUCCESS (errorCode
))
5116 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
5118 /* we are on to the next code point: check value */
5120 if (Out
[0] != pszUnicode
[codepointCount
]){
5121 log_err("LMBCS->Uni result %lx should have been %lx \n",
5122 Out
[0], pszUnicode
[codepointCount
]);
5125 pOut
= Out
; /* reset for accumulating next code point */
5131 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
5135 /* limits & surrogate error testing */
5136 char LIn
[sizeof(pszLMBCS
)];
5137 const char * pLIn
= LIn
;
5139 char LOut
[sizeof(pszLMBCS
)];
5140 char * pLOut
= LOut
;
5142 UChar UOut
[sizeof(pszUnicode
)];
5143 UChar
* pUOut
= UOut
;
5145 UChar UIn
[sizeof(pszUnicode
)];
5146 const UChar
* pUIn
= UIn
;
5148 int32_t off
[sizeof(offsets
)];
5151 errorCode
=U_ZERO_ERROR
;
5153 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5155 ucnv_fromUnicode(cnv
, &pLOut
, pLOut
+1, &pUIn
, pUIn
-1, off
, FALSE
, &errorCode
);
5156 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5158 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
5162 errorCode
=U_ZERO_ERROR
;
5163 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
5164 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5166 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
5168 errorCode
=U_ZERO_ERROR
;
5170 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
5171 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5173 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
5175 errorCode
=U_ZERO_ERROR
;
5177 /* 0 byte source request - no error, no pointer movement */
5178 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
5179 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
5180 if(U_FAILURE(errorCode
)) {
5181 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
5183 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5185 log_err("Unexpected pointer move in 0 byte source request \n");
5187 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5188 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5189 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5191 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5193 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5195 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5197 errorCode
= U_ZERO_ERROR
;
5199 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5202 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+UPRV_LENGTHOF(pszUnicode
),off
,FALSE
, &errorCode
);
5203 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5205 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5208 errorCode
= U_ZERO_ERROR
;
5210 pLIn
= (const char *)pszLMBCS
;
5211 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5212 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5214 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5217 /* unpaired or chopped LMBCS surrogates */
5219 /* OK high surrogate, Low surrogate is chopped */
5220 LIn
[0] = (char)0x14;
5221 LIn
[1] = (char)0xD8;
5222 LIn
[2] = (char)0x01;
5223 LIn
[3] = (char)0x14;
5224 LIn
[4] = (char)0xDC;
5226 errorCode
= U_ZERO_ERROR
;
5229 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5230 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5231 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5233 log_err("Unexpected results on chopped low surrogate\n");
5236 /* chopped at surrogate boundary */
5237 LIn
[0] = (char)0x14;
5238 LIn
[1] = (char)0xD8;
5239 LIn
[2] = (char)0x01;
5241 errorCode
= U_ZERO_ERROR
;
5244 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5245 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5247 log_err("Unexpected results on chopped at surrogate boundary \n");
5250 /* unpaired surrogate plus valid Unichar */
5251 LIn
[0] = (char)0x14;
5252 LIn
[1] = (char)0xD8;
5253 LIn
[2] = (char)0x01;
5254 LIn
[3] = (char)0x14;
5255 LIn
[4] = (char)0xC9;
5256 LIn
[5] = (char)0xD0;
5258 errorCode
= U_ZERO_ERROR
;
5261 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5262 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5264 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5267 /* unpaired surrogate plus chopped Unichar */
5268 LIn
[0] = (char)0x14;
5269 LIn
[1] = (char)0xD8;
5270 LIn
[2] = (char)0x01;
5271 LIn
[3] = (char)0x14;
5272 LIn
[4] = (char)0xC9;
5275 errorCode
= U_ZERO_ERROR
;
5278 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5279 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5281 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5284 /* unpaired surrogate plus valid non-Unichar */
5285 LIn
[0] = (char)0x14;
5286 LIn
[1] = (char)0xD8;
5287 LIn
[2] = (char)0x01;
5288 LIn
[3] = (char)0x0F;
5289 LIn
[4] = (char)0x3B;
5292 errorCode
= U_ZERO_ERROR
;
5295 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5296 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5298 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5301 /* unpaired surrogate plus chopped non-Unichar */
5302 LIn
[0] = (char)0x14;
5303 LIn
[1] = (char)0xD8;
5304 LIn
[2] = (char)0x01;
5305 LIn
[3] = (char)0x0F;
5308 errorCode
= U_ZERO_ERROR
;
5311 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5313 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5315 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5319 ucnv_close(cnv
); /* final cleanup */
5323 static void TestJitterbug255()
5325 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5326 const char *testBuffer
= (const char *)testBytes
;
5327 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5328 UErrorCode status
= U_ZERO_ERROR
;
5330 UConverter
*cnv
= 0;
5332 cnv
= ucnv_open("shift-jis", &status
);
5333 if (U_FAILURE(status
) || cnv
== 0) {
5334 log_data_err("Failed to open the converter for SJIS.\n");
5337 while (testBuffer
!= testEnd
)
5339 /*result = */ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5340 if (U_FAILURE(status
))
5342 log_err("Failed to convert the next UChar for SJIS.\n");
5349 static void TestEBCDICUS4XML()
5351 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
5352 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
5353 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
5354 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
5355 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
5356 UChar
*unicodes
= unicodes_x
;
5357 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5358 char *target
= target_x
;
5359 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5360 UErrorCode status
= U_ZERO_ERROR
;
5361 UConverter
*cnv
= 0;
5363 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5364 if (U_FAILURE(status
) || cnv
== 0) {
5365 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5368 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
5369 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5370 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5371 u_errorName(status
));
5372 printUSeqErr(unicodes_x
, 3);
5373 printUSeqErr(toUnicodeMaps
, 3);
5375 status
= U_ZERO_ERROR
;
5376 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
5377 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5378 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5379 u_errorName(status
));
5380 printSeqErr((const unsigned char*)target_x
, 3);
5381 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5385 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5387 #if !UCONFIG_NO_COLLATION
5389 static void TestJitterbug981(){
5391 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5392 UErrorCode status
= U_ZERO_ERROR
;
5393 UConverter
*utf8cnv
;
5394 UCollator
* myCollator
;
5397 utf8cnv
= ucnv_open ("utf8", &status
);
5398 if(U_FAILURE(status
)){
5399 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status
));
5402 myCollator
= ucol_open("zh", &status
);
5403 if(U_FAILURE(status
)){
5404 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status
));
5405 ucnv_close(utf8cnv
);
5409 rules
= ucol_getRules(myCollator
, &rules_length
);
5410 if(rules_length
== 0) {
5411 log_data_err("missing zh tailoring rule string\n");
5412 ucol_close(myCollator
);
5413 ucnv_close(utf8cnv
);
5416 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
5417 buff
= malloc(buff_size
);
5421 ucnv_reset(utf8cnv
);
5422 status
= U_ZERO_ERROR
;
5423 if(target_cap
>= buff_size
) {
5424 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5427 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5428 rules
, rules_length
, &status
);
5429 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5430 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5431 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5434 numNeeded
= bytes_needed
;
5435 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5436 ucol_close(myCollator
);
5437 ucnv_close(utf8cnv
);
5443 #if !UCONFIG_NO_LEGACY_CONVERSION
5444 static void TestJitterbug1293(){
5445 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5447 UErrorCode status
= U_ZERO_ERROR
;
5448 UConverter
* conv
=NULL
;
5449 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5450 conv
= ucnv_open("shift-jis",&status
);
5451 if(U_FAILURE(status
)){
5452 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5458 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
5459 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
5460 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5461 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5463 numNeeded
= bytes_needed
;
5464 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5465 if(U_FAILURE(status
)){
5466 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));
5473 static void TestJB5275_1(){
5475 static const char* data
= "\x3B\xB3\x0A" /* Easy characters */
5476 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5477 /* Switch script: */
5478 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5479 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5480 "\xEF\x40\x3B\xB3\x0A";
5481 static const UChar expected
[] ={
5482 0x003b, 0x0a15, 0x000a, /* Easy characters */
5483 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5484 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5485 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5486 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5489 UErrorCode status
= U_ZERO_ERROR
;
5490 UConverter
* conv
= ucnv_open("iscii-gur", &status
);
5491 UChar dest
[100] = {'\0'};
5492 UChar
* target
= dest
;
5493 UChar
* targetLimit
= dest
+100;
5494 const char* source
= data
;
5495 const char* sourceLimit
= data
+strlen(data
);
5496 const UChar
* exp
= expected
;
5498 if (U_FAILURE(status
)) {
5499 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status
));
5503 log_verbose("Testing switching back to default script when new line is encountered.\n");
5504 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5505 if(U_FAILURE(status
)){
5506 log_err("conversion failed: %s \n", u_errorName(status
));
5508 targetLimit
= target
;
5510 printUSeq(target
, targetLimit
-target
);
5511 while(target
<targetLimit
){
5513 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5521 static void TestJB5275(){
5522 static const char* data
=
5523 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5524 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5525 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5526 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5527 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5528 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5529 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5530 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5531 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5532 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5533 static const UChar expected
[] ={
5534 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5535 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5536 0x0038, 0x0C95, 0x000A, /* Kannada test */
5537 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5538 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5539 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5542 UErrorCode status
= U_ZERO_ERROR
;
5543 UConverter
* conv
= ucnv_open("iscii", &status
);
5544 UChar dest
[100] = {'\0'};
5545 UChar
* target
= dest
;
5546 UChar
* targetLimit
= dest
+100;
5547 const char* source
= data
;
5548 const char* sourceLimit
= data
+strlen(data
);
5549 const UChar
* exp
= expected
;
5550 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5551 if(U_FAILURE(status
)){
5552 log_data_err("conversion failed: %s \n", u_errorName(status
));
5554 targetLimit
= target
;
5557 printUSeq(target
, targetLimit
-target
);
5559 while(target
<targetLimit
){
5561 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5570 TestIsFixedWidth() {
5571 UErrorCode status
= U_ZERO_ERROR
;
5572 UConverter
*cnv
= NULL
;
5575 const char *fixedWidth
[] = {
5578 "ibm-5478_P100-1995"
5581 const char *notFixedWidth
[] = {
5588 for (i
= 0; i
< UPRV_LENGTHOF(fixedWidth
); i
++) {
5589 cnv
= ucnv_open(fixedWidth
[i
], &status
);
5590 if (cnv
== NULL
|| U_FAILURE(status
)) {
5591 log_data_err("Error open converter: %s - %s \n", fixedWidth
[i
], u_errorName(status
));
5595 if (!ucnv_isFixedWidth(cnv
, &status
)) {
5596 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth
[i
]);
5601 for (i
= 0; i
< UPRV_LENGTHOF(notFixedWidth
); i
++) {
5602 cnv
= ucnv_open(notFixedWidth
[i
], &status
);
5603 if (cnv
== NULL
|| U_FAILURE(status
)) {
5604 log_data_err("Error open converter: %s - %s \n", notFixedWidth
[i
], u_errorName(status
));
5608 if (ucnv_isFixedWidth(cnv
, &status
)) {
5609 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth
[i
]);