1 /********************************************************************
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 * Modification History:
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 ********************************************************************************
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "unicode/ucnv_cb.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ucol.h"
28 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
29 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
30 #if !UCONFIG_NO_COLLATION
31 static void TestJitterbug981(void);
33 static void TestJitterbug1293(void);
34 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
35 static void TestConverterTypesAndStarters(void);
36 static void TestAmbiguous(void);
37 static void TestSignatureDetection(void);
38 static void TestUTF7(void);
39 static void TestIMAP(void);
40 static void TestUTF8(void);
41 static void TestCESU8(void);
42 static void TestUTF16(void);
43 static void TestUTF16BE(void);
44 static void TestUTF16LE(void);
45 static void TestUTF32(void);
46 static void TestUTF32BE(void);
47 static void TestUTF32LE(void);
48 static void TestLATIN1(void);
50 #if !UCONFIG_NO_LEGACY_CONVERSION
51 static void TestSBCS(void);
52 static void TestDBCS(void);
53 static void TestMBCS(void);
54 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
55 static void TestICCRunout(void);
58 #ifdef U_ENABLE_GENERIC_ISO_2022
59 static void TestISO_2022(void);
62 static void TestISO_2022_JP(void);
63 static void TestISO_2022_JP_1(void);
64 static void TestISO_2022_JP_2(void);
65 static void TestISO_2022_KR(void);
66 static void TestISO_2022_KR_1(void);
67 static void TestISO_2022_CN(void);
70 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
72 static void TestISO_2022_CN_EXT(void);
74 static void TestJIS(void);
75 static void TestHZ(void);
78 static void TestSCSU(void);
80 #if !UCONFIG_NO_LEGACY_CONVERSION
81 static void TestEBCDIC_STATEFUL(void);
82 static void TestGB18030(void);
83 static void TestLMBCS(void);
84 static void TestJitterbug255(void);
85 static void TestEBCDICUS4XML(void);
88 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
90 static void TestJitterbug915(void);
92 static void TestISCII(void);
94 static void TestCoverageMBCS(void);
95 static void TestJitterbug2346(void);
96 static void TestJitterbug2411(void);
97 static void TestJB5275(void);
98 static void TestJB5275_1(void);
99 static void TestJitterbug6175(void);
102 static void TestInBufSizes(void);
104 static void TestRoundTrippingAllUTF(void);
105 static void TestConv(const uint16_t in
[],
112 /* open a converter, using test data if it begins with '@' */
113 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
116 #define NEW_MAX_BUFFER 999
118 static int32_t gInBufferSize
= NEW_MAX_BUFFER
;
119 static int32_t gOutBufferSize
= NEW_MAX_BUFFER
;
120 static char gNuConvTestName
[1024];
/*
 * Smaller of two values; used in this file for pointers as well as integers.
 * Each argument and the whole expansion are parenthesized so that operands
 * containing low-precedence operators (|, &, ?:, ...) still compare and
 * select correctly.
 * NOTE: the selected argument is evaluated twice - do not pass expressions
 * with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
124 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
126 if(cnv
&& cnv
[0] == '@') {
127 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
129 return ucnv_open(cnv
, err
);
133 static void printSeq(const unsigned char* a
, int len
)
138 log_verbose("0x%02x ", a
[i
++]);
142 static void printUSeq(const UChar
* a
, int len
)
146 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/*
 * Dump len bytes starting at a to stderr in the form
 * "{0xNN 0xNN ... }" followed by a newline.
 * Nothing is printed between the braces when len is zero or negative.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fputs("{", stderr);
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fputs("}\n", stderr);
}
159 static void printUSeqErr(const UChar
* a
, int len
)
162 fprintf(stderr
, "{U+");
164 fprintf(stderr
, "0x%04x ", a
[i
++]);
165 fprintf(stderr
,"}\n");
169 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
172 const char* s
=(char*)source
;
173 const int32_t *r
=results
;
174 UErrorCode errorCode
=U_ZERO_ERROR
;
179 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
180 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
181 break; /* no more significant input */
182 } else if(U_FAILURE(errorCode
)) {
183 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
186 /* test the expected number of input bytes only if >=0 */
187 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
190 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
191 message
, c
, (s
-s0
), *(r
+1), *r
);
199 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
201 const char* s
=(char*)source
;
202 UErrorCode errorCode
=U_ZERO_ERROR
;
204 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
205 if(errorCode
!= expected
){
206 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
208 if(c
!= 0xFFFD && c
!= 0xffff){
209 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
214 static void TestInBufSizes(void)
216 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
218 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
219 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
223 TestNewConvertWithBufferSizes(1,1);
224 TestNewConvertWithBufferSizes(2,3);
225 TestNewConvertWithBufferSizes(3,2);
229 static void TestOutBufSizes(void)
232 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
233 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
234 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
235 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
236 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
237 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
243 void addTestNewConvert(TestNode
** root
)
245 #if !UCONFIG_NO_FILE_IO
246 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
247 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
249 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
250 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
251 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
252 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
253 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
254 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
256 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
257 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
258 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
259 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
260 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
261 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
262 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
263 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
265 #if !UCONFIG_NO_LEGACY_CONVERSION
266 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
269 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
271 #if !UCONFIG_NO_LEGACY_CONVERSION
272 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
273 #if !UCONFIG_NO_FILE_IO
274 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
275 addTest(root
, &TestICCRunout
, "tsconv/nucnvtst/TestICCRunout");
277 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
279 #ifdef U_ENABLE_GENERIC_ISO_2022
280 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
283 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
284 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
285 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
286 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
287 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
288 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
289 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
291 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
292 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
293 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
295 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
298 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
300 #if !UCONFIG_NO_LEGACY_CONVERSION
301 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
302 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
303 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
304 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
305 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
306 addTest(root
, &TestJB5275
, "tsconv/nucnvtst/TestJB5275");
307 addTest(root
, &TestJB5275_1
, "tsconv/nucnvtst/TestJB5275_1");
308 #if !UCONFIG_NO_COLLATION
309 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
312 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
316 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
317 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
320 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
322 #if !UCONFIG_NO_LEGACY_CONVERSION
323 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
324 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
325 addTest(root
, &TestJitterbug6175
, "tsconv/nucnvtst/TestJitterbug6175");
331 /* Note that this test already makes use of statics, so it's not really
333 This convenience function lets us make the error messages actually useful.
336 static void setNuConvTestName(const char *codepage
, const char *direction
)
338 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
342 (int)gOutBufferSize
);
347 TC_OK
= 0, /* test was OK */
348 TC_MISMATCH
= 1, /* Match failed - err was printed */
349 TC_FAIL
= 2 /* Test failed, don't print an err because it was already printed. */
350 } ETestConvertResult
;
352 /* Note: This function uses global variables and it will not do offset
353 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
354 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
355 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
357 UErrorCode status
= U_ZERO_ERROR
;
358 UConverter
*conv
= 0;
359 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
360 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
367 int32_t realBufferSize
;
369 const UChar
*realSourceEnd
;
370 const UChar
*sourceLimit
;
371 UBool checkOffsets
= TRUE
;
374 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
375 junkout
[i
] = (char)0xF0;
376 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
379 setNuConvTestName(codepage
, "FROM");
381 log_verbose("\n========= %s\n", gNuConvTestName
);
383 conv
= my_ucnv_open(codepage
, &status
);
385 if(U_FAILURE(status
))
387 log_data_err("Couldn't open converter %s\n",codepage
);
391 ucnv_setFallback(conv
,useFallback
);
394 log_verbose("Converter opened..\n");
400 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
401 realBufferEnd
= junkout
+ realBufferSize
;
402 realSourceEnd
= source
+ sourceLen
;
404 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
405 checkOffsets
= FALSE
;
409 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
410 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
412 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
414 if(targ
== realBufferEnd
) {
415 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
418 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
421 status
= U_ZERO_ERROR
;
423 ucnv_fromUnicode (conv
,
428 checkOffsets
? offs
: NULL
,
429 doFlush
, /* flush if we're at the end of the input data */
431 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
433 if(U_FAILURE(status
)) {
434 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
438 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
439 sourceLen
, targ
-junkout
);
441 if(getTestOption(VERBOSITY_OPTION
))
444 char offset_str
[9999];
449 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
450 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
451 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
455 printSeq((const uint8_t *)expect
, expectLen
);
456 if ( checkOffsets
) {
457 log_verbose("\nOffsets:");
458 log_verbose(offset_str
);
464 if(expectLen
!= targ
-junkout
) {
465 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
466 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
468 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
469 printf("\nExpected:");
470 printSeqErr((const unsigned char*)expect
, expectLen
);
474 if (checkOffsets
&& (expectOffsets
!= 0) ) {
475 log_verbose("comparing %d offsets..\n", targ
-junkout
);
476 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
477 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
478 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
481 for(p
=junkout
;p
<targ
;p
++) {
482 log_err("%d,", junokout
[p
-junkout
]);
485 log_err("Expected: ");
486 for(i
=0; i
<(targ
-junkout
); i
++) {
487 log_err("%d,", expectOffsets
[i
]);
493 log_verbose("comparing..\n");
494 if(!memcmp(junkout
, expect
, expectLen
)) {
495 log_verbose("Matches!\n");
498 log_err("String does not match u->%s\n", gNuConvTestName
);
499 printUSeqErr(source
, sourceLen
);
501 printSeqErr((const unsigned char *)junkout
, expectLen
);
502 printf("\nExpected:");
503 printSeqErr((const unsigned char *)expect
, expectLen
);
509 /* Note: This function uses global variables and it will not do offset
510 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
511 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
512 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
514 UErrorCode status
= U_ZERO_ERROR
;
515 UConverter
*conv
= 0;
516 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
517 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
519 const char *realSourceEnd
;
520 const char *srcLimit
;
526 UBool checkOffsets
= TRUE
;
528 int32_t realBufferSize
;
529 UChar
*realBufferEnd
;
532 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
535 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
538 setNuConvTestName(codepage
, "TO");
540 log_verbose("\n========= %s\n", gNuConvTestName
);
542 conv
= my_ucnv_open(codepage
, &status
);
544 if(U_FAILURE(status
))
546 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
550 ucnv_setFallback(conv
,useFallback
);
552 log_verbose("Converter opened..\n");
554 src
= (const char *)source
;
558 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
559 realBufferEnd
= junkout
+ realBufferSize
;
560 realSourceEnd
= src
+ sourcelen
;
562 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
563 checkOffsets
= FALSE
;
567 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
568 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
570 if(targ
== realBufferEnd
)
572 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
575 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
577 /* oldTarg = targ; */
579 status
= U_ZERO_ERROR
;
581 ucnv_toUnicode (conv
,
586 checkOffsets
? offs
: NULL
,
587 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
590 /* offs += (targ-oldTarg); */
592 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
594 if(U_FAILURE(status
))
596 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
600 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
601 sourcelen
, targ
-junkout
);
602 if(getTestOption(VERBOSITY_OPTION
))
605 char offset_str
[9999];
611 for(ptr
= junkout
;ptr
<targ
;ptr
++)
613 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
614 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
618 printUSeq(expect
, expectlen
);
621 log_verbose("\nOffsets:");
622 log_verbose(offset_str
);
628 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
630 if (checkOffsets
&& (expectOffsets
!= 0))
632 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
633 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
635 for(p
=junkout
;p
<targ
;p
++) {
636 log_err("%d,", junokout
[p
-junkout
]);
639 log_err("Expected: ");
640 for(i
=0; i
<(targ
-junkout
); i
++) {
641 log_err("%d,", expectOffsets
[i
]);
645 for(i
=0; i
<(targ
-junkout
); i
++) {
646 log_err("%X,", junkout
[i
]);
650 for(i
=0; i
<(src
-(const char *)source
); i
++) {
651 log_err("%X,", (unsigned char)source
[i
]);
657 if(!memcmp(junkout
, expect
, expectlen
*2))
659 log_verbose("Matches!\n");
664 log_err("String does not match. %s\n", gNuConvTestName
);
665 log_verbose("String does not match. %s\n", gNuConvTestName
);
667 printUSeqErr(junkout
, expectlen
);
668 printf("\nExpected:");
669 printUSeqErr(expect
, expectlen
);
675 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
678 /* 1 2 3 1Han 2Han 3Han . */
679 static const UChar sampleText
[] =
680 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
681 static const UChar sampleTextRoundTripUnmappable
[] =
682 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
685 static const uint8_t expectedUTF8
[] =
686 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
687 static const int32_t toUTF8Offs
[] =
688 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
689 static const int32_t fmUTF8Offs
[] =
690 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
692 #ifdef U_ENABLE_GENERIC_ISO_2022
693 /* Same as UTF8, but with ^[%B preceding */
694 static const const uint8_t expectedISO2022
[] =
695 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
696 static const int32_t toISO2022Offs
[] =
697 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
698 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
699 static const int32_t fmISO2022Offs
[] =
700 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
703 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
704 static const uint8_t expectedIBM930
[] =
705 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
706 static const int32_t toIBM930Offs
[] =
707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
708 static const int32_t fmIBM930Offs
[] =
709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
711 /* 1 2 3 0 h1 h2 h3 . MBCS*/
712 static const uint8_t expectedIBM943
[] =
713 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
714 static const int32_t toIBM943Offs
[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
716 static const int32_t fmIBM943Offs
[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
719 /* 1 2 3 0 h1 h2 h3 . DBCS*/
720 static const uint8_t expectedIBM9027
[] =
721 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
722 static const int32_t toIBM9027Offs
[] =
723 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
725 /* 1 2 3 0 <?> <?> <?> . SBCS*/
726 static const uint8_t expectedIBM920
[] =
727 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
728 static const int32_t toIBM920Offs
[] =
729 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
731 /* 1 2 3 0 <?> <?> <?> . SBCS*/
732 static const uint8_t expectedISO88593
[] =
733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
734 static const int32_t toISO88593Offs
[] =
735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
738 static const uint8_t expectedLATIN1
[] =
739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
740 static const int32_t toLATIN1Offs
[] =
741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745 static const uint8_t expectedUTF16BE
[] =
746 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
747 static const int32_t toUTF16BEOffs
[]=
748 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
749 static const int32_t fmUTF16BEOffs
[] =
750 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
752 static const uint8_t expectedUTF16LE
[] =
753 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
754 static const int32_t toUTF16LEOffs
[]=
755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756 static const int32_t fmUTF16LEOffs
[] =
757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
759 static const uint8_t expectedUTF32BE
[] =
760 { 0x00, 0x00, 0x00, 0x31,
761 0x00, 0x00, 0x00, 0x32,
762 0x00, 0x00, 0x00, 0x33,
763 0x00, 0x00, 0x00, 0x00,
764 0x00, 0x00, 0x4e, 0x00,
765 0x00, 0x00, 0x4e, 0x8c,
766 0x00, 0x00, 0x4e, 0x09,
767 0x00, 0x00, 0x00, 0x2e,
768 0x00, 0x02, 0x00, 0x21 };
769 static const int32_t toUTF32BEOffs
[]=
770 { 0x00, 0x00, 0x00, 0x00,
771 0x01, 0x01, 0x01, 0x01,
772 0x02, 0x02, 0x02, 0x02,
773 0x03, 0x03, 0x03, 0x03,
774 0x04, 0x04, 0x04, 0x04,
775 0x05, 0x05, 0x05, 0x05,
776 0x06, 0x06, 0x06, 0x06,
777 0x07, 0x07, 0x07, 0x07,
778 0x08, 0x08, 0x08, 0x08,
779 0x08, 0x08, 0x08, 0x08 };
780 static const int32_t fmUTF32BEOffs
[] =
781 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
783 static const uint8_t expectedUTF32LE
[] =
784 { 0x31, 0x00, 0x00, 0x00,
785 0x32, 0x00, 0x00, 0x00,
786 0x33, 0x00, 0x00, 0x00,
787 0x00, 0x00, 0x00, 0x00,
788 0x00, 0x4e, 0x00, 0x00,
789 0x8c, 0x4e, 0x00, 0x00,
790 0x09, 0x4e, 0x00, 0x00,
791 0x2e, 0x00, 0x00, 0x00,
792 0x21, 0x00, 0x02, 0x00 };
793 static const int32_t toUTF32LEOffs
[]=
794 { 0x00, 0x00, 0x00, 0x00,
795 0x01, 0x01, 0x01, 0x01,
796 0x02, 0x02, 0x02, 0x02,
797 0x03, 0x03, 0x03, 0x03,
798 0x04, 0x04, 0x04, 0x04,
799 0x05, 0x05, 0x05, 0x05,
800 0x06, 0x06, 0x06, 0x06,
801 0x07, 0x07, 0x07, 0x07,
802 0x08, 0x08, 0x08, 0x08,
803 0x08, 0x08, 0x08, 0x08 };
804 static const int32_t fmUTF32LEOffs
[] =
805 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
810 /** Test chars #2 **/
812 /* Sahha [health], slashed h's */
813 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
814 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
817 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
818 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
819 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
820 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
821 /*********************************** START OF CODE finally *************/
823 gInBufferSize
= insize
;
824 gOutBufferSize
= outsize
;
826 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
830 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
831 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
833 log_verbose("Test surrogate behaviour for UTF8\n");
835 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
836 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
837 0xf0, 0x90, 0x90, 0x81,
840 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
841 testConvertFromU(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
842 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
847 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
849 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
850 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
854 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
855 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
857 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
858 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
860 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
861 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
863 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
864 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
867 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
868 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
870 #if !UCONFIG_NO_LEGACY_CONVERSION
872 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
873 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
875 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
876 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
880 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
881 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
883 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
884 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
886 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
887 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
889 testConvertFromU(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
890 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
897 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
898 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf8", fmUTF8Offs
,FALSE
);
899 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
901 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
902 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "ISO_2022", fmISO2022Offs
,FALSE
);
906 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
907 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
909 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
910 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16be", fmUTF16BEOffs
,FALSE
);
912 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
913 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32le", fmUTF32LEOffs
,FALSE
);
915 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
916 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-32be", fmUTF32BEOffs
,FALSE
);
918 #if !UCONFIG_NO_LEGACY_CONVERSION
920 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
), sampleTextRoundTripUnmappable
,
921 sizeof(sampleTextRoundTripUnmappable
)/sizeof(sampleTextRoundTripUnmappable
[0]), "ibm-930", fmIBM930Offs
,FALSE
);
923 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),sampleTextRoundTripUnmappable
,
924 sizeof(sampleTextRoundTripUnmappable
)/sizeof(sampleTextRoundTripUnmappable
[0]), "ibm-943", fmIBM943Offs
,FALSE
);
927 /* Try it again to make sure it still works */
928 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
929 sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]), "utf-16le", fmUTF16LEOffs
,FALSE
);
931 #if !UCONFIG_NO_LEGACY_CONVERSION
932 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
933 malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]), "latin3", NULL
,FALSE
);
935 testConvertFromU(malteseUChars
, sizeof(malteseUChars
)/sizeof(malteseUChars
[0]),
936 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
939 testConvertFromU(LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]),
940 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
941 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
942 LMBCSUChars
, sizeof(LMBCSUChars
)/sizeof(LMBCSUChars
[0]), "LMBCS-1", fmLMBCSOffs
,FALSE
);
945 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
947 /* encode directly set D and set O */
948 static const uint8_t utf7
[] = {
955 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
956 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
958 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
960 static const UChar unicode
[] = {
962 Hi Mom -<WHITE SMILING FACE>-!
963 A<NOT IDENTICAL TO><ALPHA>.
965 [Japanese word "nihongo"]
967 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
968 0x41, 0x2262, 0x0391, 0x2e,
970 0x65e5, 0x672c, 0x8a9e
972 static const int32_t toUnicodeOffsets
[] = {
973 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
978 static const int32_t fromUnicodeOffsets
[] = {
979 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
980 11, 12, 12, 12, 13, 13, 13, 13, 14,
982 16, 16, 16, 17, 17, 17, 18, 18, 18
985 /* same but escaping set O (the exclamation mark) */
986 static const uint8_t utf7Restricted
[] = {
993 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
994 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
996 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
998 static const int32_t toUnicodeOffsetsR
[] = {
999 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1004 static const int32_t fromUnicodeOffsetsR
[] = {
1005 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1006 11, 12, 12, 12, 13, 13, 13, 13, 14,
1008 16, 16, 16, 17, 17, 17, 18, 18, 18
1011 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
1013 testConvertToU(utf7
, sizeof(utf7
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7", toUnicodeOffsets
,FALSE
);
1015 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
1017 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
1021 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1022 * modified according to RFC 2060,
1023 * and supplemented with the one example in RFC 2060 itself.
1026 static const uint8_t imap
[] = {
1037 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1038 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1040 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1042 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1043 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1044 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1045 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1047 static const UChar unicode
[] = {
1048 /* Hi Mom -<WHITE SMILING FACE>-!
1049 A<NOT IDENTICAL TO><ALPHA>.
1051 [Japanese word "nihongo"]
1058 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1059 0x41, 0x2262, 0x0391, 0x2e,
1061 0x65e5, 0x672c, 0x8a9e,
1063 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1064 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1065 0x2f, 0x65e5, 0x672c, 0x8a9e,
1066 0x2f, 0x53f0, 0x5317
1068 static const int32_t toUnicodeOffsets
[] = {
1069 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1074 38, 39, 40, 41, 42, 43,
1079 static const int32_t fromUnicodeOffsets
[] = {
1080 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1081 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1083 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1085 20, 21, 22, 23, 24, 25,
1087 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1088 35, 36, 36, 36, 37, 37, 37, 37, 37
1091 testConvertFromU(unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1093 testConvertToU(imap
, sizeof(imap
), unicode
, sizeof(unicode
)/U_SIZEOF_UCHAR
, "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1096 /* Test UTF-8 bad data handling*/
1098 static const uint8_t utf8
[]={
1100 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1103 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1104 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1105 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1106 0xdf, 0xbf, /* 7ff */
1107 0xbf, /* truncated tail */
1108 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1112 static const uint16_t utf8Expected
[]={
1126 static const int32_t utf8Offsets
[]={
1127 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1129 testConvertToU(utf8
, sizeof(utf8
),
1130 utf8Expected
, sizeof(utf8Expected
)/sizeof(utf8Expected
[0]), "utf-8", utf8Offsets
,FALSE
);
1134 /* Test UTF-32BE bad data handling*/
1136 static const uint8_t utf32
[]={
1137 0x00, 0x00, 0x00, 0x61,
1138 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1139 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1140 0x00, 0x00, 0x00, 0x62,
1141 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1142 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1143 0x00, 0x00, 0x01, 0x62,
1144 0x00, 0x00, 0x02, 0x62
1146 static const uint16_t utf32Expected
[]={
1148 0xfffd, /* 0x110000 out of range */
1149 0xDBFF, /* 0x10FFFF in range */
1152 0xfffd, /* 0xffffffff out of range */
1153 0xfffd, /* 0x7fffffff out of range */
1157 static const int32_t utf32Offsets
[]={
1158 0, 4, 8, 8, 12, 16, 20, 24, 28
1160 static const uint8_t utf32ExpectedBack
[]={
1161 0x00, 0x00, 0x00, 0x61,
1162 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1163 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1164 0x00, 0x00, 0x00, 0x62,
1165 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1166 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1167 0x00, 0x00, 0x01, 0x62,
1168 0x00, 0x00, 0x02, 0x62
1170 static const int32_t utf32OffsetsBack
[]={
1181 testConvertToU(utf32
, sizeof(utf32
),
1182 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets
,FALSE
);
1183 testConvertFromU(utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]),
1184 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1187 /* Test UTF-32LE bad data handling*/
1189 static const uint8_t utf32
[]={
1190 0x61, 0x00, 0x00, 0x00,
1191 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1192 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1193 0x62, 0x00, 0x00, 0x00,
1194 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1195 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1196 0x62, 0x01, 0x00, 0x00,
1197 0x62, 0x02, 0x00, 0x00,
1200 static const uint16_t utf32Expected
[]={
1202 0xfffd, /* 0x110000 out of range */
1203 0xDBFF, /* 0x10FFFF in range */
1206 0xfffd, /* 0xffffffff out of range */
1207 0xfffd, /* 0x7fffffff out of range */
1211 static const int32_t utf32Offsets
[]={
1212 0, 4, 8, 8, 12, 16, 20, 24, 28
1214 static const uint8_t utf32ExpectedBack
[]={
1215 0x61, 0x00, 0x00, 0x00,
1216 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1217 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1218 0x62, 0x00, 0x00, 0x00,
1219 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1220 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1221 0x62, 0x01, 0x00, 0x00,
1222 0x62, 0x02, 0x00, 0x00
1224 static const int32_t utf32OffsetsBack
[]={
1234 testConvertToU(utf32
, sizeof(utf32
),
1235 utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]), "utf-32le", utf32Offsets
,FALSE
);
1236 testConvertFromU(utf32Expected
, sizeof(utf32Expected
)/sizeof(utf32Expected
[0]),
1237 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
/*
 * TestCoverageMBCS: additional MBCS coverage tests.
 * Temporarily points the ICU data directory at the test data directory,
 * runs testConvertFromU()/testConvertToU() against the test converters
 * "@test1", "@test3" and "@test4", then restores the saved data directory.
 */
1241 static void TestCoverageMBCS(){
1243 UErrorCode status
= U_ZERO_ERROR
;
/* Test data location; copied into tdpath below. */
1244 const char *directory
= loadTestData(&status
);
1245 char* tdpath
= NULL
;
/* Holds a copy of the current data directory so it can be restored at the end. */
/* NOTE(review): the malloc() results in this function are not checked in the visible code. */
1246 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
/* NOTE(review): status is not inspected before directory is passed to strlen() - confirm loadTestData() cannot return NULL. */
1247 int len
= strlen(directory
);
/* tdpath gets twice the length of directory; a len of 0 would request 0 bytes. */
1250 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1251 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1252 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1253 uprv_strcpy(tdpath
,directory
);
/* Find the last file separator in the copied path. */
/* NOTE(review): strrchr() returns NULL when no separator is present; the pointer subtraction below does not guard against that. */
1254 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
/* True when the last separator is not the final character of tdpath. */
1256 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1259 u_setDataDirectory(tdpath
);
1260 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1263 /* More tests to increase the code coverage in MBCS. Create a test converter from test1.ucm,
1264 which is a test file for MBCS conversion with single-byte codepage data. */
1267 /* MBCS with single byte codepage data test1.ucm*/
1268 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1269 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1270 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
/* Unicode -> "@test1" bytes, checked against expectedtest1 and totest1Offs. */
1273 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1274 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1277 /* More tests to increase the code coverage in MBCS. Create a test converter from test3.ucm,
1278 which is a test file for MBCS conversion with three-byte codepage data. */
1281 /* MBCS with three byte codepage data test3.ucm*/
1282 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1283 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1284 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1286 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1287 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1288 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
/* Unicode -> "@test3" bytes. */
1291 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1292 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
/* "@test3" bytes -> Unicode; the expected output ends in U+FFFD. */
1295 testConvertToU(test3input
, sizeof(test3input
),
1296 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test3", fromtest3Offs
,FALSE
);
1300 /* More tests to increase the code coverage in MBCS. Create a test converter from test4.ucm,
1301 which is a test file for MBCS conversion with four-byte codepage data. */
1304 /* MBCS with four byte codepage data test4.ucm*/
1305 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1306 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1307 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1309 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1310 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1311 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
/* Unicode -> "@test4" bytes. */
1314 testConvertFromU(unicodeInput
, sizeof(unicodeInput
)/sizeof(unicodeInput
[0]),
1315 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
/* "@test4" bytes -> Unicode; the expected output ends in U+FFFD. */
1318 testConvertToU(test4input
, sizeof(test4input
),
1319 expectedUnicode
, sizeof(expectedUnicode
)/sizeof(expectedUnicode
[0]), "@test4", fromtest4Offs
,FALSE
);
1324 /* restore the original data directory */
1325 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1326 u_setDataDirectory(saveDirectory
);
1327 free(saveDirectory
);
1332 static void TestConverterType(const char *convName
, UConverterType convType
) {
1333 UConverter
* myConverter
;
1334 UErrorCode err
= U_ZERO_ERROR
;
1336 myConverter
= my_ucnv_open(convName
, &err
);
1338 if (U_FAILURE(err
)) {
1339 log_data_err("Failed to create an %s converter\n", convName
);
1344 if (ucnv_getType(myConverter
)!=convType
) {
1345 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1346 convName
, convType
);
1349 log_verbose("ucnv_getType %s ok\n", convName
);
1352 ucnv_close(myConverter
);
/*
 * TestConverterTypesAndStarters: opens the "ksc" converter, checks that
 * ucnv_getType() reports UCNV_MBCS for it and calls ucnv_getStarters() on it,
 * then checks the reported UConverterType of a list of converter names via
 * TestConverterType(). The comparison of the starters against an expected
 * table is commented out.
 */
1355 static void TestConverterTypesAndStarters()
1357 #if !UCONFIG_NO_LEGACY_CONVERSION
1358 UConverter
* myConverter
;
1359 UErrorCode err
= U_ZERO_ERROR
;
/* Output buffer for ucnv_getStarters(): one flag per possible byte value. */
1360 UBool mystarters
[256];
1362 /* const UBool expectedKSCstarters[256] = {
1363 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1364 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1365 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1366 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1367 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1368 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1378 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1379 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1380 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1381 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1382 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1383 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1391 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
/* The converter is opened under the name "ksc"; the messages below refer to it as ibm-ksc / ibm-949. */
1393 myConverter
= ucnv_open("ksc", &err
);
1394 if (U_FAILURE(err
)) {
1395 log_data_err("Failed to create an ibm-ksc converter\n");
/* The "ksc" converter is expected to report the MBCS converter type. */
1400 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1401 log_err("ucnv_getType Failed for ibm-949\n");
1403 log_verbose("ucnv_getType ibm-949 ok\n");
/* Only exercises the call; the result in mystarters is not verified (see the disabled check below). */
1405 if(myConverter
!=NULL
)
1406 ucnv_getStarters(myConverter
, mystarters
, &err
);
1408 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1409 log_err("Failed ucnv_getStarters for ksc\n");
1411 log_verbose("ucnv_getStarters ok\n");*/
1414 ucnv_close(myConverter
);
/* Type checks by converter name; each call opens, checks and closes its own converter. */
1416 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1417 TestConverterType("ibm-878", UCNV_SBCS
);
1420 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1422 TestConverterType("ibm-1208", UCNV_UTF8
);
1424 TestConverterType("utf-8", UCNV_UTF8
);
1425 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1426 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1427 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1428 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1430 #if !UCONFIG_NO_LEGACY_CONVERSION
1432 #if defined(U_ENABLE_GENERIC_ISO_2022)
1433 TestConverterType("iso-2022", UCNV_ISO_2022
);
1436 TestConverterType("hz", UCNV_HZ
);
1439 TestConverterType("scsu", UCNV_SCSU
);
1441 #if !UCONFIG_NO_LEGACY_CONVERSION
1442 TestConverterType("x-iscii-de", UCNV_ISCII
);
1445 TestConverterType("ascii", UCNV_US_ASCII
);
1446 TestConverterType("utf-7", UCNV_UTF7
);
1447 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1448 TestConverterType("bocu-1", UCNV_BOCU1
);
/*
 * TestAmbiguousConverter: converts the bytes 0x61 0x5B 0x5C with cnv and
 * checks that "byte 0x5C did not map to U+005C" agrees with
 * ucnv_isAmbiguous(cnv). When the backslash was mapped elsewhere, it also
 * checks that ucnv_fixFileSeparator() turns the result back into U+005C.
 *
 * cnv: an open converter; it is not closed in the visible code.
 */
1452 TestAmbiguousConverter(UConverter
*cnv
) {
1453 static const char inBytes
[3]={ 0x61, 0x5B, 0x5c };
1454 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1458 UErrorCode errorCode
;
1461 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1462 errorCode
=U_ZERO_ERROR
;
/* Converts all 3 input bytes in one call (flush=TRUE, no offsets) into at most 20 UChars. */
1465 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+3, NULL
, TRUE
, &errorCode
);
1466 if(U_FAILURE(errorCode
)) {
1467 /* we do not care about general failures in this test; the input may just not be mappable */
1471 if(outUnicode
[0]!=0x61 || outUnicode
[1]!=0x5B || outUnicode
[2]==0xfffd) {
1472 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1473 /* There are some encodings that are partially ASCII based,
1474 like the ISO-7 and GSM series of codepages, which we ignore. */
1478 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1480 /* check that outUnicode[2]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1481 if((outUnicode
[2]!=0x5c)!=isAmbiguous
) {
1482 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1483 ucnv_getName(cnv
, &errorCode
), outUnicode
[2]!=0x5c, isAmbiguous
);
1487 if(outUnicode
[2]!=0x5c) {
1488 /* needs fixup, fix it */
/* The length passed is the number of UChars written by ucnv_toUnicode() above. */
1489 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1490 if(outUnicode
[2]!=0x5c) {
1491 /* the fix failed */
1492 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
/*
 * TestAmbiguous: runs TestAmbiguousConverter() on every converter returned by
 * ucnv_getAvailableName(), then converts a backslash-separated path with the
 * "ibm-943" and "LATIN-1" converters and checks that
 *   - ucnv_isAmbiguous() is true for the ibm-943 converter,
 *   - the two conversion results differ before ucnv_fixFileSeparator(), and
 *   - they are equal after it.
 */
1498 static void TestAmbiguous()
1500 UErrorCode status
= U_ZERO_ERROR
;
1501 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
/* NOTE(review): target is measured with strlen() below, so it must end in a NUL byte; no terminator is visible in this extract - confirm. */
1502 static const char target
[] = {
1503 /* "\\usr\\local\\share\\data\\icutest.txt" */
1504 0x5c, 0x75, 0x73, 0x72,
1505 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1506 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1507 0x5c, 0x64, 0x61, 0x74, 0x61,
1508 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1511 UChar asciiResult
[200], sjisResult
[200];
1512 int32_t /*asciiLength = 0,*/ sjisLength
= 0, i
;
1515 /* enumerate all converters */
1516 status
=U_ZERO_ERROR
;
1517 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
/* NOTE(review): no ucnv_close(cnv) is visible in this extract for converters opened in this loop - confirm. */
1518 cnv
=ucnv_open(name
, &status
);
1519 if(U_SUCCESS(status
)) {
1520 TestAmbiguousConverter(cnv
);
1523 log_err("error: unable to open available converter \"%s\"\n", name
);
/* Reset so a failure for one converter does not affect the next ucnv_open(). */
1524 status
=U_ZERO_ERROR
;
1528 #if !UCONFIG_NO_LEGACY_CONVERSION
1529 sjis_cnv
= ucnv_open("ibm-943", &status
);
1530 if (U_FAILURE(status
))
1532 log_data_err("Failed to create a SJIS converter\n");
1535 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1536 if (U_FAILURE(status
))
1538 log_data_err("Failed to create a LATIN-1 converter\n");
1539 ucnv_close(sjis_cnv
);
1542 /* convert target from SJIS to Unicode */
1543 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, sizeof(sjisResult
)/U_SIZEOF_UCHAR
, target
, (int32_t)strlen(target
), &status
);
1544 if (U_FAILURE(status
))
1546 log_err("Failed to convert the SJIS string.\n");
1547 ucnv_close(sjis_cnv
);
1548 ucnv_close(ascii_cnv
);
1551 /* convert target from Latin-1 to Unicode */
1552 /*asciiLength =*/ ucnv_toUChars(ascii_cnv
, asciiResult
, sizeof(asciiResult
)/U_SIZEOF_UCHAR
, target
, (int32_t)strlen(target
), &status
);
1553 if (U_FAILURE(status
))
1555 log_err("Failed to convert the Latin-1 string.\n");
1556 ucnv_close(sjis_cnv
);
1557 ucnv_close(ascii_cnv
);
1560 if (!ucnv_isAmbiguous(sjis_cnv
))
1562 log_err("SJIS converter should contain ambiguous character mappings.\n");
1563 ucnv_close(sjis_cnv
);
1564 ucnv_close(ascii_cnv
);
/* Before the fix the two results are expected to differ; equality is the error case. */
1567 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1569 log_err("File separators for SJIS don't need to be fixed.\n");
/* After fixing the separators in place, the SJIS result must equal the Latin-1 result. */
1571 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1572 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1574 log_err("Fixing file separator for SJIS failed.\n");
1576 ucnv_close(sjis_cnv
);
1577 ucnv_close(ascii_cnv
);
/*
 * TestSignatureDetection: checks ucnv_detectUnicodeSignature() in two passes.
 * The first pass passes -1 as the source length (NUL-terminated input); the
 * second passes an explicit length taken from len[]. Each pass compares the
 * returned encoding name with expected[] and the reported signature length
 * with expectedLength[].
 */
1582 TestSignatureDetection(){
1583 /* with null terminated strings */
1585 static const char* data
[] = {
1586 "\xFE\xFF\x00\x00", /* UTF-16BE */
1587 "\xFF\xFE\x00\x00", /* UTF-16LE */
1588 "\xEF\xBB\xBF\x00", /* UTF-8 */
1589 "\x0E\xFE\xFF\x00", /* SCSU */
1591 "\xFE\xFF", /* UTF-16BE */
1592 "\xFF\xFE", /* UTF-16LE */
1593 "\xEF\xBB\xBF", /* UTF-8 */
1594 "\x0E\xFE\xFF", /* SCSU */
1596 "\xFE\xFF\x41\x42", /* UTF-16BE */
1597 "\xFF\xFE\x41\x41", /* UTF-16LE */
1598 "\xEF\xBB\xBF\x41", /* UTF-8 */
1599 "\x0E\xFE\xFF\x41", /* SCSU */
1601 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1602 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1603 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1604 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1605 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1607 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
/* expected[] and expectedLength[] are indexed in parallel with data[]. */
1609 static const char* expected
[] = {
1632 static const int32_t expectedLength
[] ={
1657 int32_t signatureLength
= -1;
1658 const char* source
= NULL
;
1659 const char* enc
= NULL
;
1660 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
/* Length -1: the source is treated as NUL-terminated. */
1663 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1665 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1668 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1669 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1672 if(signatureLength
!= expectedLength
[i
]){
/* NOTE(review): the format string labels "Expected Length" first and "Got length" second, but the arguments are passed as signatureLength (got), expectedLength[i] (expected) - the two values are printed under the wrong labels. */
1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/* Second pass: same check, but with explicit source lengths. */
1678 static const char* data
[] = {
1679 "\xFE\xFF\x00", /* UTF-16BE */
1680 "\xFF\xFE\x00", /* UTF-16LE */
1681 "\xEF\xBB\xBF\x00", /* UTF-8 */
1682 "\x0E\xFE\xFF\x00", /* SCSU */
1683 "\x00\x00\xFE\xFF", /* UTF-32BE */
1684 "\xFF\xFE\x00\x00", /* UTF-32LE */
1685 "\xFE\xFF", /* UTF-16BE */
1686 "\xFF\xFE", /* UTF-16LE */
1687 "\xEF\xBB\xBF", /* UTF-8 */
1688 "\x0E\xFE\xFF", /* SCSU */
1689 "\x00\x00\xFE\xFF", /* UTF-32BE */
1690 "\xFF\xFE\x00\x00", /* UTF-32LE */
1691 "\xFE\xFF\x41\x42", /* UTF-16BE */
1692 "\xFF\xFE\x41\x41", /* UTF-16LE */
1693 "\xEF\xBB\xBF\x41", /* UTF-8 */
1694 "\x0E\xFE\xFF\x41", /* SCSU */
1695 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1696 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1697 "\xFB\xEE\x28", /* BOCU-1 */
1698 "\xFF\x41\x42" /* NULL */
/* len[] supplies the explicit byte length for each data[] entry. */
1700 static const int len
[] = {
1723 static const char* expected
[] = {
1745 static const int32_t expectedLength
[] ={
1769 int32_t signatureLength
= -1;
1770 int32_t sourceLength
=-1;
1771 const char* source
= NULL
;
1772 const char* enc
= NULL
;
1773 for( ; i
<sizeof(data
)/sizeof(char*); i
++){
1776 sourceLength
= len
[i
];
1777 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1779 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
/* NOTE(review): if expected[i] is NULL while enc is non-NULL, strcmp() receives a NULL argument before the expected[i] != NULL check below is reached - confirm this cannot happen. */
1782 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
/* A mismatch is only an error when a signature was expected for this entry. */
1783 if(expected
[i
] !=NULL
){
1784 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1788 if(signatureLength
!= expectedLength
[i
]){
/* NOTE(review): as in the first pass, the arguments are passed as (got, expected) while the labels read (Expected, Got). */
1789 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
/*
 * TestUTF7: opens a "UTF-7" converter, runs TestNextUChar() over the input
 * bytes against the expected (bytes read, code point) pairs in results[],
 * checks that an empty source range yields U_INDEX_OUTOFBOUNDS_ERROR, and
 * checks that ucnv_getName() returns "UTF-7".
 */
1795 static void TestUTF7() {
1797 static const uint8_t in
[]={
1798 /* H - +Jjo- - ! +- +2AHcAQ */
1801 0x2b, 0x4a, 0x6a, 0x6f,
1805 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1808 /* expected test results */
1809 static const int32_t results
[]={
1810 /* number of bytes read, code point */
1813 4, 0x263a, /* <WHITE SMILING FACE> */
1820 const char *cnvName
;
/* source/limit delimit the whole in[] array. */
1821 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1822 UErrorCode errorCode
=U_ZERO_ERROR
;
1823 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1824 if(U_FAILURE(errorCode
)) {
1825 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1828 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1829 /* Test the condition when source >= sourceLimit */
1830 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* The converter must report its name as exactly "UTF-7". */
1831 cnvName
= ucnv_getName(cnv
, &errorCode
);
1832 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1833 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * TestIMAP: opens an "IMAP-mailbox-name" converter, runs TestNextUChar() over
 * the input bytes against the expected (bytes read, code point) pairs in
 * results[], checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR, and checks that ucnv_getName() returns
 * "IMAP-mailbox-name".
 */
1838 static void TestIMAP() {
1840 static const uint8_t in
[]={
1841 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1844 0x26, 0x4a, 0x6a, 0x6f,
1848 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1851 /* expected test results */
1852 static const int32_t results
[]={
1853 /* number of bytes read, code point */
1856 4, 0x263a, /* <WHITE SMILING FACE> */
1863 const char *cnvName
;
/* source/limit delimit the whole in[] array. */
1864 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1865 UErrorCode errorCode
=U_ZERO_ERROR
;
1866 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1867 if(U_FAILURE(errorCode
)) {
1868 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
)); /* shouldn't be a data err */
1871 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1872 /* Test the condition when source >= sourceLimit */
1873 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* The converter must report its name as exactly "IMAP-mailbox-name". */
1874 cnvName
= ucnv_getName(cnv
, &errorCode
);
1875 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1876 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
/*
 * TestUTF8: opens a "UTF-8" converter and runs TestNextUChar() twice:
 * first over well-formed input (in[] / results[]), then, after installing
 * UCNV_TO_U_CALLBACK_SKIP, over the ill-formed sequences in in2[] with the
 * expectations in results2[]. Also checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR.
 */
1881 static void TestUTF8() {
1883 static const uint8_t in
[]={
1887 0xf0, 0x90, 0x80, 0x80,
1888 0xf4, 0x84, 0x8c, 0xa1,
1889 0xf0, 0x90, 0x90, 0x81
1892 /* expected test results */
1893 static const int32_t results
[]={
1894 /* number of bytes read, code point */
1903 /* error test input */
1904 static const uint8_t in2
[]={
1906 0xc0, 0x80, /* illegal non-shortest form */
1907 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1908 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1909 0xc0, 0xc0, /* illegal trail byte */
1910 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1911 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1912 0xfe, /* illegal byte altogether */
1916 /* expected error test results */
1917 static const int32_t results2
[]={
1918 /* number of bytes read, code point */
/* Receives the previously installed to-Unicode callback from ucnv_setToUCallBack(). */
1923 UConverterToUCallback cb
;
1926 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1927 UErrorCode errorCode
=U_ZERO_ERROR
;
1928 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1929 if(U_FAILURE(errorCode
)) {
1930 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1933 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1934 /* Test the condition when source >= sourceLimit */
1935 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1937 /* test error behavior with a skip callback */
/* The old callback and context are stored in cb and p; they are not restored in the visible code. */
1938 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
/* Re-point source/limit at the error-case input. */
1939 source
=(const char *)in2
;
1940 limit
=(const char *)(in2
+sizeof(in2
));
1941 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
/*
 * TestCESU8: opens a "CESU-8" converter and runs TestNextUChar() twice:
 * first over in[] / results[], then, after installing
 * UCNV_TO_U_CALLBACK_SKIP, over the ill-formed sequences in in2[] with the
 * expectations in results2[]. Also checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR.
 */
1946 static void TestCESU8() {
1948 static const uint8_t in
[]={
/* Six-byte groups: each is two consecutive three-byte sequences. */
1952 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1953 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1954 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1958 /* expected test results */
1959 static const int32_t results
[]={
1960 /* number of bytes read, code point */
/* A byte count of -1 marks entries where the number of bytes read may vary (see the per-entry comments). */
1966 -1,0xd802, /* may read 3 or 6 bytes */
1967 -1,0x10ffff,/* may read 0 or 3 bytes */
1971 /* error test input */
1972 static const uint8_t in2
[]={
1974 0xc0, 0x80, /* illegal non-shortest form */
1975 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1976 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1977 0xc0, 0xc0, /* illegal trail byte */
1978 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1979 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1980 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1981 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1982 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1983 0xfe, /* illegal byte altogether */
1987 /* expected error test results */
1988 static const int32_t results2
[]={
1989 /* number of bytes read, code point */
/* Receives the previously installed to-Unicode callback from ucnv_setToUCallBack(). */
1994 UConverterToUCallback cb
;
1997 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1998 UErrorCode errorCode
=U_ZERO_ERROR
;
1999 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
2000 if(U_FAILURE(errorCode
)) {
2001 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
2004 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
2005 /* Test the condition when source >= sourceLimit */
2006 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2008 /* test error behavior with a skip callback */
/* The old callback and context are stored in cb and p; they are not restored in the visible code. */
2009 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
/* Re-point source/limit at the error-case input. */
2010 source
=(const char *)in2
;
2011 limit
=(const char *)(in2
+sizeof(in2
));
2012 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
/*
 * TestUTF16: opens a "UTF-16" converter and runs TestNextUChar() over three
 * inputs that begin with the bytes FE FF, FF FE and FE FE respectively,
 * resetting the converter's to-Unicode state between runs. Finally checks
 * that an empty source range yields U_INDEX_OUTOFBOUNDS_ERROR.
 */
2017 static void TestUTF16() {
/* Starts with FE FF. */
2019 static const uint8_t in1
[]={
2020 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
/* Starts with FF FE. */
2022 static const uint8_t in2
[]={
2023 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
/* Starts with FE FE, which is neither of the two byte pairs above. */
2025 static const uint8_t in3
[]={
2026 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2029 /* expected test results */
2030 static const int32_t results1
[]={
2031 /* number of bytes read, code point */
2035 static const int32_t results2
[]={
2036 /* number of bytes read, code point */
2040 static const int32_t results3
[]={
2041 /* number of bytes read, code point */
2048 const char *source
, *limit
;
2050 UErrorCode errorCode
=U_ZERO_ERROR
;
2051 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2052 if(U_FAILURE(errorCode
)) {
2053 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
2057 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2058 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
2060 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
/* Reset the to-Unicode state before feeding the next input. */
2061 ucnv_resetToUnicode(cnv
);
2062 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
2064 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2065 ucnv_resetToUnicode(cnv
);
2066 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2068 /* Test the condition when source >= sourceLimit */
2069 ucnv_resetToUnicode(cnv
);
2070 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * TestUTF16BE: opens a "utf-16be" converter, runs TestNextUChar() over in[]
 * against results[], checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR, and checks that a single trailing byte yields
 * U_TRUNCATED_CHAR_FOUND once the STOP callback is installed.
 * A further unpaired-surrogate check is described as disabled.
 */
2075 static void TestUTF16BE() {
2077 static const uint8_t in
[]={
2083 0xd8, 0x01, 0xdc, 0x01
2086 /* expected test results */
2087 static const int32_t results
[]={
2088 /* number of bytes read, code point */
2097 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2098 UErrorCode errorCode
=U_ZERO_ERROR
;
2099 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2100 if(U_FAILURE(errorCode
)) {
2101 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2104 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2105 /* Test the condition when source >= sourceLimit */
2106 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2107 /*Test for the condition where there is an invalid character*/
/* One byte only: half of a 16-bit code unit. */
2109 static const uint8_t source2
[]={0x61};
2110 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2111 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2115 * Test disabled because currently the UTF-16BE/LE converters are supposed
2116 * to not set errors for unpaired surrogates.
2117 * This may change with
2118 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2121 /*Test for the condition where there is a surrogate pair*/
2123 const uint8_t source2
[]={0xd8, 0x01};
2124 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * UTF-16LE next-UChar test body. NOTE(review): the function header is not
 * visible in this extract; presumably this is TestUTF16LE() - confirm.
 * Opens a "utf-16le" converter, runs TestNextUChar() over in[] against
 * results[], checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR, and checks that a single trailing byte yields
 * U_TRUNCATED_CHAR_FOUND once the STOP callback is installed.
 * A further unpaired-surrogate check is described as disabled.
 */
2133 static const uint8_t in
[]={
2138 0x01, 0xd8, 0x01, 0xdc
2141 /* expected test results */
2142 static const int32_t results
[]={
2143 /* number of bytes read, code point */
2151 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2152 UErrorCode errorCode
=U_ZERO_ERROR
;
2153 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2154 if(U_FAILURE(errorCode
)) {
2155 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2158 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2159 /* Test the condition when source >= sourceLimit */
2160 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2161 /*Test for the condition where there is an invalid character*/
/* One byte only: half of a 16-bit code unit. */
2163 static const uint8_t source2
[]={0x61};
2164 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2165 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2169 * Test disabled because currently the UTF-16BE/LE converters are supposed
2170 * to not set errors for unpaired surrogates.
2171 * This may change with
2172 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2175 /*Test for the condition where there is a surrogate character*/
2177 static const uint8_t source2
[]={0x01, 0xd8};
2178 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * TestUTF32: opens a "UTF-32" converter and runs TestNextUChar() over three
 * inputs that begin with the bytes 00 00 FE FF, FF FE 00 00 and 00 00 FE FE
 * respectively, resetting the converter's to-Unicode state between runs.
 * Finally checks that an empty source range yields
 * U_INDEX_OUTOFBOUNDS_ERROR.
 */
2185 static void TestUTF32() {
/* Starts with 00 00 FE FF. */
2187 static const uint8_t in1
[]={
2188 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
/* Starts with FF FE 00 00. */
2190 static const uint8_t in2
[]={
2191 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
/* Starts with 00 00 FE FE; the last two 4-byte units hold the values D840 and DC01. */
2193 static const uint8_t in3
[]={
2194 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2197 /* expected test results */
2198 static const int32_t results1
[]={
2199 /* number of bytes read, code point */
2203 static const int32_t results2
[]={
2204 /* number of bytes read, code point */
2208 static const int32_t results3
[]={
2209 /* number of bytes read, code point */
2212 4, 0xfffd, /* unmatched surrogate */
2213 4, 0xfffd /* unmatched surrogate */
2216 const char *source
, *limit
;
2218 UErrorCode errorCode
=U_ZERO_ERROR
;
2219 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2220 if(U_FAILURE(errorCode
)) {
2221 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
2225 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2226 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
2228 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
/* Reset the to-Unicode state before feeding the next input. */
2229 ucnv_resetToUnicode(cnv
);
2230 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
2232 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2233 ucnv_resetToUnicode(cnv
);
2234 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2236 /* Test the condition when source >= sourceLimit */
2237 ucnv_resetToUnicode(cnv
);
2238 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/*
 * UTF-32BE ucnv_getNextUChar() checks. in[] is decoded with a converter
 * opened as "UTF-32BE" and compared with results[] by TestNextUChar(); the
 * empty-input error is checked; then the SKIP callback is installed and the
 * error input in2[] (which contains out-of-range values) is run against
 * results2[].
 */
2246 static const uint8_t in
[]={
2247 0x00, 0x00, 0x00, 0x61,
2248 0x00, 0x00, 0x30, 0x61,
2249 0x00, 0x00, 0xdc, 0x00,
2250 0x00, 0x00, 0xd8, 0x00,
2251 0x00, 0x00, 0xdf, 0xff,
2252 0x00, 0x00, 0xff, 0xfe,
2253 0x00, 0x10, 0xab, 0xcd,
2254 0x00, 0x10, 0xff, 0xff
2257 /* expected test results */
2258 static const int32_t results
[]={
2259 /* number of bytes read, code point */
2270 /* error test input */
2271 static const uint8_t in2
[]={
2272 0x00, 0x00, 0x00, 0x61,
2273 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2274 0x00, 0x00, 0x00, 0x62,
2275 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2276 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2277 0x00, 0x00, 0x01, 0x62,
2278 0x00, 0x00, 0x02, 0x62
2281 /* expected error test results */
2282 static const int32_t results2
[]={
2283 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback when the SKIP
 * callback is set further down. */
2290 UConverterToUCallback cb
;
2293 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2294 UErrorCode errorCode
=U_ZERO_ERROR
;
2295 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2296 if(U_FAILURE(errorCode
)) {
2297 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2300 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2302 /* Test the condition when source >= sourceLimit */
2303 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2305 /* test error behavior with a skip callback */
2306 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2307 source
=(const char *)in2
;
2308 limit
=(const char *)(in2
+sizeof(in2
));
2309 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
/*
 * UTF-32LE ucnv_getNextUChar() checks. The data is the byte-swapped form of
 * the UTF-32BE test data: in[] is decoded with a converter opened as
 * "UTF-32LE" and compared with results[]; the empty-input error is checked;
 * then the SKIP callback is installed and the error input in2[] is run
 * against results2[].
 */
2317 static const uint8_t in
[]={
2318 0x61, 0x00, 0x00, 0x00,
2319 0x61, 0x30, 0x00, 0x00,
2320 0x00, 0xdc, 0x00, 0x00,
2321 0x00, 0xd8, 0x00, 0x00,
2322 0xff, 0xdf, 0x00, 0x00,
2323 0xfe, 0xff, 0x00, 0x00,
2324 0xcd, 0xab, 0x10, 0x00,
2325 0xff, 0xff, 0x10, 0x00
2328 /* expected test results */
2329 static const int32_t results
[]={
2330 /* number of bytes read, code point */
2341 /* error test input */
2342 static const uint8_t in2
[]={
2343 0x61, 0x00, 0x00, 0x00,
2344 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2345 0x62, 0x00, 0x00, 0x00,
2346 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2347 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2348 0x62, 0x01, 0x00, 0x00,
2349 0x62, 0x02, 0x00, 0x00,
2352 /* expected error test results */
2353 static const int32_t results2
[]={
2354 /* number of bytes read, code point */
/* cb receives the previously installed to-Unicode callback when the SKIP
 * callback is set further down. */
2361 UConverterToUCallback cb
;
2364 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2365 UErrorCode errorCode
=U_ZERO_ERROR
;
2366 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2367 if(U_FAILURE(errorCode
)) {
2368 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2371 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2373 /* Test the condition when source >= sourceLimit */
2374 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2376 /* test error behavior with a skip callback */
2377 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2378 source
=(const char *)in2
;
2379 limit
=(const char *)(in2
+sizeof(in2
));
2380 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
/*
 * LATIN_1 checks. in[]/results[] drive the ucnv_getNextUChar() test through
 * TestNextUChar() on a converter opened as "LATIN_1", followed by the
 * empty-input error check. Afterwards TestConv() is called twice: in1 with
 * expected bytes out1 for "LATIN_1", and in2 with expected bytes out2 for
 * "ASCII".
 */
2388 static const uint8_t in
[]={
2397 /* expected test results */
2398 static const int32_t results
[]={
2399 /* number of bytes read, code point */
/* in1 holds 16-bit units whose values all fit in one byte; out1 below lists
 * the same values as bytes. */
2407 static const uint16_t in1
[] = {
2408 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2409 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2410 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2411 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2412 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2413 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2414 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2415 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2416 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2417 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2418 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2421 static const uint8_t out1
[] = {
2422 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2423 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2424 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2425 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2426 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2427 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2428 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2429 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2430 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2431 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2432 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
/* in2 holds 16-bit units that are all below 0x80; out2 below lists the same
 * values as bytes and is the expectation for the "ASCII" converter. */
2435 static const uint16_t in2
[]={
2436 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2437 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2438 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2439 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2440 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2441 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2442 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2443 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2444 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2445 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2446 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2447 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2448 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2449 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2450 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2451 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2452 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2453 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2454 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2455 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2456 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2457 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2458 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2459 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2460 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2461 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2462 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2463 0x37, 0x20, 0x2A, 0x2F,
2465 static const unsigned char out2
[]={
2466 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2467 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2468 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2469 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2470 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2471 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2472 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2473 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2474 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2475 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2476 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2477 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2478 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2479 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2480 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2481 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2482 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2483 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2484 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2485 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2486 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2487 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2488 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2489 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2490 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2491 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2492 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2493 0x37, 0x20, 0x2A, 0x2F,
2495 const char *source
=(const char *)in
;
2496 const char *limit
=(const char *)in
+sizeof(in
);
2498 UErrorCode errorCode
=U_ZERO_ERROR
;
2499 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2500 if(U_FAILURE(errorCode
)) {
2501 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2504 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2505 /* Test the condition when source >= sourceLimit */
2506 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
/* sizeof(x)/2 is used as the element count of the uint16_t arrays. */
2507 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2508 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
/*
 * Single-byte codepage check: the six bytes of in[] are decoded with a
 * converter opened as "x-mac-turkish" and compared with results[] by
 * TestNextUChar(), followed by the empty-input error check. The illegal
 * character test at the end is commented out.
 */
2516 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2517 /* expected test results */
2518 static const int32_t results
[]={
2519 /* number of bytes read, code point */
2528 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2529 UErrorCode errorCode
=U_ZERO_ERROR
;
2530 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
/* log_data_err (not log_err) is used when the converter cannot be opened. */
2531 if(U_FAILURE(errorCode
)) {
2532 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2535 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2536 /* Test the condition when source >= sourceLimit */
2537 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2538 /*Test for Illegal character */ /*
2540 static const uint8_t input1[]={ 0xA1 };
2541 const char* illegalsource=(const char*)input1;
2542 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2551 static const uint8_t in
[]={
2560 /* expected test results */
2561 static const int32_t results
[]={
2562 /* number of bytes read, code point */
/*
 * Double-byte codepage check: in[] is decoded with a converter opened
 * through my_ucnv_open("@ibm9027") and compared with results[] by
 * TestNextUChar(); then the empty-input, "invalid character" and truncated
 * character cases are run through TestNextUCharError().
 */
2570 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2571 UErrorCode errorCode
=U_ZERO_ERROR
;
2573 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2574 if(U_FAILURE(errorCode
)) {
2575 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2578 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2579 /* Test the condition when source >= sourceLimit */
2580 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2581 /*Test for the condition where there is an invalid character*/
/* NOTE(review): although labelled "an invalid character", the expected
 * status passed here is U_ZERO_ERROR, i.e. no error is expected for the
 * bytes 1A 1B. */
2583 static const uint8_t source2
[]={0x1a, 0x1b};
2584 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2586 /*Test for the condition where we have a truncated char*/
/* One byte (0xC4) only, with the STOP callback installed. */
2588 static const uint8_t source1
[]={0xc4};
2589 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2590 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
/*
 * Multi-byte codepage check: in[] is decoded with a converter opened as
 * "ibm-1363" and compared with results[] by TestNextUChar(); then the
 * empty-input, "invalid character" and truncated character cases are run
 * through TestNextUCharError().
 */
2598 static const uint8_t in
[]={
2609 /* expected test results */
2610 static const int32_t results
[]={
2611 /* number of bytes read, code point */
2621 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2622 UErrorCode errorCode
=U_ZERO_ERROR
;
2624 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2625 if(U_FAILURE(errorCode
)) {
2626 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2629 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2630 /* Test the condition when source >= sourceLimit */
2631 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2632 /*Test for the condition where there is an invalid character*/
/* NOTE(review): although labelled "an invalid character", the expected
 * status passed here is U_ZERO_ERROR, i.e. no error is expected for the
 * bytes A1 80. */
2634 static const uint8_t source2
[]={0xa1, 0x80};
2635 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2637 /*Test for the condition where we have a truncated char*/
/* One byte (0xC4) only, with the STOP callback installed. */
2639 static const uint8_t source1
[]={0xc4};
2640 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2641 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2647 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
/*
 * Converts the three bytes A2 AE A2 with "ibm-1363": first in one
 * ucnv_toUnicode() call with flush=TRUE, then code point by code point with
 * ucnv_getNextUChar(). The third ucnv_getNextUChar() call must return 0xFFFF
 * with U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * Pointer differences are cast to int before being passed for "%d": a
 * ptrdiff_t argument does not match "%d" where ptrdiff_t is wider than int.
 */
2650 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2652 const char *cnvName
= "ibm-1363";
2653 UErrorCode status
= U_ZERO_ERROR
;
2654 const char sourceData
[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2655 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2656 const char *source
= sourceData
;
2657 const char *sourceLim
= sourceData
+sizeof(sourceData
);
2659 UConverter
*cnv
=ucnv_open(cnvName
, &status
);
2660 if(U_FAILURE(status
)) {
2661 log_data_err("Unable to open %s converter: %s\n", cnvName
, u_errorName(status
));
/* Pass 1: whole-buffer conversion into targetBuf with flush=TRUE. */
2667 UChar targetBuf
[256];
2668 UChar
*target
= targetBuf
;
2669 UChar
*targetLim
= target
+256;
2670 ucnv_toUnicode(cnv
, &target
, targetLim
, &source
, sourceLim
, NULL
, TRUE
, &status
);
2672 log_info("After convert: target@%d, source@%d, status%s\n",
2673 (int)(target
-targetBuf
), (int)(source
-sourceData
), u_errorName(status
));
2675 if(U_FAILURE(status
)) {
2676 log_err("Failed to convert: %s\n", u_errorName(status
));
/* Pass 2: three successive ucnv_getNextUChar() calls on the same input. */
2683 c1
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2684 log_verbose("c1: U+%04X, source@%d, status %s\n", c1
, (int)(source
-sourceData
), u_errorName(status
));
2686 c2
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2687 log_verbose("c2: U+%04X, source@%d, status %s\n", c2
, (int)(source
-sourceData
), u_errorName(status
));
2689 c3
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2690 log_verbose("c3: U+%04X, source@%d, status %s\n", c3
, (int)(source
-sourceData
), u_errorName(status
));
/* Running out of input must be reported as 0xFFFF plus
 * U_INDEX_OUTOFBOUNDS_ERROR. */
2692 if(status
==U_INDEX_OUTOFBOUNDS_ERROR
&& c3
==0xFFFF) {
2693 log_verbose("OK\n");
2695 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
/*
 * Generic ISO_2022 checks, compiled only when U_ENABLE_GENERIC_ISO_2022 is
 * defined: in[] is decoded with a converter opened as "ISO_2022" and compared
 * with results[] by TestNextUChar(), then several error conditions are run
 * through TestNextUCharError().
 */
2703 #ifdef U_ENABLE_GENERIC_ISO_2022
2708 static const uint8_t in
[]={
2715 0xf0, 0x90, 0x80, 0x80
2720 /* expected test results */
2721 static const int32_t results
[]={
2722 /* number of bytes read, code point */
2723 4, 0x0031, /* 4 bytes including the escape sequence */
2731 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2732 UErrorCode errorCode
=U_ZERO_ERROR
;
2735 cnv
=ucnv_open("ISO_2022", &errorCode
);
2736 if(U_FAILURE(errorCode
)) {
2737 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2740 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2742 /* Test the condition when source >= sourceLimit */
/* NOTE(review): source still points at in[0] here, so source-1 is a pointer
 * before the start of the array; it is only used as a limit value to
 * provoke U_ILLEGAL_ARGUMENT_ERROR. */
2743 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2744 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2745 /*Test for the condition where we have a truncated char*/
/* One byte (0xC4) only, with the STOP callback installed. */
2747 static const uint8_t source1
[]={0xc4};
2748 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2749 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2751 /*Test for the condition where there is an invalid character*/
2753 static const uint8_t source2
[]={0xa1, 0x01};
2754 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
/*
 * Round-trips the UTF-16 text source..sourceLimit through cnv while offering
 * only a small amount of additional target space on every call.
 *
 * Phase 1 converts from Unicode into cBuf, growing cTargetLimit by i on each
 * pass and clearing U_BUFFER_OVERFLOW_ERROR until all input is consumed.
 * Phase 2 converts the produced bytes back into uBuf in the same manner.
 * Phase 3 compares uBuf with the original text and logs every mismatch.
 *
 * @param source       first UTF-16 unit of the text to convert
 * @param sourceLimit  one past the last UTF-16 unit
 * @param cnv          open converter to exercise
 *
 * The verification loop used to be bounded by (source - sourceLimit), which
 * is never positive, so no unit was ever compared. It now runs over
 * (sourceLimit - source) units and reads the expected values from source
 * directly, so it does not depend on where uSource was left by phase 1.
 */
2762 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2763 const UChar
* uSource
;
2764 const UChar
* uSourceLimit
;
2765 const char* cSource
;
2766 const char* cSourceLimit
;
2767 UChar
*uTargetLimit
=NULL
;
2770 const char *cTargetLimit
;
2772 UChar
*uBuf
; /*,*test;*/
2773 int32_t uBufSize
= 120;
2776 UErrorCode errorCode
=U_ZERO_ERROR
;
2777 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2778 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2781 uSource
= (UChar
*) source
;
2782 uSourceLimit
=(const UChar
*)sourceLimit
;
/* Both target limits start at the beginning of their buffers, i.e. with no
 * room at all; the loops below open them up step by step. */
2786 cTargetLimit
= cBuf
;
2787 uTargetLimit
= uBuf
;
/* Phase 1: Unicode -> bytes. */
2791 cTargetLimit
= cTargetLimit
+ i
;
2792 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2793 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2794 errorCode
=U_ZERO_ERROR
;
2798 if(U_FAILURE(errorCode
)){
2799 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2803 }while (uSource
<uSourceLimit
);
/* Phase 2: bytes -> Unicode, reading exactly what phase 1 produced. */
2805 cSourceLimit
=cTarget
;
2807 uTargetLimit
=uTargetLimit
+i
;
2808 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2809 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2810 errorCode
=U_ZERO_ERROR
;
2813 if(U_FAILURE(errorCode
)){
2814 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2817 }while(cSource
<cSourceLimit
);
/* Phase 3: the round-tripped text must equal the input. */
2821 for(len
=0;len
<(int)(sourceLimit
- source
);len
++){
2822 if(uBuf
[len
]!=source
[len
]){
2823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",source
[len
],(int)uBuf
[len
]) ;
2830 /* Test for Jitterbug 778 */
/*
 * Round-trips source..sourceLimit through the preflight-style convenience
 * API: ucnv_fromUChars() into cBuf, then ucnv_toUChars() back into uBuf
 * using the byte count returned by the first call, and finally compares the
 * result with the original text unit by unit.
 *
 * source       first UTF-16 unit of the text to convert
 * sourceLimit  one past the last UTF-16 unit
 * cnv          open converter to exercise
 */
2831 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2832 const UChar
* uSource
;
2833 const UChar
* uSourceLimit
;
2834 const char* cSource
;
2835 UChar
*uTargetLimit
=NULL
;
2838 const char *cTargetLimit
;
2841 int32_t uBufSize
= 120;
2842 int numCharsInTarget
=0;
2843 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Both scratch buffers hold uBufSize*5 elements. */
2844 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2845 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2847 uSourceLimit
=sourceLimit
;
2849 cTargetLimit
= cBuf
+uBufSize
*5;
2851 uTargetLimit
= uBuf
+ uBufSize
*5;
/* NOTE(review): the failure message below names ucnv_fromUnicode although
 * the call being checked is ucnv_fromUChars. */
2853 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
2854 if(U_FAILURE(errorCode
)){
2855 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* numCharsInTarget (the return value of ucnv_fromUChars) is passed as the
 * source length for the reverse conversion. */
2860 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2861 if(U_FAILURE(errorCode
)){
2862 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
2866 while(uSource
<uSourceLimit
){
2867 if(*test
!=*uSource
){
2869 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * Round-trips the UTF-16 text source..sourceLimit through cnv while feeding
 * the converter its input in small pieces.
 *
 * Phase 1 converts from Unicode into cBuf, advancing uSourceLimit by one
 * unit per pass (never beyond sourceLimit) until uSource reaches temp, which
 * is a copy of sourceLimit. Phase 2 converts the produced bytes back into
 * uBuf, advancing cSourceLimit by one byte per pass up to cTarget. Phase 3
 * compares uBuf with the original text and logs every mismatch.
 *
 * @param source       first UTF-16 unit of the text to convert
 * @param sourceLimit  one past the last UTF-16 unit
 * @param cnv          open converter to exercise
 *
 * The verification loop used to be bounded by (source - sourceLimit), which
 * is never positive, so no unit was ever compared. It now runs over
 * (sourceLimit - source) units and reads the expected values from source
 * directly, so it does not depend on where uSource was left by phase 1.
 */
2878 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2879 const UChar
* uSource
;
2880 const UChar
* uSourceLimit
;
2881 const char* cSource
;
2882 const char* cSourceLimit
;
2883 UChar
*uTargetLimit
=NULL
;
2886 const char *cTargetLimit
;
2888 UChar
*uBuf
; /*,*test;*/
2889 int32_t uBufSize
= 120;
2892 const UChar
*temp
= sourceLimit
;
2893 UErrorCode errorCode
=U_ZERO_ERROR
;
2894 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2895 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2899 uSource
= (UChar
*) source
;
/* The targets get their full capacity here; only the source limits are
 * opened up step by step. */
2903 cTargetLimit
= cBuf
;
2904 uTargetLimit
= uBuf
+uBufSize
*5;
2905 cTargetLimit
= cTargetLimit
+uBufSize
*10;
2906 uSourceLimit
=uSource
;
/* Phase 1: Unicode -> bytes, one more input unit per pass. */
2909 if (uSourceLimit
< sourceLimit
) {
2910 uSourceLimit
= uSourceLimit
+1;
2912 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2913 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2914 errorCode
=U_ZERO_ERROR
;
2918 if(U_FAILURE(errorCode
)){
2919 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2923 }while (uSource
<temp
);
/* Phase 2: bytes -> Unicode, one more input byte per pass. */
2927 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2928 cSourceLimit
= cSourceLimit
+1;
2930 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2931 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2932 errorCode
=U_ZERO_ERROR
;
2935 if(U_FAILURE(errorCode
)){
2936 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2939 }while(cSource
<cTarget
);
/* Phase 3: the round-tripped text must equal the input. */
2943 for(;len
<(int)(sourceLimit
- source
);len
++){
2944 if(uBuf
[len
]!=source
[len
]){
2945 log_err("Expected : \\u%04X \t Got: \\u%04X\n",source
[len
],(int)uBuf
[len
]) ;
/*
 * Reads the bytes source..limit one code point at a time with
 * ucnv_getNextUChar() and compares each result with the expected UTF-16
 * text in results[].
 *
 * cnv      open converter to exercise
 * source   first byte of the encoded input
 * limit    one past the last byte of the encoded input
 * results  expected text as UTF-16 units
 * message  label that prefixes every log line
 */
2953 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2954 const uint16_t results
[], const char* message
){
2955 /* const char* s0; */
2956 const char* s
=(char*)source
;
2957 const uint16_t *r
=results
;
2958 UErrorCode errorCode
=U_ZERO_ERROR
;
2963 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2964 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2965 break; /* no more significant input */
2966 } else if(U_FAILURE(errorCode
)) {
2967 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
/* When the expected text is at a lead surrogate, the expected code point
 * exC is read with UTF_NEXT_CHAR_SAFE instead of taken from a single unit. */
2970 if(UTF_IS_FIRST_SURROGATE(*r
)){
2972 UTF_NEXT_CHAR_SAFE(r
, i
, len
, exC
, FALSE
);
/* NOTE(review): the comparison uses exC but the message prints *r as the
 * expected value. */
2977 if(c
!=(uint32_t)(exC
))
2978 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * Regression test for Jitterbug 930 (offsets written by ucnv_fromUnicode).
 * Opens the converter named by enc through my_ucnv_open(), converts with an
 * offsets array that was pre-filled by memset(), and checks the number of
 * offsets written against the number of bytes produced (target-out). A
 * second conversion is made after refilling the offsets array; its log
 * message names -1 as the expected offset value.
 *
 * enc  converter name to test
 */
2984 static int TestJitterbug930(const char* enc
){
2985 UErrorCode err
= U_ZERO_ERROR
;
2986 UConverter
*converter
;
2990 const UChar
*source
= in
;
2992 int32_t* offsets
= off
;
2993 int numOffWritten
=0;
2995 converter
= my_ucnv_open(enc
, &err
);
2997 in
[0] = 0x41; /* 0x4E00;*/
/* Every byte of off[] is set to '*' before the conversion. */
3002 memset(off
, '*', sizeof(off
));
3004 ucnv_fromUnicode (converter
,
3013 /* writes three bytes into the output buffer: 41 1B 24
3014 * but offsets contains 0 1 1
3016 while(*offsets
< off
[10]){
3020 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
3021 if(numOffWritten
!= (int)(target
-out
)){
3022 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
3027 memset(off
,'*' , sizeof(off
));
3031 ucnv_fromUnicode (converter
,
3040 while(*offsets
< off
[10]){
3043 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
3048 /* writes 42 43 7A into output buffer,
3049 * offsets contains -1 -1 -1
3051 ucnv_close(converter
);
/*
 * HZ round-trip test. The UTF-16 text in[] is converted to bytes with a
 * converter opened as "HZ" (ucnv_fromUnicode, flush=TRUE, with offsets),
 * converted back with ucnv_toUnicode and compared with the original text.
 * The same converter is then handed to TestGetNextUChar2022,
 * TestSmallTargetBuffer, TestSmallSourceBuffer and TestToAndFromUChars, and
 * TestJitterbug930 is run for "csISO2022JP".
 */
3058 static const uint16_t in
[]={
3059 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3060 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3061 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3062 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3063 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3064 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3065 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3066 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3067 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3068 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3069 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3070 0x005A, 0x005B, 0x005C, 0x000A
3072 const UChar
* uSource
;
3073 const UChar
* uSourceLimit
;
3074 const char* cSource
;
3075 const char* cSourceLimit
;
3076 UChar
*uTargetLimit
=NULL
;
3079 const char *cTargetLimit
;
3082 int32_t uBufSize
= 120;
3083 UErrorCode errorCode
=U_ZERO_ERROR
;
/* The offsets array has room for uBufSize*5 entries, matching the scratch
 * buffers allocated below. */
3085 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3086 int32_t* myOff
= offsets
;
3087 cnv
=ucnv_open("HZ", &errorCode
);
3088 if(U_FAILURE(errorCode
)) {
3089 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3093 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3094 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3095 uSource
= (const UChar
*)in
;
3096 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3098 cTargetLimit
= cBuf
+uBufSize
*5;
3100 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Unicode -> HZ bytes in a single flushing call. */
3101 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3102 if(U_FAILURE(errorCode
)){
3103 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* HZ bytes -> Unicode; the input ends where the previous call stopped
 * writing. */
3107 cSourceLimit
=cTarget
;
3110 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3111 if(U_FAILURE(errorCode
)){
3112 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the round-tripped text with in[]. */
3115 uSource
= (const UChar
*)in
;
3116 while(uSource
<uSourceLimit
){
3117 if(*test
!=*uSource
){
3119 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3124 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3125 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3126 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3127 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3128 TestJitterbug930("csISO2022JP");
/*
 * ISCII test data and driver. in[] holds the Unicode text and byteArr[] the
 * matching ISCII byte sequence. testConvertToU() checks byteArr -> in with
 * the converter "x-iscii-de", and TestConv() is run with in[] and byteArr[]
 * for "ISCII,version=0".
 */
3138 static const uint16_t in
[]={
3139 /* test full range of Devanagari */
3140 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3141 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3142 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3143 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3144 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3145 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3146 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3147 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3148 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3149 0x096D,0x096E,0x096F,
3150 /* test Soft halant*/
3151 0x0915,0x094d, 0x200D,
3152 /* test explicit halant */
3153 0x0915,0x094d, 0x200c,
3154 /* test double danda */
3157 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3158 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3159 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3160 /* tests from Lotus */
3161 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3162 0x0930,0x094D,0x200D,
3163 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3164 0x0915,0x0921,0x002B,0x095F,
3166 0x0B86, 0xB87, 0xB88,
3168 0x0C05, 0x0C02, 0x0C03,0x0c31,
3170 0x0C85, 0xC82, 0x0C83,
3171 /* test Abbr sign and Anudatta */
3181 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3182 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3185 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3186 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3187 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3188 0x093D /* Avagraha 0xEA, 0xE9*/,
3196 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
/* Expected ISCII bytes for in[]. */
3198 static const unsigned char byteArr
[]={
3200 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3201 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3202 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3203 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3204 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3205 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3206 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3207 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3208 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3210 /* test soft halant */
3212 /* test explicit halant */
3214 /* test double danda */
3217 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3218 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3219 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3222 /* tests from Lotus */
3223 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3224 0xEF,0x42,0xCF,0xE8,0xD9,
3225 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3226 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3228 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3230 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3232 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3233 /* anudatta and abbreviation sign */
3234 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3237 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3239 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3241 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3243 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3245 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3247 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3249 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3251 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3253 0xB3, 0xE9, /* Ka + NUKTA */
3255 0xB4, 0xE9, /* Kha + NUKTA */
3257 0xB5, 0xE9, /* Ga + NUKTA */
3269 /* just consume unhandled codepoints */
/* byteArr is passed with its byte count, in[] with its count of UChar
 * units. */
3273 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,(sizeof(in
)/U_SIZEOF_UCHAR
),"x-iscii-de",NULL
,TRUE
);
3274 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
/*
 * ISO_2022_JP_1 round-trip test. The UTF-16 text in[] is converted to bytes
 * with a converter opened as "ISO_2022_JP_1" (ucnv_fromUnicode, flush=TRUE,
 * with offsets), converted back with ucnv_toUnicode and compared with the
 * original text. The same converter is then handed to
 * TestSmallTargetBuffer, TestSmallSourceBuffer, TestGetNextUChar2022 and
 * TestToAndFromUChars, and TestJitterbug930 is run for "csISO2022JP".
 */
3281 static const uint16_t in
[]={
3282 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3283 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3284 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3285 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3286 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3287 0x201D, 0x3014, 0x000D, 0x000A,
3288 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3289 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3291 const UChar
* uSource
;
3292 const UChar
* uSourceLimit
;
3293 const char* cSource
;
3294 const char* cSourceLimit
;
3295 UChar
*uTargetLimit
=NULL
;
3298 const char *cTargetLimit
;
3301 int32_t uBufSize
= 120;
3302 UErrorCode errorCode
=U_ZERO_ERROR
;
/* The offsets array has room for uBufSize*5 entries, matching the scratch
 * buffers allocated below. */
3304 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3305 int32_t* myOff
= offsets
;
3306 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3307 if(U_FAILURE(errorCode
)) {
3308 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3312 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3313 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3314 uSource
= (const UChar
*)in
;
3315 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3317 cTargetLimit
= cBuf
+uBufSize
*5;
3319 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Unicode -> ISO-2022-JP bytes in a single flushing call. */
3320 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3321 if(U_FAILURE(errorCode
)){
3322 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Bytes -> Unicode; the input ends where the previous call stopped
 * writing. */
3326 cSourceLimit
=cTarget
;
3329 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3330 if(U_FAILURE(errorCode
)){
3331 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the round-tripped text with in[]. */
3335 uSource
= (const UChar
*)in
;
3336 while(uSource
<uSourceLimit
){
3337 if(*test
!=*uSource
){
3339 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3345 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3346 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3347 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3348 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3349 TestJitterbug930("csISO2022JP");
/*
 * TestConv: round-trip helper shared by the converter tests in this file.
 *
 * Opens the converter named `conv` with my_ucnv_open(), encodes the `len`
 * UTF-16 units of `in` with ucnv_fromUnicode(), decodes the produced bytes
 * with ucnv_toUnicode(), and logs every unit for which *test differs from the
 * corresponding input unit.  The same input is then handed to
 * TestSmallTargetBuffer(), TestSmallSourceBuffer() and TestGetNextUChar2022().
 *
 *   in          UTF-16 input; also the reference the decoded text is compared to
 *   len         number of UTF-16 units in `in`
 *   conv        converter name; also printed in the log messages
 *   lang        passed as the final argument of the byteArr
 *               TestGetNextUChar2022() call
 *   byteArr     optional pre-encoded bytes that are decoded and compared
 *               against `in`
 *   byteArrLen  number of bytes in byteArr; the byteArr checks run only when
 *               byteArr is non-NULL and byteArrLen is non-zero
 *
 * NOTE(review): the declarations and initial assignments of cnv, uBuf, cBuf,
 * cTarget, uTarget and test are not visible in this excerpt - confirm them
 * against the complete file before relying on this description.
 */
3356 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3357 const UChar
* uSource
;
3358 const UChar
* uSourceLimit
;
3359 const char* cSource
;
3360 const char* cSourceLimit
;
3361 UChar
*uTargetLimit
=NULL
;
3364 const char *cTargetLimit
;
/* Both work buffers and the offsets array are sized to uBufSize (1200) units. */
3367 int32_t uBufSize
= 120*10;
3368 UErrorCode errorCode
=U_ZERO_ERROR
;
3370 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
/* myOff is the offsets pointer actually handed to the conversion calls. */
3371 int32_t* myOff
= offsets
;
3372 cnv
=my_ucnv_open(conv
, &errorCode
);
3373 if(U_FAILURE(errorCode
)) {
3374 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3378 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3379 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3380 uSource
= (const UChar
*)in
;
3381 uSourceLimit
=uSource
+len
;
3383 cTargetLimit
= cBuf
+uBufSize
;
3385 uTargetLimit
= uBuf
+ uBufSize
;
/* Step 1: encode the whole input in a single call with flush=TRUE. */
3386 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3387 if(U_FAILURE(errorCode
)){
3388 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3391 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
/* Step 2: decode again; cTarget marks the end of the bytes written in step 1. */
3393 cSourceLimit
=cTarget
;
3396 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3397 if(U_FAILURE(errorCode
)){
3398 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
/* Step 3: compare the decoded units with the original input, unit by unit. */
3402 uSource
= (const UChar
*)in
;
3403 while(uSource
<uSourceLimit
){
3404 if(*test
!=*uSource
){
3405 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
/* Step 4: repeat the conversion through the buffer-size and getNextUChar helpers. */
3410 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3411 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3412 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
/* Step 5 (optional): caller-supplied bytes are decoded and compared with `in` as well. */
3413 if(byteArr
&& byteArrLen
!=0){
3414 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3415 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
3418 cSourceLimit
= cSource
+byteArrLen
;
3421 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3422 if(U_FAILURE(errorCode
)){
3423 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3427 uSource
= (const UChar
*)in
;
3428 while(uSource
<uSourceLimit
){
3429 if(*test
!=*uSource
){
3430 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * _charAt: character-access callback that unescape() passes to
 * u_unescapeAt().  Treats `context` as a char array and returns the element
 * at index `offset`, converted to UChar.
 * NOTE(review): the element is read through a plain `char`; if char is signed
 * on the target, bytes >= 0x80 would not map to 0x80..0xFF - the test strings
 * visible in this file are ASCII escape text, so this presumably never occurs.
 */
3443 static UChar U_CALLCONV
3444 _charAt(int32_t offset
, void *context
) {
3445 return ((char*)context
)[offset
];
/*
 * unescape: copies the char string `src` into the UTF-16 buffer `dst`,
 * decoding backslash escapes on the way.
 *
 *   dst     destination buffer; may be NULL only when dstLen is not positive
 *   dstLen  capacity of dst in UChars (values below -1 are rejected)
 *   src     source characters; must not be NULL
 *   srcLen  number of chars in src (values below -1 are rejected; uprv_strlen()
 *           is used to compute it in a branch whose condition is not visible
 *           here - presumably when srcLen is -1)
 *   status  in/out error code; nothing is done if it already signals failure
 *
 * Errors reported through *status:
 *   U_ILLEGAL_ARGUMENT_ERROR  bad pointer/length combination
 *   U_INVALID_CHAR_FOUND      u_unescapeAt() rejected an escape sequence
 *   U_BUFFER_OVERFLOW_ERROR   dst is too small; counting continues so the
 *                             required length can still be determined
 *
 * NOTE(review): the return statement is not visible in this excerpt; the
 * caller in this file uses the result as the UTF-16 length, so it presumably
 * returns dstIndex.
 */
3449 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3452 if(U_FAILURE(*status
)){
3455 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3456 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
3460 srcLen
= (int32_t)uprv_strlen(src
);
3463 for (; srcIndex
<srcLen
; ) {
3464 UChar32 c
= src
[srcIndex
++];
/* 0x5C is the backslash: let u_unescapeAt() decode the escape that follows. */
3465 if (c
== 0x005C /*'\\'*/) {
3466 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances srcIndex */
3467 if (c
== (UChar32
)0xFFFFFFFF) {
3468 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3469 break; /* invalid escape sequence */
3472 if(dstIndex
< dstLen
){
/* Two-unit form: lead then trail surrogate (the guarding condition is not
   visible here - presumably taken for code points above U+FFFF). */
3474 dst
[dstIndex
++] = UTF16_LEAD(c
);
3475 if(dstIndex
<dstLen
){
3476 dst
[dstIndex
]=UTF16_TRAIL(c
);
3478 *status
=U_BUFFER_OVERFLOW_ERROR
;
/* Single-unit form. */
3481 dst
[dstIndex
]=(UChar
)c
;
3485 *status
= U_BUFFER_OVERFLOW_ERROR
;
3487 dstIndex
++; /* for preflighting */
/*
 * TestFullRoundtrip: exhaustive round-trip driver for the converter named
 * `cp`.  For every code point value up to 0x10FFFF reached by the loop it
 * calls TestConv() on four inputs built from that code point: the code point
 * alone, repeated twice, repeated three times, and embedded in `nsrc`
 * between two other units.
 *
 * NOTE(review): the declarations of i, len and ulen, the length updates and
 * the assignments that fill `nsrc` are not visible in this excerpt; the
 * comments below describe only what the visible statements do.
 */
3493 TestFullRoundtrip(const char* cp
){
3494 UChar usource
[10] ={0};
3495 UChar nsrc
[10] = {0};
3499 /* Test codepoint 0 */
3500 TestConv(usource
,1,cp
,"",NULL
,0);
3501 TestConv(usource
,2,cp
,"",NULL
,0);
3503 TestConv(nsrc
,3,cp
,"",NULL
,0);
3505 for(;i
<=0x10FFFF;i
++){
/* One UTF-16 unit when the value fits in a UChar ... */
3511 usource
[0] =(UChar
) i
;
/* ... otherwise a lead/trail surrogate pair. */
3514 usource
[0]=UTF16_LEAD(i
);
3515 usource
[1]=UTF16_TRAIL(i
);
3522 /* Test only single code points */
3523 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3524 /* Test codepoint repeated twice */
3525 usource
[ulen
]=usource
[0];
3526 usource
[ulen
+1]=usource
[1];
3528 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3529 /* Test codepoint repeated 3 times */
3530 usource
[ulen
]=usource
[0];
3531 usource
[ulen
+1]=usource
[1];
3533 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3534 /* Test codepoint in between 2 codepoints */
3538 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* Clear all ten units so nothing from this code point leaks into the next one. */
3539 uprv_memset(usource
,0,sizeof(UChar
)*10);
/*
 * TestRoundTrippingAllUTF: runs TestFullRoundtrip() once for each converter
 * name listed below.  Everything is skipped when the QUICK_OPTION test option
 * is set.  Each run is announced with log_verbose() using the exact
 * converter name that is passed to TestFullRoundtrip(), so the two UTF-7
 * variants can be told apart in the log.
 */
3544 TestRoundTrippingAllUTF(void){
3545 if(!getTestOption(QUICK_OPTION
)){
3546 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3547 TestFullRoundtrip("BOCU-1");
3548 log_verbose("Running exhaustive round trip test for SCSU\n");
3549 TestFullRoundtrip("SCSU");
3550 log_verbose("Running exhaustive round trip test for UTF-8\n");
3551 TestFullRoundtrip("UTF-8");
3552 log_verbose("Running exhaustive round trip test for CESU-8\n");
3553 TestFullRoundtrip("CESU-8");
3554 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3555 TestFullRoundtrip("UTF-16BE");
3556 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3557 TestFullRoundtrip("UTF-16LE");
3558 log_verbose("Running exhaustive round trip test for UTF-16\n");
3559 TestFullRoundtrip("UTF-16");
3560 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3561 TestFullRoundtrip("UTF-32BE");
3562 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3563 TestFullRoundtrip("UTF-32LE");
3564 log_verbose("Running exhaustive round trip test for UTF-32\n");
3565 TestFullRoundtrip("UTF-32");
3566 log_verbose("Running exhaustive round trip test for UTF-7\n");
3567 TestFullRoundtrip("UTF-7");
3568 log_verbose("Running exhaustive round trip test for UTF-7,version=1\n");
3569 TestFullRoundtrip("UTF-7,version=1");
3570 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3571 TestFullRoundtrip("IMAP-mailbox-name");
3572 log_verbose("Running exhaustive round trip test for GB18030\n");
3573 TestFullRoundtrip("GB18030");
3580 static const uint16_t germanUTF16
[]={
3581 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3584 static const uint8_t germanSCSU
[]={
3585 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3588 static const uint16_t russianUTF16
[]={
3589 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3592 static const uint8_t russianSCSU
[]={
3593 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3596 static const uint16_t japaneseUTF16
[]={
3597 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3598 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3599 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3600 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3601 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3602 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3603 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3604 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3605 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3606 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3607 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3608 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3609 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3610 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3611 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3614 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3615 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3616 static const uint8_t japaneseSCSU
[]={
3617 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3618 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3619 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3620 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3621 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3622 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3623 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3624 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3625 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3626 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3627 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3631 static const uint16_t allFeaturesUTF16
[]={
3632 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3633 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3634 0x01df, 0xf000, 0xdbff, 0xdfff
3637 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3638 * result here (34B vs. 35B)
3640 static const uint8_t allFeaturesSCSU
[]={
3641 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3642 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3643 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3644 0xdf, 0x14, 0x80, 0x15, 0xff
3646 static const uint16_t monkeyIn
[]={
3647 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3648 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3649 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3650 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3651 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3652 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3653 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3654 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3655 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3656 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3657 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3658 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3659 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3660 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3661 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3662 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3663 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3664 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3665 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3666 /* test non-BMP code points */
3667 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3668 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3669 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3670 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3671 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3672 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3673 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3674 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3675 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3676 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3677 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3680 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3681 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3682 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3683 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3684 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3686 static const char *fTestCases
[] = {
3687 "\\ud800\\udc00", /* smallest surrogate*/
3689 "\\udBff\\udFff", /* largest surrogate pair*/
3692 "Hello \\u9292 \\u9192 World!",
3693 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3694 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3696 "\\u0648\\u06c8", /* catch missing reset*/
3699 "\\u4444\\uE001", /* lowest quotable*/
3700 "\\u4444\\uf2FF", /* highest quotable*/
3701 "\\u4444\\uf188\\u4444",
3702 "\\u4444\\uf188\\uf288",
3703 "\\u4444\\uf188abc\\u0429\\uf288",
3705 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3706 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3707 "Hello World!123456",
3708 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3710 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3711 "abc\\u4411d", /* uses SQU*/
3712 "abc\\u4411\\u4412d",/* uses SCU*/
3713 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3714 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3716 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3717 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3718 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3720 "", /* empty input*/
3721 "\\u0000", /* smallest BMP character*/
3722 "\\uFFFF", /* largest BMP character*/
3724 /* regression tests*/
3725 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3726 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3727 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3728 "\\u0041\\u00df\\u0401\\u015f",
3729 "\\u9066\\u2123abc",
3730 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3731 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3734 for(;i
<sizeof(fTestCases
)/sizeof(*fTestCases
);i
++){
3735 const char* cSrc
= fTestCases
[i
];
3736 UErrorCode status
= U_ZERO_ERROR
;
3737 int32_t cSrcLen
,srcLen
;
3739 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3740 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3741 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3742 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3743 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3744 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3747 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3748 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3749 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3750 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3751 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3752 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3753 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3756 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestJitterbug2346: regression test using the "ISO_2022_JP" converter.
 * Decodes `source` (the same escape-sequence-delimited line twice, each
 * followed by CR LF) with ucnv_toUnicode(), compares the decoded units with
 * `expected`, and then encodes the decoded text again with
 * ucnv_fromUnicode().
 *
 * The mismatch message prints the reference value from `expected` (read
 * through `temp`) under "Expected" and the decoded unit under "Got".
 *
 * NOTE(review): the statement that rewinds `utarget` to the start of uTarget
 * before the comparison loop is not visible in this excerpt.
 */
3757 static void TestJitterbug2346(){
3758 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3759 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3760 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3762 UChar uTarget
[500]={'\0'};
3763 UChar
* utarget
=uTarget
;
3764 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3766 char cTarget
[500]={'\0'};
3767 char* ctarget
=cTarget
;
3768 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3769 const char* csource
=source
;
/* temp walks the reference data in step with the decoded output. */
3770 UChar
* temp
= expected
;
3771 UErrorCode err
=U_ZERO_ERROR
;
3773 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3774 if(U_FAILURE(err
)) {
3775 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
3778 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3779 if(U_FAILURE(err
)) {
3780 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
/* The converter advanced utarget; it now marks the end of the decoded text. */
3783 utargetLimit
=utarget
;
3785 while(utarget
<utargetLimit
){
3786 if(*temp
!=*utarget
){
3788 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*temp
,(int)*utarget
) ;
3793 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3794 if(U_FAILURE(err
)) {
3795 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3798 ctargetLimit
=ctarget
;
/*
 * TestISO_2022_JP_1: round-trip test for the "ISO_2022_JP_1" converter.
 * Encodes the UTF-16 sample `in` with ucnv_fromUnicode(), decodes the result
 * with ucnv_toUnicode() and logs every unit for which *test differs from the
 * input.  Afterwards a three-byte sequence is passed to TestNextUCharError()
 * and the sample is run through TestSmallTargetBuffer() and
 * TestSmallSourceBuffer().  No offsets are requested: NULL is passed as the
 * offsets argument of both conversions.
 *
 * NOTE(review): the declarations and initial assignments of cnv, uBuf, cBuf,
 * cTarget, uTarget and test are not visible in this excerpt.
 */
3806 TestISO_2022_JP_1() {
3808 static const uint16_t in
[]={
3809 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3810 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3811 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3812 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3813 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3814 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3815 0x201D, 0x000D, 0x000A,
3816 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3817 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3818 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3819 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3820 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3821 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3823 const UChar
* uSource
;
3824 const UChar
* uSourceLimit
;
3825 const char* cSource
;
3826 const char* cSourceLimit
;
3827 UChar
*uTargetLimit
=NULL
;
3830 const char *cTargetLimit
;
/* Work buffers are allocated with uBufSize*5 (600) units each. */
3833 int32_t uBufSize
= 120;
3834 UErrorCode errorCode
=U_ZERO_ERROR
;
3837 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3838 if(U_FAILURE(errorCode
)) {
3839 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3843 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3844 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3845 uSource
= (const UChar
*)in
;
3846 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3848 cTargetLimit
= cBuf
+uBufSize
*5;
3850 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole sample in one call with flush=TRUE. */
3851 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3852 if(U_FAILURE(errorCode
)){
3853 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode again; cTarget marks the end of the bytes just written. */
3857 cSourceLimit
=cTarget
;
3859 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3860 if(U_FAILURE(errorCode
)){
3861 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units with the original sample. */
3864 uSource
= (const UChar
*)in
;
3865 while(uSource
<uSourceLimit
){
3866 if(*test
!=*uSource
){
3868 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3874 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3875 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status passed below is U_ZERO_ERROR although the
   message speaks of an invalid character - confirm this is intentional. */
3878 static const uint8_t source2
[]={0x0e,0x24,0x053};
3879 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
3881 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3882 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
/*
 * TestISO_2022_JP_2: round-trip test for the "ISO_2022_JP_2" converter.
 * Encodes the UTF-16 sample `in` with ucnv_fromUnicode(), decodes the result
 * with ucnv_toUnicode() and logs every unit for which *test differs from the
 * input.  The sample is then run through TestSmallTargetBuffer(),
 * TestSmallSourceBuffer() and TestToAndFromUChars(), and a three-byte
 * sequence is passed to TestNextUCharError().
 *
 * NOTE(review): the declarations and initial assignments of cnv, uBuf, cBuf,
 * cTarget, uTarget and test are not visible in this excerpt.
 */
3889 TestISO_2022_JP_2() {
3891 static const uint16_t in
[]={
3892 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3893 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3894 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3895 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3896 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3897 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3898 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3899 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3900 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3901 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3902 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3903 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3904 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3905 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3906 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3907 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3908 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3909 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3910 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3912 const UChar
* uSource
;
3913 const UChar
* uSourceLimit
;
3914 const char* cSource
;
3915 const char* cSourceLimit
;
3916 UChar
*uTargetLimit
=NULL
;
3919 const char *cTargetLimit
;
/* Work buffers and the offsets array are allocated with uBufSize*5 (600) entries. */
3922 int32_t uBufSize
= 120;
3923 UErrorCode errorCode
=U_ZERO_ERROR
;
3925 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
/* myOff is the offsets pointer handed to both conversion calls. */
3926 int32_t* myOff
= offsets
;
3927 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3928 if(U_FAILURE(errorCode
)) {
3929 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3933 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3934 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3935 uSource
= (const UChar
*)in
;
3936 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
3938 cTargetLimit
= cBuf
+uBufSize
*5;
3940 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole sample in one call with flush=TRUE. */
3941 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3942 if(U_FAILURE(errorCode
)){
3943 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode again; cTarget marks the end of the bytes just written. */
3947 cSourceLimit
=cTarget
;
3950 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3951 if(U_FAILURE(errorCode
)){
3952 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units with the original sample. */
3955 uSource
= (const UChar
*)in
;
3956 while(uSource
<uSourceLimit
){
3957 if(*test
!=*uSource
){
3959 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3964 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3965 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3966 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
3967 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected status passed below is U_ZERO_ERROR although the
   message speaks of an invalid character - confirm this is intentional. */
3970 static const uint8_t source2
[]={0x0e,0x24,0x053};
3971 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
3982 static const uint16_t in
[]={
3983 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3984 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3985 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3986 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3987 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3988 ,0x53E3,0x53E4,0x000A,0x000D};
3989 const UChar
* uSource
;
3990 const UChar
* uSourceLimit
;
3991 const char* cSource
;
3992 const char* cSourceLimit
;
3993 UChar
*uTargetLimit
=NULL
;
3996 const char *cTargetLimit
;
3999 int32_t uBufSize
= 120;
4000 UErrorCode errorCode
=U_ZERO_ERROR
;
4002 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4003 int32_t* myOff
= offsets
;
4004 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
4005 if(U_FAILURE(errorCode
)) {
4006 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4010 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4011 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4012 uSource
= (const UChar
*)in
;
4013 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4015 cTargetLimit
= cBuf
+uBufSize
*5;
4017 uTargetLimit
= uBuf
+ uBufSize
*5;
4018 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4019 if(U_FAILURE(errorCode
)){
4020 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4024 cSourceLimit
=cTarget
;
4027 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4028 if(U_FAILURE(errorCode
)){
4029 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4032 uSource
= (const UChar
*)in
;
4033 while(uSource
<uSourceLimit
){
4034 if(*test
!=*uSource
){
4035 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4040 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4041 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4042 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4043 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4044 TestJitterbug930("csISO2022KR");
4045 /*Test for the condition where there is an invalid character*/
4048 static const uint8_t source2
[]={0x1b,0x24,0x053};
4049 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4050 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * TestISO_2022_KR_1: round-trip test for the converter opened as "ibm-25546".
 * Encodes the UTF-16 sample `in` with ucnv_fromUnicode(), decodes the result
 * with ucnv_toUnicode() and logs every unit for which *test differs from the
 * input.  The sample is then run through TestGetNextUChar2022(),
 * TestSmallTargetBuffer(), TestSmallSourceBuffer() and TestToAndFromUChars().
 * Finally the to-Unicode callback is switched to UCNV_TO_U_CALLBACK_STOP and
 * a three-byte sequence is passed to TestNextUCharError() with
 * U_ILLEGAL_ESCAPE_SEQUENCE as the expected status.
 *
 * NOTE(review): the declarations and initial assignments of cnv, uBuf, cBuf,
 * cTarget, uTarget and test are not visible in this excerpt.
 */
4059 TestISO_2022_KR_1() {
4061 static const uint16_t in
[]={
4062 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4063 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4064 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4065 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4066 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4067 ,0x53E3,0x53E4,0x000A,0x000D};
4068 const UChar
* uSource
;
4069 const UChar
* uSourceLimit
;
4070 const char* cSource
;
4071 const char* cSourceLimit
;
4072 UChar
*uTargetLimit
=NULL
;
4075 const char *cTargetLimit
;
/* Work buffers and the offsets array are allocated with uBufSize*5 (600) entries. */
4078 int32_t uBufSize
= 120;
4079 UErrorCode errorCode
=U_ZERO_ERROR
;
4081 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
/* myOff is the offsets pointer handed to both conversion calls. */
4082 int32_t* myOff
= offsets
;
4083 cnv
=ucnv_open("ibm-25546", &errorCode
);
4084 if(U_FAILURE(errorCode
)) {
4085 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4089 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4090 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4091 uSource
= (const UChar
*)in
;
4092 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4094 cTargetLimit
= cBuf
+uBufSize
*5;
4096 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole sample in one call with flush=TRUE. */
4097 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4098 if(U_FAILURE(errorCode
)){
4099 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode again; cTarget marks the end of the bytes just written. */
4103 cSourceLimit
=cTarget
;
4106 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4107 if(U_FAILURE(errorCode
)){
4108 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded units with the original sample. */
4111 uSource
= (const UChar
*)in
;
4112 while(uSource
<uSourceLimit
){
4113 if(*test
!=*uSource
){
4114 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4120 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4121 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4122 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4124 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4125 /*Test for the condition where there is an invalid character*/
4128 static const uint8_t source2
[]={0x1b,0x24,0x053};
/* Install the STOP callback before probing the malformed escape sequence. */
4129 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4130 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * TestJitterbug2411: regression test for input that contains the same
 * designator escape sequence (ESC $ ) C) more than once.  The bytes in
 * `source` are decoded first with the "iso-2022-kr" converter (kr) and then
 * with the "ibm-25546" converter (kr1); a failing status from either
 * conversion is logged.
 *
 * The second conversion uses kr1, the converter its error message refers to.
 *
 * NOTE(review): `source` is passed by address to ucnv_toUnicode(), which per
 * its API contract advances the pointer past the consumed input.  No visible
 * statement rewinds it, so the second conversion may see an empty range -
 * confirm against the complete file.
 */
4138 static void TestJitterbug2411(){
4139 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4140 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4141 UConverter
* kr
=NULL
, *kr1
=NULL
;
4142 UErrorCode errorCode
= U_ZERO_ERROR
;
4143 UChar tgt
[100]={'\0'};
4144 UChar
* target
= tgt
;
4145 UChar
* targetLimit
= target
+100;
4146 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4147 if(U_FAILURE(errorCode
)) {
4148 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
/* First pass: decode with the iso-2022-kr converter. */
4151 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4152 if(U_FAILURE(errorCode
)) {
4153 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4156 kr1
= ucnv_open("ibm-25546", &errorCode
);
4157 if(U_FAILURE(errorCode
)) {
4158 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4162 targetLimit
= target
+100;
/* Second pass: decode with the ibm-25546 converter opened above. */
4163 ucnv_toUnicode(kr1
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4165 if(U_FAILURE(errorCode
)) {
4166 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4177 /* From Unicode moved to testdata/conversion.txt */
4180 static const uint8_t sampleTextJIS
[] = {
4181 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4182 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4183 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4185 static const uint16_t expectedISO2022JIS
[] = {
4190 static const int32_t toISO2022JISOffs
[]={
4196 static const uint8_t sampleTextJIS7
[] = {
4197 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4198 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4199 0x1b,0x24,0x42,0x21,0x21,
4200 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4204 static const uint16_t expectedISO2022JIS7
[] = {
4212 static const int32_t toISO2022JIS7Offs
[]={
4219 static const uint8_t sampleTextJIS8
[] = {
4220 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4221 0xa1,0xc8,0xd9,/*Katakana Set*/
4224 0xb1,0xc3, /*Katakana Set*/
4225 0x1b,0x24,0x42,0x21,0x21
4227 static const uint16_t expectedISO2022JIS8
[] = {
4229 0xff61, 0xff88, 0xff99,
4234 static const int32_t toISO2022JIS8Offs
[]={
4240 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4241 sizeof(expectedISO2022JIS
)/sizeof(expectedISO2022JIS
[0]),"JIS", toISO2022JISOffs
,TRUE
);
4242 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4243 sizeof(expectedISO2022JIS7
)/sizeof(expectedISO2022JIS7
[0]),"JIS7", toISO2022JIS7Offs
,TRUE
);
4244 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4245 sizeof(expectedISO2022JIS8
)/sizeof(expectedISO2022JIS8
[0]),"JIS8", toISO2022JIS8Offs
,TRUE
);
4252 ICU
4.4 (ticket
#7314) removes mappings for CNS 11643 planes 3..7
/*
 * TestJitterbug915: round-trip check for the "ISO_2022_CN_EXT" converter.
 *
 * Visible flow:
 *   - cSource holds the escape-sequence encoded sample bytes;
 *   - ucnv_toUnicode() converts cSource into uTarget (flush = TRUE);
 *   - ucnv_fromUnicode() converts the UTF-16 text into cTarget (flush = TRUE);
 *   - the produced bytes are compared one by one against cSource through
 *     tempSrc, and every mismatch is reported with log_err().
 *
 * NOTE(review): utargetLimit is computed as uTarget + sizeof(uTarget)/2,
 * which is the element count only if sizeof(UChar) == 2 -- confirm.
 * NOTE(review): in the mismatch message the value printed after "Expected"
 * is *ctarget (the round-tripped byte) and the value printed after "Got" is
 * *tempSrc (the original byte); the labels look reversed -- confirm.
 * NOTE(review): the statements that rewind utarget/ctarget before the second
 * conversion and before the comparison loop, and the cleanup of conv, are not
 * visible here -- confirm against the full file.
 */
4254 static void TestJitterbug915(){
4255 /* tests for roundtripping of the below sequence
4256 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4257 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4258 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4259 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4260 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4261 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4262 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4264 static const char cSource
[]={
4265 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4266 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4267 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4268 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4269 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4270 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4271 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4272 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4273 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4274 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4275 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4276 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4277 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4278 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4279 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4280 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4281 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4282 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4283 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4284 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4285 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4286 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4287 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4288 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4289 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4290 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4291 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4292 0x37, 0x20, 0x2A, 0x2F
4294 UChar uTarget
[500]={'\0'};
4295 UChar
* utarget
=uTarget
;
4296 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4298 char cTarget
[500]={'\0'};
4299 char* ctarget
=cTarget
;
4300 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4301 const char* csource
=cSource
;
4302 const char* tempSrc
= cSource
;
4303 UErrorCode err
=U_ZERO_ERROR
;
4305 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4306 if(U_FAILURE(err
)) {
4307 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4310 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4311 if(U_FAILURE(err
)) {
4312 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4315 utargetLimit
=utarget
;
4317 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4318 if(U_FAILURE(err
)) {
4319 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4322 ctargetLimit
=ctarget
;
4324 while(ctarget
<ctargetLimit
){
4325 if(*ctarget
!= *tempSrc
){
4326 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), *ctarget
,(int)*tempSrc
) ;
/*
 * TestISO_2022_CN_EXT: round-trip test for the converter opened as
 * "ISO_2022,locale=cn,version=1".
 *
 * Visible flow:
 *   1. convert the UTF-16 sample `in` to bytes with ucnv_fromUnicode();
 *   2. convert those bytes back to UTF-16 with ucnv_toUnicode();
 *   3. compare the result with `in` unit by unit, logging each mismatch;
 *   4. run the same sample through TestSmallTargetBuffer() and
 *      TestSmallSourceBuffer();
 *   5. pass a three-byte sequence to TestNextUCharError().
 */
4336 TestISO_2022_CN_EXT() {
/* UTF-16 sample text: surrogate pairs first, then rows of BMP code units that each end in CR LF. */
4338 static const uint16_t in
[]={
4339 /* test Non-BMP code points */
4340 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4341 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4342 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4343 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4344 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4345 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4346 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4347 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4348 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4351 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4352 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4353 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4354 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4355 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4356 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4357 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4358 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4359 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4360 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4361 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4362 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4363 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4364 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4365 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4366 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4367 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4368 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4370 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4374 const UChar
* uSource
;
4375 const UChar
* uSourceLimit
;
4376 const char* cSource
;
4377 const char* cSourceLimit
;
4378 UChar
*uTargetLimit
=NULL
;
4381 const char *cTargetLimit
;
/* Every scratch buffer below is sized as a multiple of uBufSize. */
4384 int32_t uBufSize
= 180;
4385 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): no NULL check on this malloc() result (nor on uBuf/cBuf below) is visible here -- confirm. */
4387 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4388 int32_t* myOff
= offsets
;
4389 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4390 if(U_FAILURE(errorCode
)) {
4391 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4395 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4396 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4397 uSource
= (const UChar
*)in
;
4398 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
/* The byte target limit is cBuf + uBufSize*5, i.e. half of the uBufSize*10 bytes allocated for cBuf. */
4400 cTargetLimit
= cBuf
+uBufSize
*5;
4402 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Step 1: UTF-16 -> bytes, whole sample in one call with flush = TRUE; offsets are written through myOff. */
4403 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4404 if(U_FAILURE(errorCode
)){
4405 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Step 2: the bytes to read back end where ucnv_fromUnicode() stopped writing. */
4409 cSourceLimit
=cTarget
;
/* The same myOff array is passed again, so it receives the to-Unicode offsets in this call. */
4412 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4413 if(U_FAILURE(errorCode
)){
4414 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Step 3: walk the original sample and the round-tripped text (`test`) in parallel. */
4417 uSource
= (const UChar
*)in
;
4418 while(uSource
<uSourceLimit
){
4419 if(*test
!=*uSource
){
4420 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4423 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
/* Step 4: hand the same sample range and converter to the small-buffer helpers. */
4428 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4429 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4430 /*Test for the condition where there is an invalid character*/
/* 0x053 is 0x53 written with a leading zero. The expected status passed to TestNextUCharError() is U_ZERO_ERROR. */
4433 static const uint8_t source2
[]={0x0e,0x24,0x053};
4434 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4446 static const uint16_t in
[]={
4448 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4449 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4450 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4451 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4452 0x0020, 0x0045, 0x004e, 0x0044,
4454 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4455 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4456 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4457 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4458 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4459 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4460 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4461 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4462 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4463 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4464 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4465 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4466 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4467 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4468 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4469 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4470 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4473 const UChar
* uSource
;
4474 const UChar
* uSourceLimit
;
4475 const char* cSource
;
4476 const char* cSourceLimit
;
4477 UChar
*uTargetLimit
=NULL
;
4480 const char *cTargetLimit
;
4483 int32_t uBufSize
= 180;
4484 UErrorCode errorCode
=U_ZERO_ERROR
;
4486 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4487 int32_t* myOff
= offsets
;
4488 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4489 if(U_FAILURE(errorCode
)) {
4490 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4494 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4495 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4496 uSource
= (const UChar
*)in
;
4497 uSourceLimit
=(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0]));
4499 cTargetLimit
= cBuf
+uBufSize
*5;
4501 uTargetLimit
= uBuf
+ uBufSize
*5;
4502 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4503 if(U_FAILURE(errorCode
)){
4504 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4508 cSourceLimit
=cTarget
;
4511 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4512 if(U_FAILURE(errorCode
)){
4513 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4516 uSource
= (const UChar
*)in
;
4517 while(uSource
<uSourceLimit
){
4518 if(*test
!=*uSource
){
4519 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4522 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4527 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4528 TestSmallTargetBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4529 TestSmallSourceBuffer(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4530 TestToAndFromUChars(in
,(const UChar
*)in
+ (sizeof(in
)/sizeof(in
[0])),cnv
);
4531 TestJitterbug930("csISO2022CN");
4532 /*Test for the condition where there is an invalid character*/
4535 static const uint8_t source2
[]={0x0e,0x24,0x053};
4536 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4545 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4547 const char * converterName
;
4548 const char * inputText
;
4549 int inputTextLength
;
4552 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
/*
 * To-Unicode callback that TestJitterbug6175 installs with
 * ucnv_setToUCallBack().
 *
 * context   - not referenced in the visible body
 * toArgs    - conversion arguments; forwarded to ucnv_cbToUWriteSub()
 * codeUnits - not referenced in the visible body
 * length    - not referenced in the visible body
 * reason    - why the converter invoked the callback; the test expects
 *             UCNV_IRREGULAR
 * err       - in/out status; reset to U_ZERO_ERROR before the substitution
 *             is written
 */
4553 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context
, UConverterToUnicodeArgs
*toArgs
, const char* codeUnits
,
4554 int32_t length
, UConverterCallbackReason reason
, UErrorCode
* err
) {
/* NOTE(review): the statement guarded by this test is not visible here; presumably it
   returns early for reason values ordered after UCNV_IRREGULAR -- confirm. */
4555 if (reason
> UCNV_IRREGULAR
) {
/* Any remaining reason other than UCNV_IRREGULAR is reported as a test failure. */
4558 if (reason
!= UCNV_IRREGULAR
) {
4559 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4561 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
/* Reset the status, then ask the callback API to write the substitution with offset index 0. */
4562 *err
= U_ZERO_ERROR
;
4563 ucnv_cbToUWriteSub(toArgs
,0,err
);
/* Capacity, in UChars, of the toUChars output buffer that TestJitterbug6175 converts into. */
4566 enum { kEmptySegmentToUCharsMax
= 64 };
/*
 * TestJitterbug6175: for every entry of emptySegmentTests, open the named
 * converter, install UCNV_TO_U_CALLBACK_EMPTYSEGMENT as its to-Unicode
 * callback, and convert the entry's input bytes with a single
 * ucnv_toUnicode() call (flush = TRUE) into a buffer of
 * kEmptySegmentToUCharsMax UChars.
 *
 * Each input contains a mode switch that is undone or replaced before any
 * character bytes follow it (the "empty segment" under test).
 *
 * NOTE(review): the checks performed after ucnv_toUnicode() and the
 * converter cleanup are not visible here -- confirm against the full file.
 */
4567 static void TestJitterbug6175(void) {
/* 0x1B,0x24,0x42 is immediately followed by 0x1B,0x28,0x42. */
4568 static const char iso2022jp_a
[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
/* 0x0E (shift-out) is immediately followed by 0x0F (shift-in). */
4569 static const char iso2022kr_a
[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
/* Again 0x0E directly followed by 0x0F, then a further escape sequence and two more bytes. */
4570 static const char iso2022cn_a
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
/* After 0x0E a new escape sequence 0x1B,0x24,0x29,0x47 arrives before any character bytes. */
4571 static const char iso2022cn_b
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
/* 0x7E,0x7B ("~{") is immediately followed by 0x7E,0x7D ("~}"). */
4572 static const char hzGB2312_a
[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4573 static const EmptySegmentTest emptySegmentTests
[] = {
4574 /* converterName inputText inputTextLength */
4575 { "ISO-2022-JP", iso2022jp_a
, sizeof(iso2022jp_a
) },
4576 { "ISO-2022-KR", iso2022kr_a
, sizeof(iso2022kr_a
) },
4577 { "ISO-2022-CN", iso2022cn_a
, sizeof(iso2022cn_a
) },
4578 { "ISO-2022-CN", iso2022cn_b
, sizeof(iso2022cn_b
) },
4579 { "HZ-GB-2312", hzGB2312_a
, sizeof(hzGB2312_a
) },
4583 const EmptySegmentTest
* testPtr
;
/* The loop stops at the first entry whose converterName is NULL, so the table must end
   with such a sentinel. NOTE(review): the sentinel entry is not visible here -- confirm. */
4584 for (testPtr
= emptySegmentTests
; testPtr
->converterName
!= NULL
; ++testPtr
) {
4585 UErrorCode err
= U_ZERO_ERROR
;
4586 UConverter
* cnv
= ucnv_open(testPtr
->converterName
, &err
);
4587 if (U_FAILURE(err
)) {
4588 log_data_err("Unable to open %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
/* Install the checking callback; no context is passed and the previous callback/context are not requested. */
4591 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_EMPTYSEGMENT
, NULL
, NULL
, NULL
, &err
);
4592 if (U_FAILURE(err
)) {
4593 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
/* Convert the whole input in one call; no offsets are requested (NULL) and flush is TRUE. */
4598 UChar toUChars
[kEmptySegmentToUCharsMax
];
4599 UChar
* toUCharsPtr
= toUChars
;
4600 const UChar
* toUCharsLimit
= toUCharsPtr
+ kEmptySegmentToUCharsMax
;
4601 const char * inCharsPtr
= testPtr
->inputText
;
4602 const char * inCharsLimit
= inCharsPtr
+ testPtr
->inputTextLength
;
4603 ucnv_toUnicode(cnv
, &toUCharsPtr
, toUCharsLimit
, &inCharsPtr
, inCharsLimit
, NULL
, TRUE
, &err
);
/*
 * TestEBCDIC_STATEFUL: exercises the "ibm-930" converter through the
 * TestNextUChar()/TestNextUCharError() helpers:
 *   - `in` against the expected table `results`;
 *   - an empty range (source == limit), expecting U_INDEX_OUTOFBOUNDS_ERROR;
 *   - a lone 0x0f byte, expecting U_INDEX_OUTOFBOUNDS_ERROR;
 *   - the bytes 0x0e,0x7F,0xFF, with U_ZERO_ERROR passed as the expected status;
 *   - a second sequence `in2` against `results2`.
 *
 * NOTE(review): the contents of the data tables and the converter cleanup
 * are not visible here -- confirm against the full file.
 */
4610 TestEBCDIC_STATEFUL() {
4612 static const uint8_t in
[]={
4621 /* expected test results */
4622 static const int32_t results
[]={
4623 /* number of bytes read, code point */
4632 static const uint8_t in2
[]={
4638 /* expected test results */
4639 static const int32_t results2
[]={
4640 /* number of bytes read, code point */
4645 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4646 UErrorCode errorCode
=U_ZERO_ERROR
;
4647 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
4648 if(U_FAILURE(errorCode
)) {
4649 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
/* First sequence: the whole of `in`, checked against `results`. */
4652 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4654 /* Test the condition when source >= sourceLimit */
4655 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4657 /*Test for the condition where source > sourcelimit after consuming the shift character */
4659 static const uint8_t source1
[]={0x0f};
4660 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4662 /*Test for the condition where there is an invalid character*/
4665 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4666 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
/* Second sequence: re-point source/limit at `in2` and check against `results2`. */
4669 source
=(const char*)in2
;
4670 limit
=(const char*)in2
+sizeof(in2
);
4671 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4679 static const uint8_t in
[]={
4682 0x81, 0x30, 0x81, 0x30,
4686 0x82, 0x35, 0x8f, 0x33,
4687 0x84, 0x31, 0xa4, 0x39,
4688 0x90, 0x30, 0x81, 0x30,
4689 0xe3, 0x32, 0x9a, 0x35
4692 * Feature removed markus 2000-oct-26
4693 * Only some codepages must match surrogate pairs into supplementary code points -
4694 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4695 * GB 18030 provides direct encodings for supplementary code points, therefore
4696 * it must not combine two single-encoded surrogates into one code point.
4698 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4702 /* expected test results */
4703 static const int32_t results
[]={
4704 /* number of bytes read, code point */
4716 /* Feature removed. See comment above. */
4721 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4722 UErrorCode errorCode
=U_ZERO_ERROR
;
4723 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4724 if(U_FAILURE(errorCode
)) {
4725 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4728 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4734 /* LMBCS-1 string */
4735 static const uint8_t pszLMBCS
[]={
4744 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4748 /* Unicode UChar32 equivalents */
4749 static const UChar32 pszUnicode32
[]={
4759 0x00023456, /* code point for surrogate pair */
4763 /* Unicode UChar equivalents */
4764 static const UChar pszUnicode
[]={
4774 0xD84D, /* high (lead) surrogate: lies in D800..DBFF */
4775 0xDC56, /* low (trail) surrogate: lies in DC00..DFFF; with 0xD84D it encodes U+23456 */
4779 /* expected test results */
4780 static const int offsets32
[]={
4781 /* number of bytes read, code point */
4795 /* expected test results */
4796 static const int offsets
[]={
4797 /* number of bytes read, code point */
4815 #define NAME_LMBCS_1 "LMBCS-1"
4816 #define NAME_LMBCS_2 "LMBCS-2"
4819 /* Some basic open/close/property tests on some LMBCS converters */
4822 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4823 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4824 char get_subchars
[1];
4825 const char * get_name
;
4829 int8_t len
= sizeof(get_subchars
);
4831 UErrorCode errorCode
=U_ZERO_ERROR
;
4834 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4835 if(U_FAILURE(errorCode
)) {
4836 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4839 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4840 if(U_FAILURE(errorCode
)) {
4841 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4846 get_name
= ucnv_getName (cnv1
, &errorCode
);
4847 if (strcmp(NAME_LMBCS_1
,get_name
)){
4848 log_err("Unexpected converter name: %s\n", get_name
);
4850 get_name
= ucnv_getName (cnv2
, &errorCode
);
4851 if (strcmp(NAME_LMBCS_2
,get_name
)){
4852 log_err("Unexpected converter name: %s\n", get_name
);
4855 /* substitution chars */
4856 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4857 if(U_FAILURE(errorCode
)) {
4858 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4861 log_err("Unexpected length of sub chars\n");
4863 if (get_subchars
[0] != expected_subchars
[0]){
4864 log_err("Unexpected value of sub chars\n");
4866 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4867 if(U_FAILURE(errorCode
)) {
4868 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4870 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4871 if(U_FAILURE(errorCode
)) {
4872 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4875 log_err("Unexpected length of sub chars\n");
4877 if (get_subchars
[0] != new_subchars
[0]){
4878 log_err("Unexpected value of sub chars\n");
4885 /* LMBCS to Unicode - offsets */
4887 UErrorCode errorCode
=U_ZERO_ERROR
;
4889 const char * pSource
= (const char *)pszLMBCS
;
4890 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4892 UChar Out
[sizeof(pszUnicode
) + 1];
4894 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
4896 int32_t off
[sizeof(offsets
)];
4898 /* last 'offset' in expected results is just the final size.
4899 (Makes other tests easier). Compensate here: */
4901 off
[(sizeof(offsets
)/sizeof(offsets
[0]))-1] = sizeof(pszLMBCS
);
4905 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4906 if(U_FAILURE(errorCode
)) {
4907 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4913 ucnv_toUnicode (cnv
,
4923 if (memcmp(off
,offsets
,sizeof(offsets
)))
4925 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4927 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4929 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4934 /* LMBCS to Unicode - getNextUChar */
4935 const char * sourceStart
;
4936 const char *source
=(const char *)pszLMBCS
;
4937 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4938 const UChar32
*results
= pszUnicode32
;
4939 const int *off
= offsets32
;
4941 UErrorCode errorCode
=U_ZERO_ERROR
;
4944 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4945 if(U_FAILURE(errorCode
)) {
4946 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4952 while(source
<limit
) {
4954 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4955 if(U_FAILURE(errorCode
)) {
4956 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4958 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
4959 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4960 uniChar
, (source
-sourceStart
), *results
, *off
);
4969 { /* test locale & optimization group operations: Unicode to LMBCS */
4971 UErrorCode errorCode
=U_ZERO_ERROR
;
4972 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4973 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4974 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4975 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4976 const UChar
* pUniOut
= uniString
;
4977 UChar
* pUniIn
= uniString
;
4978 uint8_t lmbcsString
[4];
4979 const char * pLMBCSOut
= (const char *)lmbcsString
;
4980 char * pLMBCSIn
= (char *)lmbcsString
;
4982 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4983 ucnv_fromUnicode (cnv16he
,
4984 &pLMBCSIn
, (pLMBCSIn
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4985 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4986 NULL
, 1, &errorCode
);
4988 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
4990 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4993 pLMBCSIn
= (char *)lmbcsString
;
4994 pUniOut
= uniString
;
4995 ucnv_fromUnicode (cnv01us
,
4996 &pLMBCSIn
, (const char *)(lmbcsString
+ sizeof(lmbcsString
)/sizeof(lmbcsString
[0])),
4997 &pUniOut
, pUniOut
+ sizeof(uniString
)/sizeof(uniString
[0]),
4998 NULL
, 1, &errorCode
);
5000 if (lmbcsString
[0] != 0x9F)
5002 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5005 /* single byte char from mbcs char set */
5006 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5007 pLMBCSOut
= (const char *)lmbcsString
;
5009 ucnv_toUnicode (cnv16jp
,
5010 &pUniIn
, pUniIn
+ 1,
5011 &pLMBCSOut
, (pLMBCSOut
+ 1),
5012 NULL
, 1, &errorCode
);
5013 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5015 log_err("Unexpected results from LMBCS-16 single byte char\n");
5017 /* convert to group 1: should be 3 bytes */
5018 pLMBCSIn
= (char *)lmbcsString
;
5019 pUniOut
= uniString
;
5020 ucnv_fromUnicode (cnv01us
,
5021 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
5022 &pUniOut
, pUniOut
+ 1,
5023 NULL
, 1, &errorCode
);
5024 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
5025 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
5027 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5029 pLMBCSOut
= (const char *)lmbcsString
;
5031 ucnv_toUnicode (cnv01us
,
5032 &pUniIn
, pUniIn
+ 1,
5033 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
5034 NULL
, 1, &errorCode
);
5035 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5037 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5039 pLMBCSIn
= (char *)lmbcsString
;
5040 pUniOut
= uniString
;
5041 ucnv_fromUnicode (cnv16jp
,
5042 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
5043 &pUniOut
, pUniOut
+ 1,
5044 NULL
, 1, &errorCode
);
5045 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
5047 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5049 ucnv_close(cnv16he
);
5050 ucnv_close(cnv16jp
);
5051 ucnv_close(cnv01us
);
5054 /* Small source buffer testing, LMBCS -> Unicode */
5056 UErrorCode errorCode
=U_ZERO_ERROR
;
5058 const char * pSource
= (const char *)pszLMBCS
;
5059 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
5060 int codepointCount
= 0;
5062 UChar Out
[sizeof(pszUnicode
) + 1];
5064 UChar
* OutLimit
= Out
+ sizeof(pszUnicode
)/sizeof(UChar
);
5067 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
5068 if(U_FAILURE(errorCode
)) {
5069 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5074 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
5076 ucnv_toUnicode (cnv
,
5080 (pSource
+1), /* claim that this is a 1- byte buffer */
5082 FALSE
, /* FALSE means there might be more chars in the next buffer */
5085 if (U_SUCCESS (errorCode
))
5087 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
5089 /* we are on to the next code point: check value */
5091 if (Out
[0] != pszUnicode
[codepointCount
]){
5092 log_err("LMBCS->Uni result %lx should have been %lx \n",
5093 Out
[0], pszUnicode
[codepointCount
]);
5096 pOut
= Out
; /* reset for accumulating next code point */
5102 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
5106 /* limits & surrogate error testing */
5107 char LIn
[sizeof(pszLMBCS
)];
5108 const char * pLIn
= LIn
;
5110 char LOut
[sizeof(pszLMBCS
)];
5111 char * pLOut
= LOut
;
5113 UChar UOut
[sizeof(pszUnicode
)];
5114 UChar
* pUOut
= UOut
;
5116 UChar UIn
[sizeof(pszUnicode
)];
5117 const UChar
* pUIn
= UIn
;
5119 int32_t off
[sizeof(offsets
)];
5122 errorCode
=U_ZERO_ERROR
;
5124 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5126 ucnv_fromUnicode(cnv
, &pLOut
, pLOut
+1, &pUIn
, pUIn
-1, off
, FALSE
, &errorCode
);
5127 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5129 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
5133 errorCode
=U_ZERO_ERROR
;
5134 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
5135 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5137 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
5139 errorCode
=U_ZERO_ERROR
;
5141 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
5142 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5144 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
5146 errorCode
=U_ZERO_ERROR
;
5148 /* 0 byte source request - no error, no pointer movement */
5149 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
5150 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
5151 if(U_FAILURE(errorCode
)) {
5152 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
5154 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5156 log_err("Unexpected pointer move in 0 byte source request \n");
5158 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5159 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5160 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5162 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5164 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5166 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5168 errorCode
= U_ZERO_ERROR
;
5170 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5173 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+sizeof(pszUnicode
)/sizeof(UChar
),off
,FALSE
, &errorCode
);
5174 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5176 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5179 errorCode
= U_ZERO_ERROR
;
5181 pLIn
= (const char *)pszLMBCS
;
5182 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5183 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5185 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5188 /* unpaired or chopped LMBCS surrogates */
5190 /* OK high surrogate, Low surrogate is chopped */
5191 LIn
[0] = (char)0x14;
5192 LIn
[1] = (char)0xD8;
5193 LIn
[2] = (char)0x01;
5194 LIn
[3] = (char)0x14;
5195 LIn
[4] = (char)0xDC;
5197 errorCode
= U_ZERO_ERROR
;
5200 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5201 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5202 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5204 log_err("Unexpected results on chopped low surrogate\n");
5207 /* chopped at surrogate boundary */
5208 LIn
[0] = (char)0x14;
5209 LIn
[1] = (char)0xD8;
5210 LIn
[2] = (char)0x01;
5212 errorCode
= U_ZERO_ERROR
;
5215 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5216 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5218 log_err("Unexpected results on chopped at surrogate boundary \n");
5221 /* unpaired surrogate plus valid Unichar */
5222 LIn
[0] = (char)0x14;
5223 LIn
[1] = (char)0xD8;
5224 LIn
[2] = (char)0x01;
5225 LIn
[3] = (char)0x14;
5226 LIn
[4] = (char)0xC9;
5227 LIn
[5] = (char)0xD0;
5229 errorCode
= U_ZERO_ERROR
;
5232 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5233 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5235 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5238 /* unpaired surrogate plus chopped Unichar */
5239 LIn
[0] = (char)0x14;
5240 LIn
[1] = (char)0xD8;
5241 LIn
[2] = (char)0x01;
5242 LIn
[3] = (char)0x14;
5243 LIn
[4] = (char)0xC9;
5246 errorCode
= U_ZERO_ERROR
;
5249 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5250 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5252 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5255 /* unpaired surrogate plus valid non-Unichar */
5256 LIn
[0] = (char)0x14;
5257 LIn
[1] = (char)0xD8;
5258 LIn
[2] = (char)0x01;
5259 LIn
[3] = (char)0x0F;
5260 LIn
[4] = (char)0x3B;
5263 errorCode
= U_ZERO_ERROR
;
5266 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5267 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5269 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5272 /* unpaired surrogate plus chopped non-Unichar */
5273 LIn
[0] = (char)0x14;
5274 LIn
[1] = (char)0xD8;
5275 LIn
[2] = (char)0x01;
5276 LIn
[3] = (char)0x0F;
5279 errorCode
= U_ZERO_ERROR
;
5282 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+sizeof(UOut
)/sizeof(UChar
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5284 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5286 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5290 ucnv_close(cnv
); /* final cleanup */
/*
 * TestJitterbug255: feeds a fixed 7-byte buffer through ucnv_getNextUChar()
 * with the "shift-jis" converter, one call per loop iteration, until the
 * source pointer reaches the end of the buffer.
 *
 * The value returned by ucnv_getNextUChar() is discarded (the assignment is
 * commented out); only the resulting status is inspected.
 *
 * NOTE(review): this listing omits some numbered lines (braces and whatever
 * follows each log call), so early exits and the converter cleanup cannot be
 * seen here -- confirm against the complete file.
 */
5294 static void TestJitterbug255()
/* Input bytes. testEnd is computed with sizeof(testBytes), so the trailing
 * 0x00 is part of the input range. */
5296 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5297 const char *testBuffer
= (const char *)testBytes
;
5298 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5299 UErrorCode status
= U_ZERO_ERROR
;
5301 UConverter
*cnv
= 0;
5303 cnv
= ucnv_open("shift-jis", &status
);
/* A failed open is reported through log_data_err rather than log_err. */
5304 if (U_FAILURE(status
) || cnv
== 0) {
5305 log_data_err("Failed to open the converter for SJIS.\n");
/* testBuffer is passed by address to ucnv_getNextUChar(); the loop ends when
 * it equals testEnd. */
5308 while (testBuffer
!= testEnd
)
5310 /*result = */ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5311 if (U_FAILURE(status
))
5313 log_err("Failed to convert the next UChar for SJIS.\n");
/*
 * TestEBCDICUS4XML: checks how the "ebcdic-xml-us" converter maps three
 * line-control bytes in both directions.
 *
 *  - To Unicode:   bytes 0x25, 0x15, 0x0D are expected to produce
 *                  0x000A, 0x000A, 0x000D.
 *  - From Unicode: 0x000A, 0x000A, 0x000D are expected to produce
 *                  bytes 0x25, 0x25, 0x0D.
 *
 * Only the first three elements of each array take part: every limit is
 * "+3" and every memcmp covers three elements.
 *
 * NOTE(review): this listing omits some numbered lines (closing braces and
 * whatever follows the log calls), so early exits and the converter cleanup
 * cannot be seen here -- confirm against the complete file.
 */
5320 static void TestEBCDICUS4XML()
/* Output buffer for the to-Unicode direction. */
5322 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
/* Expected to-Unicode output; also the input of the from-Unicode direction. */
5323 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
/* Expected from-Unicode output. */
5324 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
/* Input of the to-Unicode direction. */
5325 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
/* Output buffer for the from-Unicode direction. */
5326 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
/* Cursor pointers: the conversion calls below take these by address, while
 * the *_x arrays keep the start addresses used for comparison and printing. */
5327 UChar
*unicodes
= unicodes_x
;
5328 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5329 char *target
= target_x
;
5330 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5331 UErrorCode status
= U_ZERO_ERROR
;
5332 UConverter
*cnv
= 0;
5334 cnv
= ucnv_open("ebcdic-xml-us", &status
);
/* A failed open is reported through log_data_err rather than log_err. */
5335 if (U_FAILURE(status
) || cnv
== 0) {
5336 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
/* Direction 1: three bytes -> UChars, no offsets array (NULL). */
5339 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
/* Compare from the start of the output array (unicodes_x), not through the
 * cursor that was handed to the call. */
5340 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5341 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5342 u_errorName(status
));
/* Dump actual, then expected. */
5343 printUSeqErr(unicodes_x
, 3);
5344 printUSeqErr(toUnicodeMaps
, 3);
/* Clear any error from direction 1 before running direction 2. */
5346 status
= U_ZERO_ERROR
;
/* Direction 2: three UChars -> bytes, no offsets array (NULL). */
5347 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
5348 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5349 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5350 u_errorName(status
));
/* Dump actual, then expected. */
5351 printSeqErr((const unsigned char*)target_x
, 3);
5352 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5356 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5358 #if !UCONFIG_NO_COLLATION
/*
 * TestJitterbug981: converts the rule string of the "zh" collator to UTF-8
 * with ucnv_fromUChars(), repeating the call with a changing target capacity
 * while the status is U_BUFFER_OVERFLOW_ERROR, and checks that the length
 * reported by ucnv_fromUChars() is the same on every pass.
 *
 * NOTE(review): this listing omits some numbered lines. The declarations of
 * rules, buff and numNeeded, the initial value of target_cap, the head of
 * the loop closed by "} while (...)" and the bodies' closing braces are not
 * visible here -- confirm against the complete file.
 * NOTE(review): no check of the malloc() result and no release of buff are
 * visible in this listing -- confirm.
 */
5360 static void TestJitterbug981(){
5362 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5363 UErrorCode status
= U_ZERO_ERROR
;
5364 UConverter
*utf8cnv
;
5365 UCollator
* myCollator
;
5368 utf8cnv
= ucnv_open ("utf8", &status
);
5369 if(U_FAILURE(status
)){
5370 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status
));
5373 myCollator
= ucol_open("zh", &status
);
/* If the collator cannot be opened, the already-open converter is closed. */
5374 if(U_FAILURE(status
)){
5375 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status
));
5376 ucnv_close(utf8cnv
);
/* Source text for the conversion: the collator's rule string and its length. */
5380 rules
= ucol_getRules(myCollator
, &rules_length
);
/* Buffer size: rule length times the converter's maximum bytes per character. */
5381 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
5382 buff
= malloc(buff_size
);
/* Each pass starts from a reset converter and a cleared status. */
5386 ucnv_reset(utf8cnv
);
5387 status
= U_ZERO_ERROR
;
/* Report when the requested capacity has reached the allocated size. */
5388 if(target_cap
>= buff_size
) {
5389 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
/* Convert with the current capacity; bytes_needed receives the returned length. */
5392 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5393 rules
, rules_length
, &status
);
/* Next capacity: the reported length if it exceeds the current capacity,
 * otherwise one more than the current capacity. */
5394 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* Once a length has been recorded, every later pass must report the same one. */
5395 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5396 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5399 numNeeded
= bytes_needed
;
5400 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5401 ucol_close(myCollator
);
5402 ucnv_close(utf8cnv
);
/*
 * TestJitterbug1293: converts a short, zero-terminated UChar string with the
 * "shift-jis" converter through ucnv_fromUChars(), repeating while the status
 * is U_BUFFER_OVERFLOW_ERROR, and checks that the reported length does not
 * change between passes. Any remaining failure status is logged afterwards.
 *
 * NOTE(review): this listing omits some numbered lines. The declaration of
 * target, the first assignment to target_cap, the head of the loop closed by
 * "} while (...)" and whatever follows the log calls are not visible here --
 * confirm against the complete file.
 * NOTE(review): the ucnv_fromUChars() call passes the constant 256 as the
 * capacity; target_cap is computed below but is not an argument of the call.
 */
5408 static void TestJitterbug1293(){
/* Seven code units followed by a 0 terminator; the length is taken with u_strlen(). */
5409 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5411 UErrorCode status
= U_ZERO_ERROR
;
5412 UConverter
* conv
=NULL
;
/* Only numNeeded has an initializer in this declaration. */
5413 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5414 conv
= ucnv_open("shift-jis",&status
);
/* A failed open is reported through log_data_err rather than log_err. */
5415 if(U_FAILURE(status
)){
5416 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
5422 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
/* Next capacity: the reported length if it exceeds the current capacity,
 * otherwise one more than the current capacity. */
5423 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* Once a length has been recorded, every later pass must report the same one. */
5424 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5425 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5427 numNeeded
= bytes_needed
;
5428 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5429 if(U_FAILURE(status
)){
5430 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));
/*
 * TestJB5275_1: converts a fixed byte string to Unicode with the "iscii-gur"
 * converter and compares the output with the expected[] table.
 *
 * Per the verbose log message and the inline comments on the data, the case
 * under test is that the converter goes back to its default script after a
 * new line (0x0A) once the script has been switched inside the data.
 *
 * NOTE(review): this listing omits some numbered lines. The end of the
 * expected[] initializer, the statement(s) between "targetLimit = target"
 * and the print call, the comparison inside the final loop, and the
 * converter cleanup are not visible here -- confirm against the complete
 * file.
 */
5435 static void TestJB5275_1(){
/* Input bytes, built from adjacent string literals; one group per line. */
5437 static const char* data
= "\x3B\xB3\x0A" /* Easy characters */
5438 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5439 /* Switch script: */
5440 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5441 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5442 "\xEF\x40\x3B\xB3\x0A";
/* Expected output, grouped to mirror the input groups above. */
5443 static const UChar expected
[] ={
5444 0x003b, 0x0a15, 0x000a, /* Easy characters */
5445 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5446 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5447 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5448 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5451 UErrorCode status
= U_ZERO_ERROR
;
5452 UConverter
* conv
= ucnv_open("iscii-gur", &status
);
/* Output buffer of 100 UChars with write cursor and limit. */
5453 UChar dest
[100] = {'\0'};
5454 UChar
* target
= dest
;
5455 UChar
* targetLimit
= dest
+100;
/* Input range: strlen(data) bytes, so the string's terminating NUL is excluded. */
5456 const char* source
= data
;
5457 const char* sourceLimit
= data
+strlen(data
);
/* Read cursor into the expected table. */
5458 const UChar
* exp
= expected
;
/* A failed open is reported through log_data_err rather than log_err. */
5460 if (U_FAILURE(status
)) {
5461 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status
));
5465 log_verbose("Testing switching back to default script when new line is encountered.\n");
/* Convert the whole input in one call, without an offsets array (NULL). */
5466 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5467 if(U_FAILURE(status
)){
5468 log_err("conversion failed: %s \n", u_errorName(status
));
/* Keep the write cursor's position after the conversion as the new limit. */
5470 targetLimit
= target
;
/* NOTE(review): presumably target is moved back to the start of dest in a
 * line not shown here, otherwise the length printed below would be 0 --
 * confirm. */
5472 printUSeq(target
, targetLimit
-target
);
/* Walk the output up to targetLimit; the mismatch message prints the
 * expected value first and the value actually produced second. */
5473 while(target
<targetLimit
){
5475 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5483 static void TestJB5275(){
5484 static const char* data
=
5485 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5486 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5487 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5488 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5489 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5490 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5491 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5492 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5493 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5494 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5495 static const UChar expected
[] ={
5496 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5497 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5498 0x0038, 0x0C95, 0x000A, /* Kannada test */
5499 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5500 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5501 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5504 UErrorCode status
= U_ZERO_ERROR
;
5505 UConverter
* conv
= ucnv_open("iscii", &status
);
5506 UChar dest
[100] = {'\0'};
5507 UChar
* target
= dest
;
5508 UChar
* targetLimit
= dest
+100;
5509 const char* source
= data
;
5510 const char* sourceLimit
= data
+strlen(data
);
5511 const UChar
* exp
= expected
;
5512 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5513 if(U_FAILURE(status
)){
5514 log_err("conversion failed: %s \n", u_errorName(status
));
5516 targetLimit
= target
;
5519 printUSeq(target
, targetLimit
-target
);
5521 while(target
<targetLimit
){
5523 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);