1 /********************************************************************
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 * Modification History:
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 ********************************************************************************
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "unicode/ucnv_cb.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ucol.h"
25 #include "unicode/utf16.h"
29 static void TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
);
30 static void TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
);
31 #if !UCONFIG_NO_COLLATION
32 static void TestJitterbug981(void);
34 #if !UCONFIG_NO_LEGACY_CONVERSION
35 static void TestJitterbug1293(void);
37 static void TestNewConvertWithBufferSizes(int32_t osize
, int32_t isize
) ;
38 static void TestConverterTypesAndStarters(void);
39 static void TestAmbiguous(void);
40 static void TestSignatureDetection(void);
41 static void TestUTF7(void);
42 static void TestIMAP(void);
43 static void TestUTF8(void);
44 static void TestCESU8(void);
45 static void TestUTF16(void);
46 static void TestUTF16BE(void);
47 static void TestUTF16LE(void);
48 static void TestUTF32(void);
49 static void TestUTF32BE(void);
50 static void TestUTF32LE(void);
51 static void TestLATIN1(void);
53 #if !UCONFIG_NO_LEGACY_CONVERSION
54 static void TestSBCS(void);
55 static void TestDBCS(void);
56 static void TestMBCS(void);
57 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
58 static void TestICCRunout(void);
61 #ifdef U_ENABLE_GENERIC_ISO_2022
62 static void TestISO_2022(void);
65 static void TestISO_2022_JP(void);
66 static void TestISO_2022_JP_1(void);
67 static void TestISO_2022_JP_2(void);
68 static void TestISO_2022_KR(void);
69 static void TestISO_2022_KR_1(void);
70 static void TestISO_2022_CN(void);
73 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
75 static void TestISO_2022_CN_EXT(void);
77 static void TestJIS(void);
78 static void TestHZ(void);
81 static void TestSCSU(void);
83 #if !UCONFIG_NO_LEGACY_CONVERSION
84 static void TestEBCDIC_STATEFUL(void);
85 static void TestGB18030(void);
86 static void TestLMBCS(void);
87 static void TestJitterbug255(void);
88 static void TestEBCDICUS4XML(void);
91 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
93 static void TestJitterbug915(void);
95 static void TestISCII(void);
97 static void TestCoverageMBCS(void);
98 static void TestJitterbug2346(void);
99 static void TestJitterbug2411(void);
100 static void TestJB5275(void);
101 static void TestJB5275_1(void);
102 static void TestJitterbug6175(void);
104 static void TestIsFixedWidth(void);
107 static void TestInBufSizes(void);
109 static void TestRoundTrippingAllUTF(void);
110 static void TestConv(const uint16_t in
[],
117 /* open a converter, using test data if it begins with '@' */
118 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
);
/* Capacity of the scratch output/offset buffers used by the conversion
   helpers, and the default chunk size for both directions. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes for the current run; TestNewConvertWithBufferSizes() overwrites
   them and the testConvert*() helpers read them to limit each converter call. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;

/* Label of the conversion currently under test, used in log messages. */
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Every use of an argument is parenthesized so that
   operators inside an argument cannot re-associate with the comparison or the
   conditional.  Note: each argument may be evaluated twice. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
129 static UConverter
*my_ucnv_open(const char *cnv
, UErrorCode
*err
)
131 if(cnv
&& cnv
[0] == '@') {
132 return ucnv_openPackage(loadTestData(err
), cnv
+1, err
);
134 return ucnv_open(cnv
, err
);
/* Writes the first len bytes of a to the verbose log as a brace-enclosed
   list of two-digit hex values. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
147 static void printUSeq(const UChar
* a
, int len
)
151 while (i
<len
) log_verbose("0x%04x ", a
[i
++]);
/* Writes the first len bytes of a to stderr as a brace-enclosed list of
   two-digit hex values, followed by a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
164 static void printUSeqErr(const UChar
* a
, int len
)
167 fprintf(stderr
, "{U+");
169 fprintf(stderr
, "0x%04x ", a
[i
++]);
170 fprintf(stderr
,"}\n");
174 TestNextUChar(UConverter
* cnv
, const char* source
, const char* limit
, const int32_t results
[], const char* message
)
177 const char* s
=(char*)source
;
178 const int32_t *r
=results
;
179 UErrorCode errorCode
=U_ZERO_ERROR
;
184 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
185 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
186 break; /* no more significant input */
187 } else if(U_FAILURE(errorCode
)) {
188 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
191 /* test the expected number of input bytes only if >=0 */
192 (*r
>=0 && (int32_t)(s
-s0
)!=*r
) ||
195 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
196 message
, c
, (s
-s0
), *(r
+1), *r
);
204 TestNextUCharError(UConverter
* cnv
, const char* source
, const char* limit
, UErrorCode expected
, const char* message
)
206 const char* s
=(char*)source
;
207 UErrorCode errorCode
=U_ZERO_ERROR
;
209 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
210 if(errorCode
!= expected
){
211 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected
), message
, myErrorName(errorCode
));
213 if(c
!= 0xFFFD && c
!= 0xffff){
214 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message
, c
);
219 static void TestInBufSizes(void)
221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,1);
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,2);
224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,3);
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,4);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,5);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,6);
228 TestNewConvertWithBufferSizes(1,1);
229 TestNewConvertWithBufferSizes(2,3);
230 TestNewConvertWithBufferSizes(3,2);
234 static void TestOutBufSizes(void)
237 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER
,NEW_MAX_BUFFER
);
238 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER
);
239 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER
);
240 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER
);
241 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER
);
242 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER
);
248 void addTestNewConvert(TestNode
** root
)
250 #if !UCONFIG_NO_FILE_IO
251 addTest(root
, &TestInBufSizes
, "tsconv/nucnvtst/TestInBufSizes");
252 addTest(root
, &TestOutBufSizes
, "tsconv/nucnvtst/TestOutBufSizes");
254 addTest(root
, &TestConverterTypesAndStarters
, "tsconv/nucnvtst/TestConverterTypesAndStarters");
255 addTest(root
, &TestAmbiguous
, "tsconv/nucnvtst/TestAmbiguous");
256 addTest(root
, &TestSignatureDetection
, "tsconv/nucnvtst/TestSignatureDetection");
257 addTest(root
, &TestUTF7
, "tsconv/nucnvtst/TestUTF7");
258 addTest(root
, &TestIMAP
, "tsconv/nucnvtst/TestIMAP");
259 addTest(root
, &TestUTF8
, "tsconv/nucnvtst/TestUTF8");
261 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
262 addTest(root
, &TestCESU8
, "tsconv/nucnvtst/TestCESU8");
263 addTest(root
, &TestUTF16
, "tsconv/nucnvtst/TestUTF16");
264 addTest(root
, &TestUTF16BE
, "tsconv/nucnvtst/TestUTF16BE");
265 addTest(root
, &TestUTF16LE
, "tsconv/nucnvtst/TestUTF16LE");
266 addTest(root
, &TestUTF32
, "tsconv/nucnvtst/TestUTF32");
267 addTest(root
, &TestUTF32BE
, "tsconv/nucnvtst/TestUTF32BE");
268 addTest(root
, &TestUTF32LE
, "tsconv/nucnvtst/TestUTF32LE");
270 #if !UCONFIG_NO_LEGACY_CONVERSION
271 addTest(root
, &TestLMBCS
, "tsconv/nucnvtst/TestLMBCS");
274 addTest(root
, &TestLATIN1
, "tsconv/nucnvtst/TestLATIN1");
276 #if !UCONFIG_NO_LEGACY_CONVERSION
277 addTest(root
, &TestSBCS
, "tsconv/nucnvtst/TestSBCS");
278 #if !UCONFIG_NO_FILE_IO
279 addTest(root
, &TestDBCS
, "tsconv/nucnvtst/TestDBCS");
280 addTest(root
, &TestICCRunout
, "tsconv/nucnvtst/TestICCRunout");
282 addTest(root
, &TestMBCS
, "tsconv/nucnvtst/TestMBCS");
284 #ifdef U_ENABLE_GENERIC_ISO_2022
285 addTest(root
, &TestISO_2022
, "tsconv/nucnvtst/TestISO_2022");
288 addTest(root
, &TestISO_2022_JP
, "tsconv/nucnvtst/TestISO_2022_JP");
289 addTest(root
, &TestJIS
, "tsconv/nucnvtst/TestJIS");
290 addTest(root
, &TestISO_2022_JP_1
, "tsconv/nucnvtst/TestISO_2022_JP_1");
291 addTest(root
, &TestISO_2022_JP_2
, "tsconv/nucnvtst/TestISO_2022_JP_2");
292 addTest(root
, &TestISO_2022_KR
, "tsconv/nucnvtst/TestISO_2022_KR");
293 addTest(root
, &TestISO_2022_KR_1
, "tsconv/nucnvtst/TestISO_2022_KR_1");
294 addTest(root
, &TestISO_2022_CN
, "tsconv/nucnvtst/TestISO_2022_CN");
296 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
297 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
298 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
300 addTest(root
, &TestHZ
, "tsconv/nucnvtst/TestHZ");
303 addTest(root
, &TestSCSU
, "tsconv/nucnvtst/TestSCSU");
305 #if !UCONFIG_NO_LEGACY_CONVERSION
306 addTest(root
, &TestEBCDIC_STATEFUL
, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
307 addTest(root
, &TestGB18030
, "tsconv/nucnvtst/TestGB18030");
308 addTest(root
, &TestJitterbug255
, "tsconv/nucnvtst/TestJitterbug255");
309 addTest(root
, &TestEBCDICUS4XML
, "tsconv/nucnvtst/TestEBCDICUS4XML");
310 addTest(root
, &TestISCII
, "tsconv/nucnvtst/TestISCII");
311 addTest(root
, &TestJB5275
, "tsconv/nucnvtst/TestJB5275");
312 addTest(root
, &TestJB5275_1
, "tsconv/nucnvtst/TestJB5275_1");
313 #if !UCONFIG_NO_COLLATION
314 addTest(root
, &TestJitterbug981
, "tsconv/nucnvtst/TestJitterbug981");
317 addTest(root
, &TestJitterbug1293
, "tsconv/nucnvtst/TestJitterbug1293");
321 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
322 addTest(root
, &TestCoverageMBCS
, "tsconv/nucnvtst/TestCoverageMBCS");
325 addTest(root
, &TestRoundTrippingAllUTF
, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
327 #if !UCONFIG_NO_LEGACY_CONVERSION
328 addTest(root
, &TestJitterbug2346
, "tsconv/nucnvtst/TestJitterbug2346");
329 addTest(root
, &TestJitterbug2411
, "tsconv/nucnvtst/TestJitterbug2411");
330 addTest(root
, &TestJitterbug6175
, "tsconv/nucnvtst/TestJitterbug6175");
332 addTest(root
, &TestIsFixedWidth
, "tsconv/nucnvtst/TestIsFixedWidth");
337 /* Note that this test already makes use of statics, so it's not really
339 This convenience function lets us make the error messages actually useful.
342 static void setNuConvTestName(const char *codepage
, const char *direction
)
344 sprintf(gNuConvTestName
, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
348 (int)gOutBufferSize
);
/* Result code returned by the testConvertFromU()/testConvertToU() helpers. */
typedef enum {
    /* test was OK */
    TC_OK = 0,
    /* Match failed - err was printed */
    TC_MISMATCH = 1,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL = 2
} ETestConvertResult;
358 /* Note: This function uses global variables and it will not do offset
359 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
360 static ETestConvertResult
testConvertFromU( const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
361 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
363 UErrorCode status
= U_ZERO_ERROR
;
364 UConverter
*conv
= 0;
365 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
366 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
373 int32_t realBufferSize
;
375 const UChar
*realSourceEnd
;
376 const UChar
*sourceLimit
;
377 UBool checkOffsets
= TRUE
;
380 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
381 junkout
[i
] = (char)0xF0;
382 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
385 setNuConvTestName(codepage
, "FROM");
387 log_verbose("\n========= %s\n", gNuConvTestName
);
389 conv
= my_ucnv_open(codepage
, &status
);
391 if(U_FAILURE(status
))
393 log_data_err("Couldn't open converter %s\n",codepage
);
397 ucnv_setFallback(conv
,useFallback
);
400 log_verbose("Converter opened..\n");
406 realBufferSize
= UPRV_LENGTHOF(junkout
);
407 realBufferEnd
= junkout
+ realBufferSize
;
408 realSourceEnd
= source
+ sourceLen
;
410 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
411 checkOffsets
= FALSE
;
415 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
416 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
418 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
420 if(targ
== realBufferEnd
) {
421 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
424 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
427 status
= U_ZERO_ERROR
;
429 ucnv_fromUnicode (conv
,
434 checkOffsets
? offs
: NULL
,
435 doFlush
, /* flush if we're at the end of the input data */
437 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && sourceLimit
< realSourceEnd
) );
439 if(U_FAILURE(status
)) {
440 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
444 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
445 sourceLen
, targ
-junkout
);
447 if(getTestOption(VERBOSITY_OPTION
))
450 char offset_str
[9999];
455 for(ptr
= junkout
;ptr
<targ
;ptr
++) {
456 sprintf(junk
+ strlen(junk
), "0x%02x, ", (int)(0xFF & *ptr
));
457 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (int)(0xFF & junokout
[ptr
-junkout
]));
461 printSeq((const uint8_t *)expect
, expectLen
);
462 if ( checkOffsets
) {
463 log_verbose("\nOffsets:");
464 log_verbose(offset_str
);
470 if(expectLen
!= targ
-junkout
) {
471 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
472 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
473 fprintf(stderr
, "Got:\n");
474 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
475 fprintf(stderr
, "Expected:\n");
476 printSeqErr((const unsigned char*)expect
, expectLen
);
480 if (checkOffsets
&& (expectOffsets
!= 0) ) {
481 log_verbose("comparing %d offsets..\n", targ
-junkout
);
482 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
483 log_err("did not get the expected offsets. %s\n", gNuConvTestName
);
484 printSeqErr((const unsigned char*)junkout
, (int32_t)(targ
-junkout
));
487 for(p
=junkout
;p
<targ
;p
++) {
488 log_err("%d,", junokout
[p
-junkout
]);
491 log_err("Expected: ");
492 for(i
=0; i
<(targ
-junkout
); i
++) {
493 log_err("%d,", expectOffsets
[i
]);
499 log_verbose("comparing..\n");
500 if(!memcmp(junkout
, expect
, expectLen
)) {
501 log_verbose("Matches!\n");
504 log_err("String does not match u->%s\n", gNuConvTestName
);
505 printUSeqErr(source
, sourceLen
);
506 fprintf(stderr
, "Got:\n");
507 printSeqErr((const unsigned char *)junkout
, expectLen
);
508 fprintf(stderr
, "Expected:\n");
509 printSeqErr((const unsigned char *)expect
, expectLen
);
515 /* Note: This function uses global variables and it will not do offset
516 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
517 static ETestConvertResult
testConvertToU( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
518 const char *codepage
, const int32_t *expectOffsets
, UBool useFallback
)
520 UErrorCode status
= U_ZERO_ERROR
;
521 UConverter
*conv
= 0;
522 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
523 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
525 const char *realSourceEnd
;
526 const char *srcLimit
;
532 UBool checkOffsets
= TRUE
;
534 int32_t realBufferSize
;
535 UChar
*realBufferEnd
;
538 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
541 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
544 setNuConvTestName(codepage
, "TO");
546 log_verbose("\n========= %s\n", gNuConvTestName
);
548 conv
= my_ucnv_open(codepage
, &status
);
550 if(U_FAILURE(status
))
552 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
556 ucnv_setFallback(conv
,useFallback
);
558 log_verbose("Converter opened..\n");
560 src
= (const char *)source
;
564 realBufferSize
= UPRV_LENGTHOF(junkout
);
565 realBufferEnd
= junkout
+ realBufferSize
;
566 realSourceEnd
= src
+ sourcelen
;
568 if ( gOutBufferSize
!= realBufferSize
|| gInBufferSize
!= NEW_MAX_BUFFER
)
569 checkOffsets
= FALSE
;
573 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
574 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
576 if(targ
== realBufferEnd
)
578 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ
,gNuConvTestName
);
581 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
583 /* oldTarg = targ; */
585 status
= U_ZERO_ERROR
;
587 ucnv_toUnicode (conv
,
592 checkOffsets
? offs
: NULL
,
593 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of hte source data */
596 /* offs += (targ-oldTarg); */
598 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
600 if(U_FAILURE(status
))
602 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage
, myErrorName(status
), gNuConvTestName
);
606 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
607 sourcelen
, targ
-junkout
);
608 if(getTestOption(VERBOSITY_OPTION
))
611 char offset_str
[9999];
617 for(ptr
= junkout
;ptr
<targ
;ptr
++)
619 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr
);
620 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[ptr
-junkout
]);
624 printUSeq(expect
, expectlen
);
627 log_verbose("\nOffsets:");
628 log_verbose(offset_str
);
634 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
636 if (checkOffsets
&& (expectOffsets
!= 0))
638 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t))){
639 log_err("did not get the expected offsets. %s\n",gNuConvTestName
);
641 for(p
=junkout
;p
<targ
;p
++) {
642 log_err("%d,", junokout
[p
-junkout
]);
645 log_err("Expected: ");
646 for(i
=0; i
<(targ
-junkout
); i
++) {
647 log_err("%d,", expectOffsets
[i
]);
651 for(i
=0; i
<(targ
-junkout
); i
++) {
652 log_err("%X,", junkout
[i
]);
656 for(i
=0; i
<(src
-(const char *)source
); i
++) {
657 log_err("%X,", (unsigned char)source
[i
]);
663 if(!memcmp(junkout
, expect
, expectlen
*2))
665 log_verbose("Matches!\n");
670 log_err("String does not match. %s\n", gNuConvTestName
);
671 log_verbose("String does not match. %s\n", gNuConvTestName
);
673 printUSeqErr(junkout
, expectlen
);
674 printf("\nExpected:");
675 printUSeqErr(expect
, expectlen
);
681 static void TestNewConvertWithBufferSizes(int32_t outsize
, int32_t insize
)
684 /* 1 2 3 1Han 2Han 3Han . */
685 static const UChar sampleText
[] =
686 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
687 static const UChar sampleTextRoundTripUnmappable
[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691 static const uint8_t expectedUTF8
[] =
692 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
693 static const int32_t toUTF8Offs
[] =
694 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
695 static const int32_t fmUTF8Offs
[] =
696 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698 #ifdef U_ENABLE_GENERIC_ISO_2022
699 /* Same as UTF8, but with ^[%B preceeding */
700 static const const uint8_t expectedISO2022
[] =
701 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
702 static const int32_t toISO2022Offs
[] =
703 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
704 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
705 static const int32_t fmISO2022Offs
[] =
706 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
710 static const uint8_t expectedIBM930
[] =
711 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
712 static const int32_t toIBM930Offs
[] =
713 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
714 static const int32_t fmIBM930Offs
[] =
715 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717 /* 1 2 3 0 h1 h2 h3 . MBCS*/
718 static const uint8_t expectedIBM943
[] =
719 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
720 static const int32_t toIBM943Offs
[] =
721 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
722 static const int32_t fmIBM943Offs
[] =
723 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725 /* 1 2 3 0 h1 h2 h3 . DBCS*/
726 static const uint8_t expectedIBM9027
[] =
727 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
728 static const int32_t toIBM9027Offs
[] =
729 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731 /* 1 2 3 0 <?> <?> <?> . SBCS*/
732 static const uint8_t expectedIBM920
[] =
733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
734 static const int32_t toIBM920Offs
[] =
735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737 /* 1 2 3 0 <?> <?> <?> . SBCS*/
738 static const uint8_t expectedISO88593
[] =
739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
740 static const int32_t toISO88593Offs
[] =
741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
744 static const uint8_t expectedLATIN1
[] =
745 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
746 static const int32_t toLATIN1Offs
[] =
747 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
751 static const uint8_t expectedUTF16BE
[] =
752 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
753 static const int32_t toUTF16BEOffs
[]=
754 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
755 static const int32_t fmUTF16BEOffs
[] =
756 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
758 static const uint8_t expectedUTF16LE
[] =
759 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
760 static const int32_t toUTF16LEOffs
[]=
761 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
762 static const int32_t fmUTF16LEOffs
[] =
763 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765 static const uint8_t expectedUTF32BE
[] =
766 { 0x00, 0x00, 0x00, 0x31,
767 0x00, 0x00, 0x00, 0x32,
768 0x00, 0x00, 0x00, 0x33,
769 0x00, 0x00, 0x00, 0x00,
770 0x00, 0x00, 0x4e, 0x00,
771 0x00, 0x00, 0x4e, 0x8c,
772 0x00, 0x00, 0x4e, 0x09,
773 0x00, 0x00, 0x00, 0x2e,
774 0x00, 0x02, 0x00, 0x21 };
775 static const int32_t toUTF32BEOffs
[]=
776 { 0x00, 0x00, 0x00, 0x00,
777 0x01, 0x01, 0x01, 0x01,
778 0x02, 0x02, 0x02, 0x02,
779 0x03, 0x03, 0x03, 0x03,
780 0x04, 0x04, 0x04, 0x04,
781 0x05, 0x05, 0x05, 0x05,
782 0x06, 0x06, 0x06, 0x06,
783 0x07, 0x07, 0x07, 0x07,
784 0x08, 0x08, 0x08, 0x08,
785 0x08, 0x08, 0x08, 0x08 };
786 static const int32_t fmUTF32BEOffs
[] =
787 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
789 static const uint8_t expectedUTF32LE
[] =
790 { 0x31, 0x00, 0x00, 0x00,
791 0x32, 0x00, 0x00, 0x00,
792 0x33, 0x00, 0x00, 0x00,
793 0x00, 0x00, 0x00, 0x00,
794 0x00, 0x4e, 0x00, 0x00,
795 0x8c, 0x4e, 0x00, 0x00,
796 0x09, 0x4e, 0x00, 0x00,
797 0x2e, 0x00, 0x00, 0x00,
798 0x21, 0x00, 0x02, 0x00 };
799 static const int32_t toUTF32LEOffs
[]=
800 { 0x00, 0x00, 0x00, 0x00,
801 0x01, 0x01, 0x01, 0x01,
802 0x02, 0x02, 0x02, 0x02,
803 0x03, 0x03, 0x03, 0x03,
804 0x04, 0x04, 0x04, 0x04,
805 0x05, 0x05, 0x05, 0x05,
806 0x06, 0x06, 0x06, 0x06,
807 0x07, 0x07, 0x07, 0x07,
808 0x08, 0x08, 0x08, 0x08,
809 0x08, 0x08, 0x08, 0x08 };
810 static const int32_t fmUTF32LEOffs
[] =
811 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
816 /** Test chars #2 **/
818 /* Sahha [health], slashed h's */
819 static const UChar malteseUChars
[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
820 static const uint8_t expectedMaltese913
[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823 static const UChar LMBCSUChars
[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
824 static const uint8_t expectedLMBCS
[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
825 static const int32_t toLMBCSOffs
[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
826 static const int32_t fmLMBCSOffs
[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
827 /*********************************** START OF CODE finally *************/
829 gInBufferSize
= insize
;
830 gOutBufferSize
= outsize
;
832 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize
, gOutBufferSize
);
836 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
837 expectedUTF8
, sizeof(expectedUTF8
), "UTF8", toUTF8Offs
,FALSE
);
839 log_verbose("Test surrogate behaviour for UTF8\n");
841 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
842 static const uint8_t expectedUTF8test2
[]= { 0xe2, 0x82, 0xac,
843 0xf0, 0x90, 0x90, 0x81,
846 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
847 testConvertFromU(testinput
, UPRV_LENGTHOF(testinput
),
848 expectedUTF8test2
, sizeof(expectedUTF8test2
), "UTF8", offsets
,FALSE
);
853 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
856 expectedISO2022
, sizeof(expectedISO2022
), "ISO_2022", toISO2022Offs
,FALSE
);
860 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
861 expectedUTF16LE
, sizeof(expectedUTF16LE
), "utf-16le", toUTF16LEOffs
,FALSE
);
863 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
864 expectedUTF16BE
, sizeof(expectedUTF16BE
), "utf-16be", toUTF16BEOffs
,FALSE
);
866 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
867 expectedUTF32LE
, sizeof(expectedUTF32LE
), "utf-32le", toUTF32LEOffs
,FALSE
);
869 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
870 expectedUTF32BE
, sizeof(expectedUTF32BE
), "utf-32be", toUTF32BEOffs
,FALSE
);
873 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
874 expectedLATIN1
, sizeof(expectedLATIN1
), "LATIN_1", toLATIN1Offs
,FALSE
);
876 #if !UCONFIG_NO_LEGACY_CONVERSION
878 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
879 expectedIBM930
, sizeof(expectedIBM930
), "ibm-930", toIBM930Offs
,FALSE
);
881 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
882 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
886 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
887 expectedIBM943
, sizeof(expectedIBM943
), "ibm-943", toIBM943Offs
,FALSE
);
889 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
890 expectedIBM9027
, sizeof(expectedIBM9027
), "@ibm9027", toIBM9027Offs
,FALSE
);
892 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
893 expectedIBM920
, sizeof(expectedIBM920
), "ibm-920", toIBM920Offs
,FALSE
);
895 testConvertFromU(sampleText
, UPRV_LENGTHOF(sampleText
),
896 expectedISO88593
, sizeof(expectedISO88593
), "iso-8859-3", toISO88593Offs
,FALSE
);
903 testConvertToU(expectedUTF8
, sizeof(expectedUTF8
),
904 sampleText
, UPRV_LENGTHOF(sampleText
), "utf8", fmUTF8Offs
,FALSE
);
905 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907 testConvertToU(expectedISO2022
, sizeof(expectedISO2022
),
908 sampleText
, UPRV_LENGTHOF(sampleText
), "ISO_2022", fmISO2022Offs
,FALSE
);
912 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
913 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
915 testConvertToU(expectedUTF16BE
, sizeof(expectedUTF16BE
),
916 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16be", fmUTF16BEOffs
,FALSE
);
918 testConvertToU(expectedUTF32LE
, sizeof(expectedUTF32LE
),
919 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32le", fmUTF32LEOffs
,FALSE
);
921 testConvertToU(expectedUTF32BE
, sizeof(expectedUTF32BE
),
922 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-32be", fmUTF32BEOffs
,FALSE
);
924 #if !UCONFIG_NO_LEGACY_CONVERSION
926 testConvertToU(expectedIBM930
, sizeof(expectedIBM930
), sampleTextRoundTripUnmappable
,
927 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-930", fmIBM930Offs
,FALSE
);
929 testConvertToU(expectedIBM943
, sizeof(expectedIBM943
),sampleTextRoundTripUnmappable
,
930 UPRV_LENGTHOF(sampleTextRoundTripUnmappable
), "ibm-943", fmIBM943Offs
,FALSE
);
933 /* Try it again to make sure it still works */
934 testConvertToU(expectedUTF16LE
, sizeof(expectedUTF16LE
),
935 sampleText
, UPRV_LENGTHOF(sampleText
), "utf-16le", fmUTF16LEOffs
,FALSE
);
937 #if !UCONFIG_NO_LEGACY_CONVERSION
938 testConvertToU(expectedMaltese913
, sizeof(expectedMaltese913
),
939 malteseUChars
, UPRV_LENGTHOF(malteseUChars
), "latin3", NULL
,FALSE
);
941 testConvertFromU(malteseUChars
, UPRV_LENGTHOF(malteseUChars
),
942 expectedMaltese913
, sizeof(expectedMaltese913
), "iso-8859-3", NULL
,FALSE
);
945 testConvertFromU(LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
),
946 expectedLMBCS
, sizeof(expectedLMBCS
), "LMBCS-1", toLMBCSOffs
,FALSE
);
947 testConvertToU(expectedLMBCS
, sizeof(expectedLMBCS
),
948 LMBCSUChars
, UPRV_LENGTHOF(LMBCSUChars
), "LMBCS-1", fmLMBCSOffs
,FALSE
);
951 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953 /* encode directly set D and set O */
954 static const uint8_t utf7
[] = {
961 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
962 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966 static const UChar unicode
[] = {
968 Hi Mom -<WHITE SMILING FACE>-!
969 A<NOT IDENTICAL TO><ALPHA>.
971 [Japanese word "nihongo"]
973 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
974 0x41, 0x2262, 0x0391, 0x2e,
976 0x65e5, 0x672c, 0x8a9e
978 static const int32_t toUnicodeOffsets
[] = {
979 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
984 static const int32_t fromUnicodeOffsets
[] = {
985 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
986 11, 12, 12, 12, 13, 13, 13, 13, 14,
988 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991 /* same but escaping set O (the exclamation mark) */
992 static const uint8_t utf7Restricted
[] = {
999 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1000 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004 static const int32_t toUnicodeOffsetsR
[] = {
1005 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1010 static const int32_t fromUnicodeOffsetsR
[] = {
1011 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1012 11, 12, 12, 12, 13, 13, 13, 13, 14,
1014 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7
, sizeof(utf7
), "UTF-7", fromUnicodeOffsets
,FALSE
);
1019 testConvertToU(utf7
, sizeof(utf7
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7", toUnicodeOffsets
,FALSE
);
1021 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), utf7Restricted
, sizeof(utf7Restricted
), "UTF-7,version=1", fromUnicodeOffsetsR
,FALSE
);
1023 testConvertToU(utf7Restricted
, sizeof(utf7Restricted
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7,version=1", toUnicodeOffsetsR
,FALSE
);
1027 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1028 * modified according to RFC 2060,
1029 * and supplemented with the one example in RFC 2060 itself.
1032 static const uint8_t imap
[] = {
1043 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1044 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1049 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1050 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1051 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053 static const UChar unicode
[] = {
1054 /* Hi Mom -<WHITE SMILING FACE>-!
1055 A<NOT IDENTICAL TO><ALPHA>.
1057 [Japanese word "nihongo"]
1064 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1065 0x41, 0x2262, 0x0391, 0x2e,
1067 0x65e5, 0x672c, 0x8a9e,
1069 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1070 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1071 0x2f, 0x65e5, 0x672c, 0x8a9e,
1072 0x2f, 0x53f0, 0x5317
1074 static const int32_t toUnicodeOffsets
[] = {
1075 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1080 38, 39, 40, 41, 42, 43,
1085 static const int32_t fromUnicodeOffsets
[] = {
1086 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1087 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091 20, 21, 22, 23, 24, 25,
1093 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1094 35, 36, 36, 36, 37, 37, 37, 37, 37
1097 testConvertFromU(unicode
, UPRV_LENGTHOF(unicode
), imap
, sizeof(imap
), "IMAP-mailbox-name", fromUnicodeOffsets
,FALSE
);
1099 testConvertToU(imap
, sizeof(imap
), unicode
, UPRV_LENGTHOF(unicode
), "IMAP-mailbox-name", toUnicodeOffsets
,FALSE
);
1102 /* Test UTF-8 bad data handling*/
1104 static const uint8_t utf8
[]={
1106 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1109 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1112 0xdf, 0xbf, /* 7ff */
1113 0xbf, /* truncated tail */
1114 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1118 static const uint16_t utf8Expected
[]={
1132 static const int32_t utf8Offsets
[]={
1133 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1135 testConvertToU(utf8
, sizeof(utf8
),
1136 utf8Expected
, UPRV_LENGTHOF(utf8Expected
), "utf-8", utf8Offsets
,FALSE
);
1140 /* Test UTF-32BE bad data handling*/
1142 static const uint8_t utf32
[]={
1143 0x00, 0x00, 0x00, 0x61,
1144 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1145 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1146 0x00, 0x00, 0x00, 0x62,
1147 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1148 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1149 0x00, 0x00, 0x01, 0x62,
1150 0x00, 0x00, 0x02, 0x62
1152 static const uint16_t utf32Expected
[]={
1154 0xfffd, /* 0x110000 out of range */
1155 0xDBFF, /* 0x10FFFF in range */
1158 0xfffd, /* 0xffffffff out of range */
1159 0xfffd, /* 0x7fffffff out of range */
1163 static const int32_t utf32Offsets
[]={
1164 0, 4, 8, 8, 12, 16, 20, 24, 28
1166 static const uint8_t utf32ExpectedBack
[]={
1167 0x00, 0x00, 0x00, 0x61,
1168 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1169 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1170 0x00, 0x00, 0x00, 0x62,
1171 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1172 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1173 0x00, 0x00, 0x01, 0x62,
1174 0x00, 0x00, 0x02, 0x62
1176 static const int32_t utf32OffsetsBack
[]={
1187 testConvertToU(utf32
, sizeof(utf32
),
1188 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32be", utf32Offsets
,FALSE
);
1189 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1190 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32be", utf32OffsetsBack
, FALSE
);
1193 /* Test UTF-32LE bad data handling*/
1195 static const uint8_t utf32
[]={
1196 0x61, 0x00, 0x00, 0x00,
1197 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1198 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1199 0x62, 0x00, 0x00, 0x00,
1200 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1201 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1202 0x62, 0x01, 0x00, 0x00,
1203 0x62, 0x02, 0x00, 0x00,
1206 static const uint16_t utf32Expected
[]={
1208 0xfffd, /* 0x110000 out of range */
1209 0xDBFF, /* 0x10FFFF in range */
1212 0xfffd, /* 0xffffffff out of range */
1213 0xfffd, /* 0x7fffffff out of range */
1217 static const int32_t utf32Offsets
[]={
1218 0, 4, 8, 8, 12, 16, 20, 24, 28
1220 static const uint8_t utf32ExpectedBack
[]={
1221 0x61, 0x00, 0x00, 0x00,
1222 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1223 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1224 0x62, 0x00, 0x00, 0x00,
1225 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1226 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1227 0x62, 0x01, 0x00, 0x00,
1228 0x62, 0x02, 0x00, 0x00
1230 static const int32_t utf32OffsetsBack
[]={
1240 testConvertToU(utf32
, sizeof(utf32
),
1241 utf32Expected
, UPRV_LENGTHOF(utf32Expected
), "utf-32le", utf32Offsets
,FALSE
);
1242 testConvertFromU(utf32Expected
, UPRV_LENGTHOF(utf32Expected
),
1243 utf32ExpectedBack
, sizeof(utf32ExpectedBack
), "utf-32le", utf32OffsetsBack
, FALSE
);
1247 static void TestCoverageMBCS(){
1249 UErrorCode status
= U_ZERO_ERROR
;
1250 const char *directory
= loadTestData(&status
);
1251 char* tdpath
= NULL
;
1252 char* saveDirectory
= (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1253 int len
= strlen(directory
);
1256 tdpath
= (char*) malloc(sizeof(char) * (len
* 2));
1257 uprv_strcpy(saveDirectory
,u_getDataDirectory());
1258 log_verbose("Retrieved data directory %s \n",saveDirectory
);
1259 uprv_strcpy(tdpath
,directory
);
1260 index
=strrchr(tdpath
,(char)U_FILE_SEP_CHAR
);
1262 if((unsigned int)(index
-tdpath
) != (strlen(tdpath
)-1)){
1265 u_setDataDirectory(tdpath
);
1266 log_verbose("ICU data directory is set to: %s \n" ,tdpath
);
1269 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1270 which is test file for MBCS conversion with single-byte codepage data.*/
1273 /* MBCS with single byte codepage data test1.ucm*/
1274 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1275 const uint8_t expectedtest1
[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1276 int32_t totest1Offs
[] = { 0, 1, 2, 3, 5, };
1279 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1280 expectedtest1
, sizeof(expectedtest1
), "@test1", totest1Offs
,FALSE
);
1283 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1284 which is test file for MBCS conversion with three-byte codepage data.*/
1287 /* MBCS with three byte codepage data test3.ucm*/
1288 const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1289 const uint8_t expectedtest3
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1290 int32_t totest3Offs
[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1292 const uint8_t test3input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1293 const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1294 int32_t fromtest3Offs
[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1297 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1298 expectedtest3
, sizeof(expectedtest3
), "@test3", totest3Offs
,FALSE
);
1301 testConvertToU(test3input
, sizeof(test3input
),
1302 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test3", fromtest3Offs
,FALSE
);
1306 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1307 which is test file for MBCS conversion with four-byte codepage data.*/
1310 /* MBCS with three byte codepage data test4.ucm*/
1311 static const UChar unicodeInput
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1312 static const uint8_t expectedtest4
[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1313 static const int32_t totest4Offs
[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1315 static const uint8_t test4input
[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1316 static const UChar expectedUnicode
[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1317 static const int32_t fromtest4Offs
[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1320 testConvertFromU(unicodeInput
, UPRV_LENGTHOF(unicodeInput
),
1321 expectedtest4
, sizeof(expectedtest4
), "@test4", totest4Offs
,FALSE
);
1324 testConvertToU(test4input
, sizeof(test4input
),
1325 expectedUnicode
, UPRV_LENGTHOF(expectedUnicode
), "@test4", fromtest4Offs
,FALSE
);
1330 /* restore the original data directory */
1331 log_verbose("Setting the data directory to %s \n", saveDirectory
);
1332 u_setDataDirectory(saveDirectory
);
1333 free(saveDirectory
);
1338 static void TestConverterType(const char *convName
, UConverterType convType
) {
1339 UConverter
* myConverter
;
1340 UErrorCode err
= U_ZERO_ERROR
;
1342 myConverter
= my_ucnv_open(convName
, &err
);
1344 if (U_FAILURE(err
)) {
1345 log_data_err("Failed to create an %s converter\n", convName
);
1350 if (ucnv_getType(myConverter
)!=convType
) {
1351 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1352 convName
, convType
);
1355 log_verbose("ucnv_getType %s ok\n", convName
);
1358 ucnv_close(myConverter
);
1361 static void TestConverterTypesAndStarters()
1363 #if !UCONFIG_NO_LEGACY_CONVERSION
1364 UConverter
* myConverter
;
1365 UErrorCode err
= U_ZERO_ERROR
;
1366 UBool mystarters
[256];
1368 /* const UBool expectedKSCstarters[256] = {
1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1397 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1399 myConverter
= ucnv_open("ksc", &err
);
1400 if (U_FAILURE(err
)) {
1401 log_data_err("Failed to create an ibm-ksc converter\n");
1406 if (ucnv_getType(myConverter
)!=UCNV_MBCS
)
1407 log_err("ucnv_getType Failed for ibm-949\n");
1409 log_verbose("ucnv_getType ibm-949 ok\n");
1411 if(myConverter
!=NULL
)
1412 ucnv_getStarters(myConverter
, mystarters
, &err
);
1414 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1415 log_err("Failed ucnv_getStarters for ksc\n");
1417 log_verbose("ucnv_getStarters ok\n");*/
1420 ucnv_close(myConverter
);
1422 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL
);
1423 TestConverterType("ibm-878", UCNV_SBCS
);
1426 TestConverterType("iso-8859-1", UCNV_LATIN_1
);
1428 TestConverterType("ibm-1208", UCNV_UTF8
);
1430 TestConverterType("utf-8", UCNV_UTF8
);
1431 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian
);
1432 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian
);
1433 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian
);
1434 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian
);
1436 #if !UCONFIG_NO_LEGACY_CONVERSION
1438 #if defined(U_ENABLE_GENERIC_ISO_2022)
1439 TestConverterType("iso-2022", UCNV_ISO_2022
);
1442 TestConverterType("hz", UCNV_HZ
);
1445 TestConverterType("scsu", UCNV_SCSU
);
1447 #if !UCONFIG_NO_LEGACY_CONVERSION
1448 TestConverterType("x-iscii-de", UCNV_ISCII
);
1451 TestConverterType("ascii", UCNV_US_ASCII
);
1452 TestConverterType("utf-7", UCNV_UTF7
);
1453 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX
);
1454 TestConverterType("bocu-1", UCNV_BOCU1
);
1458 TestAmbiguousConverter(UConverter
*cnv
) {
1459 static const char inBytes
[3]={ 0x61, 0x5B, 0x5c };
1460 UChar outUnicode
[20]={ 0, 0, 0, 0 };
1464 UErrorCode errorCode
;
1467 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1468 errorCode
=U_ZERO_ERROR
;
1471 ucnv_toUnicode(cnv
, &u
, u
+20, &s
, s
+3, NULL
, TRUE
, &errorCode
);
1472 if(U_FAILURE(errorCode
)) {
1473 /* we do not care about general failures in this test; the input may just not be mappable */
1477 if(outUnicode
[0]!=0x61 || outUnicode
[1]!=0x5B || outUnicode
[2]==0xfffd) {
1478 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1479 /* There are some encodings that are partially ASCII based,
1480 like the ISO-7 and GSM series of codepages, which we ignore. */
1484 isAmbiguous
=ucnv_isAmbiguous(cnv
);
1486 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1487 if((outUnicode
[2]!=0x5c)!=isAmbiguous
) {
1488 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1489 ucnv_getName(cnv
, &errorCode
), outUnicode
[2]!=0x5c, isAmbiguous
);
1493 if(outUnicode
[2]!=0x5c) {
1494 /* needs fixup, fix it */
1495 ucnv_fixFileSeparator(cnv
, outUnicode
, (int32_t)(u
-outUnicode
));
1496 if(outUnicode
[2]!=0x5c) {
1497 /* the fix failed */
1498 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv
, &errorCode
));
1504 static void TestAmbiguous()
1506 UErrorCode status
= U_ZERO_ERROR
;
1507 UConverter
*ascii_cnv
= 0, *sjis_cnv
= 0, *cnv
;
1508 static const char target
[] = {
1509 /* "\\usr\\local\\share\\data\\icutest.txt" */
1510 0x5c, 0x75, 0x73, 0x72,
1511 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1512 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1513 0x5c, 0x64, 0x61, 0x74, 0x61,
1514 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1517 UChar asciiResult
[200], sjisResult
[200];
1518 int32_t /*asciiLength = 0,*/ sjisLength
= 0, i
;
1521 /* enumerate all converters */
1522 status
=U_ZERO_ERROR
;
1523 for(i
=0; (name
=ucnv_getAvailableName(i
))!=NULL
; ++i
) {
1524 cnv
=ucnv_open(name
, &status
);
1525 if(U_SUCCESS(status
)) {
1526 TestAmbiguousConverter(cnv
);
1529 log_err("error: unable to open available converter \"%s\"\n", name
);
1530 status
=U_ZERO_ERROR
;
1534 #if !UCONFIG_NO_LEGACY_CONVERSION
1535 sjis_cnv
= ucnv_open("ibm-943", &status
);
1536 if (U_FAILURE(status
))
1538 log_data_err("Failed to create a SJIS converter\n");
1541 ascii_cnv
= ucnv_open("LATIN-1", &status
);
1542 if (U_FAILURE(status
))
1544 log_data_err("Failed to create a LATIN-1 converter\n");
1545 ucnv_close(sjis_cnv
);
1548 /* convert target from SJIS to Unicode */
1549 sjisLength
= ucnv_toUChars(sjis_cnv
, sjisResult
, UPRV_LENGTHOF(sjisResult
), target
, (int32_t)strlen(target
), &status
);
1550 if (U_FAILURE(status
))
1552 log_err("Failed to convert the SJIS string.\n");
1553 ucnv_close(sjis_cnv
);
1554 ucnv_close(ascii_cnv
);
1557 /* convert target from Latin-1 to Unicode */
1558 /*asciiLength =*/ ucnv_toUChars(ascii_cnv
, asciiResult
, UPRV_LENGTHOF(asciiResult
), target
, (int32_t)strlen(target
), &status
);
1559 if (U_FAILURE(status
))
1561 log_err("Failed to convert the Latin-1 string.\n");
1562 ucnv_close(sjis_cnv
);
1563 ucnv_close(ascii_cnv
);
1566 if (!ucnv_isAmbiguous(sjis_cnv
))
1568 log_err("SJIS converter should contain ambiguous character mappings.\n");
1569 ucnv_close(sjis_cnv
);
1570 ucnv_close(ascii_cnv
);
1573 if (u_strcmp(sjisResult
, asciiResult
) == 0)
1575 log_err("File separators for SJIS don't need to be fixed.\n");
1577 ucnv_fixFileSeparator(sjis_cnv
, sjisResult
, sjisLength
);
1578 if (u_strcmp(sjisResult
, asciiResult
) != 0)
1580 log_err("Fixing file separator for SJIS failed.\n");
1582 ucnv_close(sjis_cnv
);
1583 ucnv_close(ascii_cnv
);
1588 TestSignatureDetection(){
1589 /* with null terminated strings */
1591 static const char* data
[] = {
1592 "\xFE\xFF\x00\x00", /* UTF-16BE */
1593 "\xFF\xFE\x00\x00", /* UTF-16LE */
1594 "\xEF\xBB\xBF\x00", /* UTF-8 */
1595 "\x0E\xFE\xFF\x00", /* SCSU */
1597 "\xFE\xFF", /* UTF-16BE */
1598 "\xFF\xFE", /* UTF-16LE */
1599 "\xEF\xBB\xBF", /* UTF-8 */
1600 "\x0E\xFE\xFF", /* SCSU */
1602 "\xFE\xFF\x41\x42", /* UTF-16BE */
1603 "\xFF\xFE\x41\x41", /* UTF-16LE */
1604 "\xEF\xBB\xBF\x41", /* UTF-8 */
1605 "\x0E\xFE\xFF\x41", /* SCSU */
1607 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1608 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1609 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1610 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1611 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1613 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1615 static const char* expected
[] = {
1638 static const int32_t expectedLength
[] ={
1663 int32_t signatureLength
= -1;
1664 const char* source
= NULL
;
1665 const char* enc
= NULL
;
1666 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1669 enc
= ucnv_detectUnicodeSignature(source
, -1 , &signatureLength
, &err
);
1671 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1674 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1675 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1678 if(signatureLength
!= expectedLength
[i
]){
1679 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1684 static const char* data
[] = {
1685 "\xFE\xFF\x00", /* UTF-16BE */
1686 "\xFF\xFE\x00", /* UTF-16LE */
1687 "\xEF\xBB\xBF\x00", /* UTF-8 */
1688 "\x0E\xFE\xFF\x00", /* SCSU */
1689 "\x00\x00\xFE\xFF", /* UTF-32BE */
1690 "\xFF\xFE\x00\x00", /* UTF-32LE */
1691 "\xFE\xFF", /* UTF-16BE */
1692 "\xFF\xFE", /* UTF-16LE */
1693 "\xEF\xBB\xBF", /* UTF-8 */
1694 "\x0E\xFE\xFF", /* SCSU */
1695 "\x00\x00\xFE\xFF", /* UTF-32BE */
1696 "\xFF\xFE\x00\x00", /* UTF-32LE */
1697 "\xFE\xFF\x41\x42", /* UTF-16BE */
1698 "\xFF\xFE\x41\x41", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x41", /* UTF-8 */
1700 "\x0E\xFE\xFF\x41", /* SCSU */
1701 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1703 "\xFB\xEE\x28", /* BOCU-1 */
1704 "\xFF\x41\x42" /* NULL */
1706 static const int len
[] = {
1729 static const char* expected
[] = {
1751 static const int32_t expectedLength
[] ={
1775 int32_t signatureLength
= -1;
1776 int32_t sourceLength
=-1;
1777 const char* source
= NULL
;
1778 const char* enc
= NULL
;
1779 for( ; i
<UPRV_LENGTHOF(data
); i
++){
1782 sourceLength
= len
[i
];
1783 enc
= ucnv_detectUnicodeSignature(source
, sourceLength
, &signatureLength
, &err
);
1785 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source
,i
,u_errorName(err
));
1788 if(enc
== NULL
|| strcmp(enc
,expected
[i
]) !=0){
1789 if(expected
[i
] !=NULL
){
1790 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source
,i
,expected
[i
],enc
);
1794 if(signatureLength
!= expectedLength
[i
]){
1795 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source
,i
,signatureLength
,expectedLength
[i
]);
1801 static void TestUTF7() {
1803 static const uint8_t in
[]={
1804 /* H - +Jjo- - ! +- +2AHcAQ */
1807 0x2b, 0x4a, 0x6a, 0x6f,
1811 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1814 /* expected test results */
1815 static const int32_t results
[]={
1816 /* number of bytes read, code point */
1819 4, 0x263a, /* <WHITE SMILING FACE> */
1826 const char *cnvName
;
1827 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1828 UErrorCode errorCode
=U_ZERO_ERROR
;
1829 UConverter
*cnv
=ucnv_open("UTF-7", &errorCode
);
1830 if(U_FAILURE(errorCode
)) {
1831 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode
));
1834 TestNextUChar(cnv
, source
, limit
, results
, "UTF-7");
1835 /* Test the condition when source >= sourceLimit */
1836 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1837 cnvName
= ucnv_getName(cnv
, &errorCode
);
1838 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "UTF-7") != 0) {
1839 log_err("UTF-7 converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1844 static void TestIMAP() {
1846 static const uint8_t in
[]={
1847 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1850 0x26, 0x4a, 0x6a, 0x6f,
1854 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1857 /* expected test results */
1858 static const int32_t results
[]={
1859 /* number of bytes read, code point */
1862 4, 0x263a, /* <WHITE SMILING FACE> */
1869 const char *cnvName
;
1870 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
1871 UErrorCode errorCode
=U_ZERO_ERROR
;
1872 UConverter
*cnv
=ucnv_open("IMAP-mailbox-name", &errorCode
);
1873 if(U_FAILURE(errorCode
)) {
1874 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode
));
1877 TestNextUChar(cnv
, source
, limit
, results
, "IMAP-mailbox-name");
1878 /* Test the condition when source >= sourceLimit */
1879 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1880 cnvName
= ucnv_getName(cnv
, &errorCode
);
1881 if (U_FAILURE(errorCode
) || uprv_strcmp(cnvName
, "IMAP-mailbox-name") != 0) {
1882 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName
, u_errorName(errorCode
));
1887 static void TestUTF8() {
1889 static const uint8_t in
[]={
1893 0xf0, 0x90, 0x80, 0x80,
1894 0xf4, 0x84, 0x8c, 0xa1,
1895 0xf0, 0x90, 0x90, 0x81
1898 /* expected test results */
1899 static const int32_t results
[]={
1900 /* number of bytes read, code point */
1909 /* error test input */
1910 static const uint8_t in2
[]={
1912 0xc0, 0x80, /* illegal non-shortest form */
1913 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1914 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1915 0xc0, 0xc0, /* illegal trail byte */
1916 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1917 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1918 0xfe, /* illegal byte altogether */
1922 /* expected error test results */
1923 static const int32_t results2
[]={
1924 /* number of bytes read, code point */
1929 UConverterToUCallback cb
;
1932 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
1933 UErrorCode errorCode
=U_ZERO_ERROR
;
1934 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
1935 if(U_FAILURE(errorCode
)) {
1936 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode
));
1939 TestNextUChar(cnv
, source
, limit
, results
, "UTF-8");
1940 /* Test the condition when source >= sourceLimit */
1941 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
1943 /* test error behavior with a skip callback */
1944 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
1945 source
=(const char *)in2
;
1946 limit
=(const char *)(in2
+sizeof(in2
));
1947 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-8");
1952 static void TestCESU8() {
1954 static const uint8_t in
[]={
1958 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1959 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1960 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1964 /* expected test results */
1965 static const int32_t results
[]={
1966 /* number of bytes read, code point */
1972 -1,0xd802, /* may read 3 or 6 bytes */
1973 -1,0x10ffff,/* may read 0 or 3 bytes */
1977 /* error test input */
1978 static const uint8_t in2
[]={
1980 0xc0, 0x80, /* illegal non-shortest form */
1981 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1982 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1983 0xc0, 0xc0, /* illegal trail byte */
1984 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1985 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1986 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1987 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1988 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1989 0xfe, /* illegal byte altogether */
1993 /* expected error test results */
1994 static const int32_t results2
[]={
1995 /* number of bytes read, code point */
2000 UConverterToUCallback cb
;
2003 const char *source
=(const char *)in
,*limit
=(const char *)in
+sizeof(in
);
2004 UErrorCode errorCode
=U_ZERO_ERROR
;
2005 UConverter
*cnv
=ucnv_open("CESU-8", &errorCode
);
2006 if(U_FAILURE(errorCode
)) {
2007 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode
));
2010 TestNextUChar(cnv
, source
, limit
, results
, "CESU-8");
2011 /* Test the condition when source >= sourceLimit */
2012 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2014 /* test error behavior with a skip callback */
2015 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2016 source
=(const char *)in2
;
2017 limit
=(const char *)(in2
+sizeof(in2
));
2018 TestNextUChar(cnv
, source
, limit
, results2
, "CESU-8");
2023 static void TestUTF16() {
2025 static const uint8_t in1
[]={
2026 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2028 static const uint8_t in2
[]={
2029 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2031 static const uint8_t in3
[]={
2032 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2035 /* expected test results */
2036 static const int32_t results1
[]={
2037 /* number of bytes read, code point */
2041 static const int32_t results2
[]={
2042 /* number of bytes read, code point */
2046 static const int32_t results3
[]={
2047 /* number of bytes read, code point */
2054 const char *source
, *limit
;
2056 UErrorCode errorCode
=U_ZERO_ERROR
;
2057 UConverter
*cnv
=ucnv_open("UTF-16", &errorCode
);
2058 if(U_FAILURE(errorCode
)) {
2059 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
));
2063 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2064 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-16");
2066 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2067 ucnv_resetToUnicode(cnv
);
2068 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-16");
2070 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2071 ucnv_resetToUnicode(cnv
);
2072 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-16");
2074 /* Test the condition when source >= sourceLimit */
2075 ucnv_resetToUnicode(cnv
);
2076 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2081 static void TestUTF16BE() {
2083 static const uint8_t in
[]={
2089 0xd8, 0x01, 0xdc, 0x01
2092 /* expected test results */
2093 static const int32_t results
[]={
2094 /* number of bytes read, code point */
2103 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2104 UErrorCode errorCode
=U_ZERO_ERROR
;
2105 UConverter
*cnv
=ucnv_open("utf-16be", &errorCode
);
2106 if(U_FAILURE(errorCode
)) {
2107 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode
));
2110 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16BE");
2111 /* Test the condition when source >= sourceLimit */
2112 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2113 /*Test for the condition where there is an invalid character*/
2115 static const uint8_t source2
[]={0x61};
2116 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2117 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2121 * Test disabled because currently the UTF-16BE/LE converters are supposed
2122 * to not set errors for unpaired surrogates.
2123 * This may change with
2124 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2127 /*Test for the condition where there is a surrogate pair*/
2129 const uint8_t source2
[]={0xd8, 0x01};
2130 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
2139 static const uint8_t in
[]={
2144 0x01, 0xd8, 0x01, 0xdc
2147 /* expected test results */
2148 static const int32_t results
[]={
2149 /* number of bytes read, code point */
2157 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2158 UErrorCode errorCode
=U_ZERO_ERROR
;
2159 UConverter
*cnv
=ucnv_open("utf-16le", &errorCode
);
2160 if(U_FAILURE(errorCode
)) {
2161 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode
));
2164 TestNextUChar(cnv
, source
, limit
, results
, "UTF-16LE");
2165 /* Test the condition when source >= sourceLimit */
2166 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2167 /*Test for the condition where there is an invalid character*/
2169 static const uint8_t source2
[]={0x61};
2170 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2171 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an invalid character");
2175 * Test disabled because currently the UTF-16BE/LE converters are supposed
2176 * to not set errors for unpaired surrogates.
2177 * This may change with
2178 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2181 /*Test for the condition where there is a surrogate character*/
2183 static const uint8_t source2
[]={0x01, 0xd8};
2184 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_TRUNCATED_CHAR_FOUND
, "an truncated surrogate character");
/*
 * TestUTF32: opens a "UTF-32" converter and feeds three byte sequences
 * (in1, in2, in3) through TestNextUChar, resetting the to-Unicode state
 * with ucnv_resetToUnicode() before the second and third runs. It then
 * checks that an empty range (source == limit) is reported as
 * U_INDEX_OUTOFBOUNDS_ERROR.
 */
2191 static void TestUTF32() {
/* in1 begins with the bytes 00 00 FE FF (the UTF-32 big-endian byte order mark). */
2193 static const uint8_t in1
[]={
2194 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
/* in2 begins with the bytes FF FE 00 00 (the UTF-32 little-endian byte order mark). */
2196 static const uint8_t in2
[]={
2197 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
/*
 * in3 begins with 00 00 FE FE, which is not a byte order mark, and ends with
 * two 4-byte units holding 0xD840 and 0xDC01 when read big-endian; results3
 * below expects U+FFFD for each of those two units.
 */
2199 static const uint8_t in3
[]={
2200 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2203 /* expected test results */
2204 static const int32_t results1
[]={
2205 /* number of bytes read, code point */
2209 static const int32_t results2
[]={
2210 /* number of bytes read, code point */
2214 static const int32_t results3
[]={
2215 /* number of bytes read, code point */
2218 4, 0xfffd, /* unmatched surrogate */
2219 4, 0xfffd /* unmatched surrogate */
2222 const char *source
, *limit
;
2224 UErrorCode errorCode
=U_ZERO_ERROR
;
2225 UConverter
*cnv
=ucnv_open("UTF-32", &errorCode
);
2226 if(U_FAILURE(errorCode
)) {
2227 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode
));
/* Run 1: in1 against results1. */
2231 source
=(const char *)in1
, limit
=(const char *)in1
+sizeof(in1
);
2232 TestNextUChar(cnv
, source
, limit
, results1
, "UTF-32");
/* Run 2: in2 against results2, after resetting the to-Unicode state. */
2234 source
=(const char *)in2
, limit
=(const char *)in2
+sizeof(in2
);
2235 ucnv_resetToUnicode(cnv
);
2236 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32");
/* Run 3: in3 against results3, after resetting the to-Unicode state. */
2238 source
=(const char *)in3
, limit
=(const char *)in3
+sizeof(in3
);
2239 ucnv_resetToUnicode(cnv
);
2240 TestNextUChar(cnv
, source
, limit
, results3
, "UTF-32");
2242 /* Test the condition when source >= sourceLimit */
/* source still points at in3 here; passing it as both start and limit gives an empty range. */
2243 ucnv_resetToUnicode(cnv
);
2244 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2252 static const uint8_t in
[]={
2253 0x00, 0x00, 0x00, 0x61,
2254 0x00, 0x00, 0x30, 0x61,
2255 0x00, 0x00, 0xdc, 0x00,
2256 0x00, 0x00, 0xd8, 0x00,
2257 0x00, 0x00, 0xdf, 0xff,
2258 0x00, 0x00, 0xff, 0xfe,
2259 0x00, 0x10, 0xab, 0xcd,
2260 0x00, 0x10, 0xff, 0xff
2263 /* expected test results */
2264 static const int32_t results
[]={
2265 /* number of bytes read, code point */
2276 /* error test input */
2277 static const uint8_t in2
[]={
2278 0x00, 0x00, 0x00, 0x61,
2279 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2280 0x00, 0x00, 0x00, 0x62,
2281 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2282 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2283 0x00, 0x00, 0x01, 0x62,
2284 0x00, 0x00, 0x02, 0x62
2287 /* expected error test results */
2288 static const int32_t results2
[]={
2289 /* number of bytes read, code point */
2296 UConverterToUCallback cb
;
2299 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2300 UErrorCode errorCode
=U_ZERO_ERROR
;
2301 UConverter
*cnv
=ucnv_open("UTF-32BE", &errorCode
);
2302 if(U_FAILURE(errorCode
)) {
2303 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode
));
2306 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32BE");
2308 /* Test the condition when source >= sourceLimit */
2309 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2311 /* test error behavior with a skip callback */
2312 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2313 source
=(const char *)in2
;
2314 limit
=(const char *)(in2
+sizeof(in2
));
2315 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32BE");
2323 static const uint8_t in
[]={
2324 0x61, 0x00, 0x00, 0x00,
2325 0x61, 0x30, 0x00, 0x00,
2326 0x00, 0xdc, 0x00, 0x00,
2327 0x00, 0xd8, 0x00, 0x00,
2328 0xff, 0xdf, 0x00, 0x00,
2329 0xfe, 0xff, 0x00, 0x00,
2330 0xcd, 0xab, 0x10, 0x00,
2331 0xff, 0xff, 0x10, 0x00
2334 /* expected test results */
2335 static const int32_t results
[]={
2336 /* number of bytes read, code point */
2347 /* error test input */
2348 static const uint8_t in2
[]={
2349 0x61, 0x00, 0x00, 0x00,
2350 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2351 0x62, 0x00, 0x00, 0x00,
2352 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2353 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2354 0x62, 0x01, 0x00, 0x00,
2355 0x62, 0x02, 0x00, 0x00,
2358 /* expected error test results */
2359 static const int32_t results2
[]={
2360 /* number of bytes read, code point */
2367 UConverterToUCallback cb
;
2370 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2371 UErrorCode errorCode
=U_ZERO_ERROR
;
2372 UConverter
*cnv
=ucnv_open("UTF-32LE", &errorCode
);
2373 if(U_FAILURE(errorCode
)) {
2374 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode
));
2377 TestNextUChar(cnv
, source
, limit
, results
, "UTF-32LE");
2379 /* Test the condition when source >= sourceLimit */
2380 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2382 /* test error behavior with a skip callback */
2383 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_SKIP
, NULL
, &cb
, &p
, &errorCode
);
2384 source
=(const char *)in2
;
2385 limit
=(const char *)(in2
+sizeof(in2
));
2386 TestNextUChar(cnv
, source
, limit
, results2
, "UTF-32LE");
2394 static const uint8_t in
[]={
2403 /* expected test results */
2404 static const int32_t results
[]={
2405 /* number of bytes read, code point */
2413 static const uint16_t in1
[] = {
2414 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2415 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2416 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2417 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2418 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2419 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2420 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2421 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2422 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2423 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2424 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2427 static const uint8_t out1
[] = {
2428 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2429 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2430 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2431 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2432 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2433 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2434 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2435 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2436 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2437 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2438 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2441 static const uint16_t in2
[]={
2442 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2443 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2444 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2445 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2446 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2447 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2448 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2449 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2450 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2451 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2452 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2453 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2454 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2455 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2456 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2457 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2458 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2459 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2460 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2461 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2462 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2463 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2464 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2465 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2466 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2467 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2468 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2469 0x37, 0x20, 0x2A, 0x2F,
2471 static const unsigned char out2
[]={
2472 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2473 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2474 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2475 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2476 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2477 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2478 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2479 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2480 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2481 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2482 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2483 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2484 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2485 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2486 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2487 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2488 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2489 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2490 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2491 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2492 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2493 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2494 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2495 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2496 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2497 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2498 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2499 0x37, 0x20, 0x2A, 0x2F,
2501 const char *source
=(const char *)in
;
2502 const char *limit
=(const char *)in
+sizeof(in
);
2504 UErrorCode errorCode
=U_ZERO_ERROR
;
2505 UConverter
*cnv
=ucnv_open("LATIN_1", &errorCode
);
2506 if(U_FAILURE(errorCode
)) {
2507 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode
));
2510 TestNextUChar(cnv
, source
, limit
, results
, "LATIN_1");
2511 /* Test the condition when source >= sourceLimit */
2512 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2513 TestConv((uint16_t*)in1
,sizeof(in1
)/2,"LATIN_1","LATIN-1",(char*)out1
,sizeof(out1
));
2514 TestConv((uint16_t*)in2
,sizeof(in2
)/2,"ASCII","ASCII",(char*)out2
,sizeof(out2
));
2522 static const uint8_t in
[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2523 /* expected test results */
2524 static const int32_t results
[]={
2525 /* number of bytes read, code point */
2534 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2535 UErrorCode errorCode
=U_ZERO_ERROR
;
2536 UConverter
*cnv
=ucnv_open("x-mac-turkish", &errorCode
);
2537 if(U_FAILURE(errorCode
)) {
2538 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode
));
2541 TestNextUChar(cnv
, source
, limit
, results
, "SBCS(x-mac-turkish)");
2542 /* Test the condition when source >= sourceLimit */
2543 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2544 /*Test for Illegal character */ /*
2546 static const uint8_t input1[]={ 0xA1 };
2547 const char* illegalsource=(const char*)input1;
2548 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2557 static const uint8_t in
[]={
2566 /* expected test results */
2567 static const int32_t results
[]={
2568 /* number of bytes read, code point */
2576 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2577 UErrorCode errorCode
=U_ZERO_ERROR
;
2579 UConverter
*cnv
=my_ucnv_open("@ibm9027", &errorCode
);
2580 if(U_FAILURE(errorCode
)) {
2581 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode
));
2584 TestNextUChar(cnv
, source
, limit
, results
, "DBCS(@ibm9027)");
2585 /* Test the condition when source >= sourceLimit */
2586 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2587 /*Test for the condition where there is an invalid character*/
2589 static const uint8_t source2
[]={0x1a, 0x1b};
2590 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2592 /*Test for the condition where we have a truncated char*/
2594 static const uint8_t source1
[]={0xc4};
2595 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2596 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2604 static const uint8_t in
[]={
2615 /* expected test results */
2616 static const int32_t results
[]={
2617 /* number of bytes read, code point */
2627 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2628 UErrorCode errorCode
=U_ZERO_ERROR
;
2630 UConverter
*cnv
=ucnv_open("ibm-1363", &errorCode
);
2631 if(U_FAILURE(errorCode
)) {
2632 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode
));
2635 TestNextUChar(cnv
, source
, limit
, results
, "MBCS(ibm-1363)");
2636 /* Test the condition when source >= sourceLimit */
2637 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2638 /*Test for the condition where there is an invalid character*/
2640 static const uint8_t source2
[]={0xa1, 0x80};
2641 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character");
2643 /*Test for the condition where we have a truncated char*/
2645 static const uint8_t source1
[]={0xc4};
2646 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2647 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2653 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2656 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2658 const char *cnvName
= "ibm-1363";
2659 UErrorCode status
= U_ZERO_ERROR
;
2660 const char sourceData
[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2661 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2662 const char *source
= sourceData
;
2663 const char *sourceLim
= sourceData
+sizeof(sourceData
);
2665 UConverter
*cnv
=ucnv_open(cnvName
, &status
);
2666 if(U_FAILURE(status
)) {
2667 log_data_err("Unable to open %s converter: %s\n", cnvName
, u_errorName(status
));
2673 UChar targetBuf
[256];
2674 UChar
*target
= targetBuf
;
2675 UChar
*targetLim
= target
+256;
2676 ucnv_toUnicode(cnv
, &target
, targetLim
, &source
, sourceLim
, NULL
, TRUE
, &status
);
2678 log_info("After convert: target@%d, source@%d, status%s\n",
2679 target
-targetBuf
, source
-sourceData
, u_errorName(status
));
2681 if(U_FAILURE(status
)) {
2682 log_err("Failed to convert: %s\n", u_errorName(status
));
2689 c1
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2690 log_verbose("c1: U+%04X, source@%d, status %s\n", c1
, source
-sourceData
, u_errorName(status
));
2692 c2
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2693 log_verbose("c2: U+%04X, source@%d, status %s\n", c2
, source
-sourceData
, u_errorName(status
));
2695 c3
=ucnv_getNextUChar(cnv
, &source
, sourceLim
, &status
);
2696 log_verbose("c3: U+%04X, source@%d, status %s\n", c3
, source
-sourceData
, u_errorName(status
));
2698 if(status
==U_INDEX_OUTOFBOUNDS_ERROR
&& c3
==0xFFFF) {
2699 log_verbose("OK\n");
2701 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2709 #ifdef U_ENABLE_GENERIC_ISO_2022
2714 static const uint8_t in
[]={
2721 0xf0, 0x90, 0x80, 0x80
2726 /* expected test results */
2727 static const int32_t results
[]={
2728 /* number of bytes read, code point */
2729 4, 0x0031, /* 4 bytes including the escape sequence */
2737 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
2738 UErrorCode errorCode
=U_ZERO_ERROR
;
2741 cnv
=ucnv_open("ISO_2022", &errorCode
);
2742 if(U_FAILURE(errorCode
)) {
2743 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
2746 TestNextUChar(cnv
, source
, limit
, results
, "ISO_2022");
2748 /* Test the condition when source >= sourceLimit */
2749 TestNextUCharError(cnv
, source
, source
-1, U_ILLEGAL_ARGUMENT_ERROR
, "sourceLimit < source");
2750 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
2751 /*Test for the condition where we have a truncated char*/
2753 static const uint8_t source1
[]={0xc4};
2754 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
2755 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_TRUNCATED_CHAR_FOUND
, "a character is truncated");
2757 /*Test for the condition where there is an invalid character*/
2759 static const uint8_t source2
[]={0xa1, 0x01};
2760 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_CHAR_FOUND
, "an invalid character");
/*
 * TestSmallTargetBuffer: round-trips [source, sourceLimit) through cnv.
 * The text is first converted with ucnv_fromUnicode() into cBuf, then
 * converted back with ucnv_toUnicode() into uBuf. In both directions the
 * target limit starts at the beginning of the buffer and is moved forward
 * by i before every call, so each call sees only a small amount of output
 * room; U_BUFFER_OVERFLOW_ERROR is reset to U_ZERO_ERROR, any other failure
 * is logged with log_err().
 * (The declaration and value of i are not visible in this excerpt.)
 */
2768 TestSmallTargetBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2769 const UChar
* uSource
;
2770 const UChar
* uSourceLimit
;
2771 const char* cSource
;
2772 const char* cSourceLimit
;
2773 UChar
*uTargetLimit
=NULL
;
2776 const char *cTargetLimit
;
2778 UChar
*uBuf
; /*,*test;*/
2779 int32_t uBufSize
= 120;
2782 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Scratch buffers: uBufSize*5 UChars and uBufSize*10 chars. No NULL check on malloc() is visible in this excerpt. */
2783 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2784 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2787 uSource
= (UChar
*) source
;
2788 uSourceLimit
=(const UChar
*)sourceLimit
;
/* Both target limits start at the buffer start, i.e. with zero room. */
2792 cTargetLimit
= cBuf
;
2793 uTargetLimit
= uBuf
;
/* Unicode -> bytes: open up i more chars of room, then convert. */
2797 cTargetLimit
= cTargetLimit
+ i
;
2798 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2799 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2800 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): the message below names ucnv_toUnicode, but the call that precedes it is ucnv_fromUnicode. */
2804 if(U_FAILURE(errorCode
)){
2805 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2809 }while (uSource
<uSourceLimit
);
/* Bytes -> Unicode: everything written so far (up to cTarget) is the input. */
2811 cSourceLimit
=cTarget
;
2813 uTargetLimit
=uTargetLimit
+i
;
2814 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2815 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2816 errorCode
=U_ZERO_ERROR
;
2819 if(U_FAILURE(errorCode
)){
2820 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2823 }while(cSource
<cSourceLimit
);
/*
 * NOTE(review): the bound is (source - sourceLimit). The callers visible in
 * this file pass an array and array + UPRV_LENGTHOF(array), which makes the
 * difference negative, so this comparison loop never executes and the
 * round-trip result is never actually checked. Presumably
 * (sourceLimit - source) was intended - confirm before changing.
 */
2827 for(len
=0;len
<(int)(source
- sourceLimit
);len
++){
2828 if(uBuf
[len
]!=uSource
[len
]){
2829 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
2836 /* Test for Jitterbug 778 */
/*
 * TestToAndFromUChars: round-trips [source, sourceLimit) through cnv using
 * the whole-buffer APIs. ucnv_fromUChars() writes into cBuf and returns the
 * output length in numCharsInTarget; ucnv_toUChars() then converts exactly
 * that many bytes back into uBuf. The result is compared element by element
 * against the input and mismatches are reported with log_err().
 */
2837 static void TestToAndFromUChars(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2838 const UChar
* uSource
;
2839 const UChar
* uSourceLimit
;
2840 const char* cSource
;
2841 UChar
*uTargetLimit
=NULL
;
2844 const char *cTargetLimit
;
2847 int32_t uBufSize
= 120;
2848 int numCharsInTarget
=0;
2849 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Scratch buffers of uBufSize*5 elements each. No NULL check on malloc() is visible in this excerpt. */
2850 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2851 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
2853 uSourceLimit
=sourceLimit
;
/* The limits match the allocation sizes above (uBufSize*5 elements). */
2855 cTargetLimit
= cBuf
+uBufSize
*5;
2857 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Unicode -> bytes in one call; capacities and lengths are passed as element counts. */
2859 numCharsInTarget
=ucnv_fromUChars(cnv
, cTarget
, (int32_t)(cTargetLimit
-cTarget
), uSource
, (int32_t)(uSourceLimit
-uSource
), &errorCode
);
/* NOTE(review): the message below names ucnv_fromUnicode, but the call that precedes it is ucnv_fromUChars. */
2860 if(U_FAILURE(errorCode
)){
2861 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Bytes -> Unicode in one call, using the byte count returned above as the source length. */
2866 ucnv_toUChars(cnv
,uTarget
,(int32_t)(uTargetLimit
-uTarget
),cSource
,numCharsInTarget
,&errorCode
);
2867 if(U_FAILURE(errorCode
)){
2868 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode
));
/* Compare the round-tripped text (test) against the original input (uSource). */
2872 while(uSource
<uSourceLimit
){
2873 if(*test
!=*uSource
){
2875 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * TestSmallSourceBuffer: round-trips [source, sourceLimit) through cnv while
 * restricting how much input each call may see. The target limits are set to
 * the full size of the scratch buffers; the source limit starts at the
 * beginning of the input and is moved forward by one element before each
 * ucnv_fromUnicode() / ucnv_toUnicode() call. U_BUFFER_OVERFLOW_ERROR is
 * reset to U_ZERO_ERROR, any other failure is logged with log_err().
 */
2884 static void TestSmallSourceBuffer(const uint16_t* source
, const UChar
* sourceLimit
,UConverter
* cnv
){
2885 const UChar
* uSource
;
2886 const UChar
* uSourceLimit
;
2887 const char* cSource
;
2888 const char* cSourceLimit
;
2889 UChar
*uTargetLimit
=NULL
;
2892 const char *cTargetLimit
;
2894 UChar
*uBuf
; /*,*test;*/
2895 int32_t uBufSize
= 120;
/* temp keeps the real end of the input; uSourceLimit is the moving, artificial limit. */
2898 const UChar
*temp
= sourceLimit
;
2899 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Scratch buffers: uBufSize*5 UChars and uBufSize*10 chars. No NULL check on malloc() is visible in this excerpt. */
2900 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
2901 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
2905 uSource
= (UChar
*) source
;
/* Target limits cover the whole of each scratch buffer (cBuf + uBufSize*10, uBuf + uBufSize*5). */
2909 cTargetLimit
= cBuf
;
2910 uTargetLimit
= uBuf
+uBufSize
*5;
2911 cTargetLimit
= cTargetLimit
+uBufSize
*10;
/* The source limit starts at the source start, i.e. with no input available. */
2912 uSourceLimit
=uSource
;
/* Unicode -> bytes: expose one more UChar of input (never past sourceLimit), then convert. */
2915 if (uSourceLimit
< sourceLimit
) {
2916 uSourceLimit
= uSourceLimit
+1;
2918 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,FALSE
, &errorCode
);
2919 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2920 errorCode
=U_ZERO_ERROR
;
/* NOTE(review): the message below names ucnv_toUnicode, but the call that precedes it is ucnv_fromUnicode. */
2924 if(U_FAILURE(errorCode
)){
2925 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2929 }while (uSource
<temp
);
/* Bytes -> Unicode: expose one more byte of input per call; cBuf + (cTarget - cBuf) is the same address as cTarget. */
2933 if (cSourceLimit
< cBuf
+ (cTarget
- cBuf
)) {
2934 cSourceLimit
= cSourceLimit
+1;
2936 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,FALSE
,&errorCode
);
2937 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
){
2938 errorCode
=U_ZERO_ERROR
;
2941 if(U_FAILURE(errorCode
)){
2942 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
2945 }while(cSource
<cTarget
);
/*
 * NOTE(review): the bound is (source - sourceLimit). The callers visible in
 * this file pass an array and array + UPRV_LENGTHOF(array), which makes the
 * difference negative, so this comparison loop never executes and the
 * round-trip result is never actually checked. Presumably
 * (sourceLimit - source) was intended - confirm before changing.
 */
2949 for(;len
<(int)(source
- sourceLimit
);len
++){
2950 if(uBuf
[len
]!=uSource
[len
]){
2951 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource
[len
],(int)uBuf
[len
]) ;
/*
 * TestGetNextUChar2022: reads code points one at a time from the byte range
 * [source, limit) with ucnv_getNextUChar() and compares each against the
 * expected UTF-16 text in results. U_INDEX_OUTOFBOUNDS_ERROR ends the loop;
 * any other failure, and any mismatch, is reported with log_err() using
 * message as a prefix.
 */
2959 TestGetNextUChar2022(UConverter
* cnv
, const char* source
, const char* limit
,
2960 const uint16_t results
[], const char* message
){
2961 /* const char* s0; */
/* s walks the input bytes, r walks the expected UTF-16 code units. */
2962 const char* s
=(char*)source
;
2963 const uint16_t *r
=results
;
2964 UErrorCode errorCode
=U_ZERO_ERROR
;
2969 c
=ucnv_getNextUChar(cnv
, &s
, limit
, &errorCode
);
2970 if(errorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
2971 break; /* no more significant input */
2972 } else if(U_FAILURE(errorCode
)) {
2973 log_err("%s ucnv_getNextUChar() failed: %s\n", message
, u_errorName(errorCode
));
/* When the expected unit is a lead surrogate, U16_NEXT decodes the expected code point into exC. */
2976 if(U16_IS_LEAD(*r
)){
2978 U16_NEXT(r
, i
, len
, exC
);
/*
 * NOTE(review): the comparison uses exC, but the message prints *r as the
 * expected value; the two can differ when exC came from a surrogate pair.
 * Confirm against the lines not visible in this excerpt.
 */
2983 if(c
!=(uint32_t)(exC
))
2984 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message
,(uint32_t) (*r
),c
);
/*
 * TestJitterbug930: regression test named after Jitterbug 930 (see the log
 * messages below). Opens the converter named by enc with my_ucnv_open(),
 * calls ucnv_fromUnicode() twice, and after each call walks the offsets
 * array. The first check compares the number of offsets counted
 * (numOffWritten) with the number of bytes written (target - out); the
 * second reports an error that names -1 as the expected offset value.
 * The converter is closed with ucnv_close() at the end.
 * Several lines of this function are not visible in this excerpt.
 */
2990 static int TestJitterbug930(const char* enc
){
2991 UErrorCode err
= U_ZERO_ERROR
;
2992 UConverter
*converter
;
2996 const UChar
*source
= in
;
2998 int32_t* offsets
= off
;
2999 int numOffWritten
=0;
3001 converter
= my_ucnv_open(enc
, &err
);
3003 in
[0] = 0x41; /* 0x4E00;*/
/* Fill every byte of off with '*'; presumably a sentinel for entries the converter does not write - confirm. */
3008 memset(off
, '*', sizeof(off
));
3010 ucnv_fromUnicode (converter
,
3019 /* writes three bytes into the output buffer: 41 1B 24
3020 * but offsets contains 0 1 1
3022 while(*offsets
< off
[10]){
3026 log_verbose("Testing Jitterbug 930 for encoding %s",enc
);
3027 if(numOffWritten
!= (int)(target
-out
)){
3028 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
, (int)(target
-out
),numOffWritten
);
3033 memset(off
,'*' , sizeof(off
));
3037 ucnv_fromUnicode (converter
,
3046 while(*offsets
< off
[10]){
3049 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc
,-1,*offsets
) ;
3054 /* writes 42 43 7A into output buffer,
3055 * offsets contains -1 -1 -1
3057 ucnv_close(converter
);
3064 static const uint16_t in
[]={
3065 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3066 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3067 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3068 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3069 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3070 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3071 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3072 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3073 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3074 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3075 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3076 0x005A, 0x005B, 0x005C, 0x000A
3078 const UChar
* uSource
;
3079 const UChar
* uSourceLimit
;
3080 const char* cSource
;
3081 const char* cSourceLimit
;
3082 UChar
*uTargetLimit
=NULL
;
3085 const char *cTargetLimit
;
3088 int32_t uBufSize
= 120;
3089 UErrorCode errorCode
=U_ZERO_ERROR
;
3091 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3092 int32_t* myOff
= offsets
;
3093 cnv
=ucnv_open("HZ", &errorCode
);
3094 if(U_FAILURE(errorCode
)) {
3095 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode
));
3099 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3100 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3101 uSource
= (const UChar
*)in
;
3102 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3104 cTargetLimit
= cBuf
+uBufSize
*5;
3106 uTargetLimit
= uBuf
+ uBufSize
*5;
3107 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3108 if(U_FAILURE(errorCode
)){
3109 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3113 cSourceLimit
=cTarget
;
3116 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3117 if(U_FAILURE(errorCode
)){
3118 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3121 uSource
= (const UChar
*)in
;
3122 while(uSource
<uSourceLimit
){
3123 if(*test
!=*uSource
){
3125 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3130 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "HZ encoding");
3131 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3132 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3133 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3134 TestJitterbug930("csISO2022JP");
3144 static const uint16_t in
[]={
3145 /* test full range of Devanagari */
3146 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3147 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3148 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3149 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3150 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3151 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3152 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3153 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3154 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3155 0x096D,0x096E,0x096F,
3156 /* test Soft halant*/
3157 0x0915,0x094d, 0x200D,
3158 /* test explicit halant */
3159 0x0915,0x094d, 0x200c,
3160 /* test double danda */
3163 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3164 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3165 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3166 /* tests from Lotus */
3167 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3168 0x0930,0x094D,0x200D,
3169 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3170 0x0915,0x0921,0x002B,0x095F,
3172 0x0B86, 0xB87, 0xB88,
3174 0x0C05, 0x0C02, 0x0C03,0x0c31,
3176 0x0C85, 0xC82, 0x0C83,
3177 /* test Abbr sign and Anudatta */
3187 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3188 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3191 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3192 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3193 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3194 0x093D /* Avagraha 0xEA, 0xE9*/,
3202 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3204 static const unsigned char byteArr
[]={
3206 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3207 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3208 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3209 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3210 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3211 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3212 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3213 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3214 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3216 /* test soft halant */
3218 /* test explicit halant */
3220 /* test double danda */
3223 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3224 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3225 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3228 /* tests from Lotus */
3229 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3230 0xEF,0x42,0xCF,0xE8,0xD9,
3231 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3232 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3234 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3236 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3238 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3239 /* anudatta and abbreviation sign */
3240 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3243 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3245 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3247 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3249 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3251 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3253 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3255 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3257 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3259 0xB3, 0xE9, /* Ka + NUKTA */
3261 0xB4, 0xE9, /* Kha + NUKTA */
3263 0xB5, 0xE9, /* Ga + NUKTA */
3275 /* just consume unhandled codepoints */
3279 testConvertToU(byteArr
,(sizeof(byteArr
)),in
,UPRV_LENGTHOF(in
),"x-iscii-de",NULL
,TRUE
);
3280 TestConv(in
,(sizeof(in
)/2),"ISCII,version=0","hindi", (char *)byteArr
,sizeof(byteArr
));
3287 static const uint16_t in
[]={
3288 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3289 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3290 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3291 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3292 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3293 0x201D, 0x3014, 0x000D, 0x000A,
3294 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3295 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3297 const UChar
* uSource
;
3298 const UChar
* uSourceLimit
;
3299 const char* cSource
;
3300 const char* cSourceLimit
;
3301 UChar
*uTargetLimit
=NULL
;
3304 const char *cTargetLimit
;
3307 int32_t uBufSize
= 120;
3308 UErrorCode errorCode
=U_ZERO_ERROR
;
3310 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3311 int32_t* myOff
= offsets
;
3312 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3313 if(U_FAILURE(errorCode
)) {
3314 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode
));
3318 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3319 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3320 uSource
= (const UChar
*)in
;
3321 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3323 cTargetLimit
= cBuf
+uBufSize
*5;
3325 uTargetLimit
= uBuf
+ uBufSize
*5;
3326 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3327 if(U_FAILURE(errorCode
)){
3328 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3332 cSourceLimit
=cTarget
;
3335 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3336 if(U_FAILURE(errorCode
)){
3337 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3341 uSource
= (const UChar
*)in
;
3342 while(uSource
<uSourceLimit
){
3343 if(*test
!=*uSource
){
3345 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3351 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3352 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3353 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-JP encoding");
3354 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3355 TestJitterbug930("csISO2022JP");
/*
 * Round-trip helper shared by the converter tests.
 *
 * Opens the converter named `conv` with my_ucnv_open(), encodes the `len`
 * UTF-16 units of `in` with ucnv_fromUnicode(), decodes the produced bytes
 * with ucnv_toUnicode(), and logs an error for every unit that differs from
 * the input.  The same input is then passed to TestSmallTargetBuffer(),
 * TestSmallSourceBuffer() and TestGetNextUChar2022().
 *
 * in         - UTF-16 input text
 * len        - number of UTF-16 units of `in` to use
 * conv       - converter name; also used in the log messages
 * lang       - label passed as the message to TestGetNextUChar2022() for the
 *              byteArr check
 * byteArr    - optional reference bytes; the extra checks run only when this
 *              is non-NULL and byteArrLen is non-zero
 * byteArrLen - number of bytes in byteArr
 */
3362 static void TestConv(const uint16_t in
[],int len
, const char* conv
, const char* lang
, char byteArr
[],int byteArrLen
){
3363 const UChar
* uSource
;
3364 const UChar
* uSourceLimit
;
3365 const char* cSource
;
3366 const char* cSourceLimit
;
3367 UChar
*uTargetLimit
=NULL
;
3370 const char *cTargetLimit
;
/* All three work buffers below are sized from uBufSize (1200 units). */
3373 int32_t uBufSize
= 120*10;
3374 UErrorCode errorCode
=U_ZERO_ERROR
;
/* Offsets array handed to every conversion call in this function. */
/* NOTE(review): the malloc results here are not checked in the visible code. */
3376 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) );
3377 int32_t* myOff
= offsets
;
3378 cnv
=my_ucnv_open(conv
, &errorCode
);
3379 if(U_FAILURE(errorCode
)) {
3380 log_data_err("Unable to open a %s converter: %s\n", conv
, u_errorName(errorCode
));
3384 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
));
3385 cBuf
=(char*)malloc(uBufSize
* sizeof(char));
3386 uSource
= (const UChar
*)in
;
3387 uSourceLimit
=uSource
+len
;
3389 cTargetLimit
= cBuf
+uBufSize
;
3391 uTargetLimit
= uBuf
+ uBufSize
;
/* Step 1: encode the UTF-16 input into bytes, flushing at the end (TRUE). */
3392 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3393 if(U_FAILURE(errorCode
)){
3394 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
3397 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
/* Step 2: decode the bytes just written; cTarget marks the end of the encoded output. */
3399 cSourceLimit
=cTarget
;
3402 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3403 if(U_FAILURE(errorCode
)){
3404 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode
));
/* Step 3: compare the decoded text with the original input, unit by unit. */
3408 uSource
= (const UChar
*)in
;
3409 while(uSource
<uSourceLimit
){
3410 if(*test
!=*uSource
){
3411 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv
,*uSource
,(int)*test
) ;
/* Step 4: repeat the conversion through the small-buffer and getNextUChar helpers. */
3416 TestSmallTargetBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3417 TestSmallSourceBuffer(in
,(const UChar
*)&in
[len
],cnv
);
3418 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, conv
);
/* Step 5 (optional): check the caller-supplied reference bytes as well. */
3419 if(byteArr
&& byteArrLen
!=0){
3420 TestGetNextUChar2022(cnv
, byteArr
, (byteArr
+byteArrLen
), in
, lang
);
3421 TestToAndFromUChars(in
,(const UChar
*)&in
[len
],cnv
);
/* NOTE(review): presumably cSource points at byteArr here - confirm. */
3424 cSourceLimit
= cSource
+byteArrLen
;
3427 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3428 if(U_FAILURE(errorCode
)){
3429 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the text decoded from the reference bytes with the input. */
3433 uSource
= (const UChar
*)in
;
3434 while(uSource
<uSourceLimit
){
3435 if(*test
!=*uSource
){
3436 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/*
 * Character-access callback handed to u_unescapeAt() by unescape().
 * `context` is treated as a char buffer; the char at index `offset` is
 * returned, converted to UChar.  No bounds checking is done here.
 */
3449 static UChar U_CALLCONV
3450 _charAt(int32_t offset
, void *context
) {
3451 return ((char*)context
)[offset
];
/*
 * Copies the char string `src` into the UTF-16 buffer `dst`, expanding
 * backslash escapes through u_unescapeAt().
 *
 * dst    - output buffer; may be NULL only when dstLen is not positive
 * dstLen - capacity of dst in UChars
 * src    - input chars; must not be NULL
 * srcLen - number of input chars
 * status - in/out error code.  Nothing is converted if it already holds a
 *          failure.  Set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *          U_INVALID_CHAR_FOUND when u_unescapeAt() rejects an escape, and
 *          U_BUFFER_OVERFLOW_ERROR when dst is too small.
 *
 * dstIndex keeps advancing after an overflow ("for preflighting").
 * NOTE(review): presumably dstIndex is the return value, i.e. the required
 * output length - confirm against the return statement.
 */
3455 unescape(UChar
* dst
, int32_t dstLen
,const char* src
,int32_t srcLen
,UErrorCode
*status
){
3458 if(U_FAILURE(*status
)){
/* Argument validation: a NULL dst is only accepted together with a non-positive capacity. */
3461 if((dst
==NULL
&& dstLen
>0) || (src
==NULL
) || dstLen
< -1 || srcLen
<-1 ){
3462 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
/* NOTE(review): presumably this strlen is taken only when srcLen is -1 - confirm. */
3466 srcLen
= (int32_t)uprv_strlen(src
);
/* The loop has no increment clause; srcIndex is advanced in the body. */
3469 for (; srcIndex
<srcLen
; ) {
3470 UChar32 c
= src
[srcIndex
++];
/* A backslash starts an escape; u_unescapeAt() parses it and moves srcIndex past it. */
3471 if (c
== 0x005C /*'\\'*/) {
3472 c
= u_unescapeAt(_charAt
,&srcIndex
,srcLen
,(void*)src
); /* advances i*/
3473 if (c
== (UChar32
)0xFFFFFFFF) {
3474 *status
=U_INVALID_CHAR_FOUND
; /* return empty string */
3475 break; /* invalid escape sequence */
/* Store c only while there is room in dst. */
3478 if(dstIndex
< dstLen
){
/* Surrogate-pair path: lead unit first, trail unit only if it still fits. */
3480 dst
[dstIndex
++] = U16_LEAD(c
);
3481 if(dstIndex
<dstLen
){
3482 dst
[dstIndex
]=U16_TRAIL(c
);
3484 *status
=U_BUFFER_OVERFLOW_ERROR
;
/* Single-unit path. */
3487 dst
[dstIndex
]=(UChar
)c
;
3491 *status
= U_BUFFER_OVERFLOW_ERROR
;
3493 dstIndex
++; /* for preflighting */
/*
 * Exhaustive round-trip driver for the converter named `cp`.
 *
 * Every call goes through TestConv() with no reference bytes (NULL, 0).
 * Code point 0 is tested first, then the loop runs i up to and including
 * 0x10FFFF.  For each value, usource holds the code point either as one
 * UChar or as a U16_LEAD/U16_TRAIL pair, and the converter is exercised with
 * that code point once, twice, three times, and (via nsrc) between two
 * other code points.
 */
3499 TestFullRoundtrip(const char* cp
){
/* Zero-initialised scratch buffers, 10 UTF-16 units each. */
3500 UChar usource
[10] ={0};
3501 UChar nsrc
[10] = {0};
3505 /* Test codepoint 0 */
3506 TestConv(usource
,1,cp
,"",NULL
,0);
3507 TestConv(usource
,2,cp
,"",NULL
,0);
3509 TestConv(nsrc
,3,cp
,"",NULL
,0);
3511 for(;i
<=0x10FFFF;i
++){
/* Single-unit form of i. */
3517 usource
[0] =(UChar
) i
;
/* Surrogate-pair form of i. */
3520 usource
[0]=U16_LEAD(i
);
3521 usource
[1]=U16_TRAIL(i
);
3528 /* Test only single code points */
3529 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3530 /* Test codepoint repeated twice */
3531 usource
[ulen
]=usource
[0];
3532 usource
[ulen
+1]=usource
[1];
3534 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3535 /* Test codepoint repeated 3 times */
3536 usource
[ulen
]=usource
[0];
3537 usource
[ulen
+1]=usource
[1];
3539 TestConv(usource
,ulen
,cp
,"",NULL
,0);
3540 /* Test codepoint in between 2 codepoints */
3544 TestConv(nsrc
,len
+2,cp
,"",NULL
,0);
/* Clear the whole scratch buffer. */
3545 uprv_memset(usource
,0,sizeof(UChar
)*10);
/*
 * Runs TestFullRoundtrip() for each Unicode-capable converter listed below.
 * The whole test is skipped when QUICK_OPTION is set.  Each run is announced
 * with a log_verbose() line naming the converter about to be tested.
 */
3550 TestRoundTrippingAllUTF(void){
3551 if(!getTestOption(QUICK_OPTION
)){
3552 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3553 TestFullRoundtrip("BOCU-1");
3554 log_verbose("Running exhaustive round trip test for SCSU\n");
3555 TestFullRoundtrip("SCSU");
3556 log_verbose("Running exhaustive round trip test for UTF-8\n");
3557 TestFullRoundtrip("UTF-8");
3558 log_verbose("Running exhaustive round trip test for CESU-8\n");
3559 TestFullRoundtrip("CESU-8");
3560 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3561 TestFullRoundtrip("UTF-16BE");
3562 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3563 TestFullRoundtrip("UTF-16LE");
3564 log_verbose("Running exhaustive round trip test for UTF-16\n");
3565 TestFullRoundtrip("UTF-16");
3566 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3567 TestFullRoundtrip("UTF-32BE");
3568 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3569 TestFullRoundtrip("UTF-32LE");
3570 log_verbose("Running exhaustive round trip test for UTF-32\n");
3571 TestFullRoundtrip("UTF-32");
3572 log_verbose("Running exhaustive round trip test for UTF-7\n");
3573 TestFullRoundtrip("UTF-7");
/* The message names the version=1 variant so it can be told apart from the plain UTF-7 run above. */
3574 log_verbose("Running exhaustive round trip test for UTF-7,version=1\n");
3575 TestFullRoundtrip("UTF-7,version=1");
3576 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3577 TestFullRoundtrip("IMAP-mailbox-name");
3580 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3581 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3582 * The old mappings remain as fallbacks.
3583 * This test may be reintroduced at a later time.
3588 log_verbose("Running exhaustive round trip test for GB18030\n");
3589 TestFullRoundtrip("GB18030");
3597 static const uint16_t germanUTF16
[]={
3598 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3601 static const uint8_t germanSCSU
[]={
3602 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3605 static const uint16_t russianUTF16
[]={
3606 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3609 static const uint8_t russianSCSU
[]={
3610 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3613 static const uint16_t japaneseUTF16
[]={
3614 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3615 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3616 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3617 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3618 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3619 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3620 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3621 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3622 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3623 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3624 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3625 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3626 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3627 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3628 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3631 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3632 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3633 static const uint8_t japaneseSCSU
[]={
3634 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3635 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3636 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3637 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3638 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3639 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3640 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3641 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3642 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3643 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3644 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3648 static const uint16_t allFeaturesUTF16
[]={
3649 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3650 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3651 0x01df, 0xf000, 0xdbff, 0xdfff
3654 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3655 * result here (34B vs. 35B)
3657 static const uint8_t allFeaturesSCSU
[]={
3658 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3659 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3660 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3661 0xdf, 0x14, 0x80, 0x15, 0xff
3663 static const uint16_t monkeyIn
[]={
3664 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3665 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3666 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3667 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3668 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3669 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3670 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3671 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3672 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3673 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3674 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3675 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3676 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3677 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3678 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3679 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3680 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3681 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3682 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3683 /* test non-BMP code points */
3684 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3685 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3686 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3687 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3688 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3689 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3690 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3691 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3692 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3693 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3694 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3697 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3698 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3699 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3700 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3701 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3703 static const char *fTestCases
[] = {
3704 "\\ud800\\udc00", /* smallest surrogate*/
3706 "\\udBff\\udFff", /* largest surrogate pair*/
3709 "Hello \\u9292 \\u9192 World!",
3710 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3711 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3713 "\\u0648\\u06c8", /* catch missing reset*/
3716 "\\u4444\\uE001", /* lowest quotable*/
3717 "\\u4444\\uf2FF", /* highest quotable*/
3718 "\\u4444\\uf188\\u4444",
3719 "\\u4444\\uf188\\uf288",
3720 "\\u4444\\uf188abc\\u0429\\uf288",
3722 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3724 "Hello World!123456",
3725 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3727 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3728 "abc\\u4411d", /* uses SQU*/
3729 "abc\\u4411\\u4412d",/* uses SCU*/
3730 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3731 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3734 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3735 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3737 "", /* empty input*/
3738 "\\u0000", /* smallest BMP character*/
3739 "\\uFFFF", /* largest BMP character*/
3741 /* regression tests*/
3742 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3743 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3744 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3745 "\\u0041\\u00df\\u0401\\u015f",
3746 "\\u9066\\u2123abc",
3747 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3748 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3751 for(;i
<UPRV_LENGTHOF(fTestCases
);i
++){
3752 const char* cSrc
= fTestCases
[i
];
3753 UErrorCode status
= U_ZERO_ERROR
;
3754 int32_t cSrcLen
,srcLen
;
3756 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3757 cSrcLen
= srcLen
= (int32_t)uprv_strlen(fTestCases
[i
]);
3758 src
= (UChar
*) malloc((sizeof(UChar
) * srcLen
) + sizeof(UChar
));
3759 srcLen
=unescape(src
,srcLen
,cSrc
,cSrcLen
,&status
);
3760 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc
,i
);
3761 TestConv(src
,srcLen
,"SCSU","Coverage",NULL
,0);
3764 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features", (char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3765 TestConv(allFeaturesUTF16
,(sizeof(allFeaturesUTF16
)/2),"SCSU","all features",(char *)allFeaturesSCSU
,sizeof(allFeaturesSCSU
));
3766 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3767 TestConv(japaneseUTF16
,(sizeof(japaneseUTF16
)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU
,sizeof(japaneseSCSU
));
3768 TestConv(germanUTF16
,(sizeof(germanUTF16
)/2),"SCSU","german",(char *)germanSCSU
,sizeof(germanSCSU
));
3769 TestConv(russianUTF16
,(sizeof(russianUTF16
)/2), "SCSU","russian",(char *)russianSCSU
,sizeof(russianSCSU
));
3770 TestConv(monkeyIn
,(sizeof(monkeyIn
)/2),"SCSU","monkey",NULL
,0);
3773 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Regression test for Jitterbug 2346.
 *
 * Decodes an ISO-2022-JP byte sequence (two JIS X 0208 characters, each
 * followed by an escape back to the single-byte set and CR LF) with
 * ucnv_toUnicode(), compares the output against `expected`, then encodes
 * the decoded text again with ucnv_fromUnicode().
 */
3774 static void TestJitterbug2346(){
3775 char source
[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3776 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3777 uint16_t expected
[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3779 UChar uTarget
[500]={'\0'};
3780 UChar
* utarget
=uTarget
;
3781 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
3783 char cTarget
[500]={'\0'};
3784 char* ctarget
=cTarget
;
3785 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
3786 const char* csource
=source
;
/* temp walks the expected text during the comparison below. */
3787 UChar
* temp
= expected
;
3788 UErrorCode err
=U_ZERO_ERROR
;
3790 UConverter
* conv
=ucnv_open("ISO_2022_JP",&err
);
3791 if(U_FAILURE(err
)) {
3792 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
/* Decode the whole byte sequence in one call, flushing at the end. */
3795 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(source
),NULL
,TRUE
,&err
);
3796 if(U_FAILURE(err
)) {
3797 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err
));
/* utarget now marks the end of the decoded text. */
3800 utargetLimit
=utarget
;
3802 while(utarget
<utargetLimit
){
3803 if(*temp
!=*utarget
){
/* *temp is the expected unit and *utarget the converter output, so log them in that order. */
3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*temp
,(int)*utarget
) ;
/* Encode the decoded text back to ISO-2022-JP. */
3810 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
3811 if(U_FAILURE(err
)) {
3812 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err
));
3815 ctargetLimit
=ctarget
;
/*
 * Round-trip test for the "ISO_2022_JP_1" converter.
 *
 * Encodes the UTF-16 text in `in` with ucnv_fromUnicode(), decodes the
 * result with ucnv_toUnicode() (no offsets are requested in either call),
 * and logs every unit that differs from the input.  It then runs
 * TestNextUCharError() on a short byte sequence and repeats the conversion
 * through TestSmallTargetBuffer() and TestSmallSourceBuffer().
 */
3823 TestISO_2022_JP_1() {
/* Input text: ASCII, CR LF line ends, Japanese punctuation and CJK ideographs. */
3825 static const uint16_t in
[]={
3826 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3827 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3828 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3829 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3830 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3831 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3832 0x201D, 0x000D, 0x000A,
3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3834 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3836 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3837 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3838 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3840 const UChar
* uSource
;
3841 const UChar
* uSourceLimit
;
3842 const char* cSource
;
3843 const char* cSourceLimit
;
3844 UChar
*uTargetLimit
=NULL
;
3847 const char *cTargetLimit
;
/* Both work buffers below are allocated with uBufSize*5 units. */
3850 int32_t uBufSize
= 120;
3851 UErrorCode errorCode
=U_ZERO_ERROR
;
3854 cnv
=ucnv_open("ISO_2022_JP_1", &errorCode
);
3855 if(U_FAILURE(errorCode
)) {
3856 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3860 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3861 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3862 uSource
= (const UChar
*)in
;
3863 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3865 cTargetLimit
= cBuf
+uBufSize
*5;
3867 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole input in one call, flushing at the end. */
3868 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,NULL
,TRUE
, &errorCode
);
3869 if(U_FAILURE(errorCode
)){
3870 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode the bytes just produced; cTarget marks their end. */
3874 cSourceLimit
=cTarget
;
3876 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,NULL
,TRUE
,&errorCode
);
3877 if(U_FAILURE(errorCode
)){
3878 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded text with the original input, unit by unit. */
3881 uSource
= (const UChar
*)in
;
3882 while(uSource
<uSourceLimit
){
3883 if(*test
!=*uSource
){
3885 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
3891 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3892 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the message says "invalid character" but the expected status passed is U_ZERO_ERROR - confirm this is intended. */
3895 static const uint8_t source2
[]={0x0e,0x24,0x053};
3896 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-1]");
/* Repeat the conversion through the small-buffer helpers. */
3898 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3899 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
/*
 * Round-trip test for the "ISO_2022_JP_2" converter.
 *
 * Encodes the UTF-16 text in `in` with ucnv_fromUnicode(), decodes the
 * result with ucnv_toUnicode() (both calls receive the same offsets array),
 * and logs every unit that differs from the input.  It then repeats the
 * conversion through TestSmallTargetBuffer(), TestSmallSourceBuffer() and
 * TestToAndFromUChars(), and runs TestNextUCharError() on a short byte
 * sequence.
 */
3906 TestISO_2022_JP_2() {
/* Input text: Latin-1 symbols, CJK punctuation and ideographs, ASCII, Greek, with CR LF line ends. */
3908 static const uint16_t in
[]={
3909 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3910 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3911 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3912 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3913 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3914 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3915 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3916 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3917 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3918 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3919 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3920 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3921 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3922 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3923 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3924 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3925 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3926 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3927 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3929 const UChar
* uSource
;
3930 const UChar
* uSourceLimit
;
3931 const char* cSource
;
3932 const char* cSourceLimit
;
3933 UChar
*uTargetLimit
=NULL
;
3936 const char *cTargetLimit
;
/* All work buffers below are allocated with uBufSize*5 units. */
3939 int32_t uBufSize
= 120;
3940 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): offsets is allocated before ucnv_open(); confirm the open-failure path frees it. */
3942 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
3943 int32_t* myOff
= offsets
;
3944 cnv
=ucnv_open("ISO_2022_JP_2", &errorCode
);
3945 if(U_FAILURE(errorCode
)) {
3946 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
3950 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
3951 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
3952 uSource
= (const UChar
*)in
;
3953 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
3955 cTargetLimit
= cBuf
+uBufSize
*5;
3957 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole input in one call, flushing at the end. */
3958 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
3959 if(U_FAILURE(errorCode
)){
3960 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode the bytes just produced; cTarget marks their end. */
3964 cSourceLimit
=cTarget
;
3967 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
3968 if(U_FAILURE(errorCode
)){
3969 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded text with the original input, unit by unit. */
3972 uSource
= (const UChar
*)in
;
3973 while(uSource
<uSourceLimit
){
3974 if(*test
!=*uSource
){
3976 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
/* Repeat the conversion through the shared helper tests. */
3981 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3982 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3983 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
3984 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the message says "invalid character" but the expected status passed is U_ZERO_ERROR - confirm this is intended. */
3987 static const uint8_t source2
[]={0x0e,0x24,0x053};
3988 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-JP-2]");
3999 static const uint16_t in
[]={
4000 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4001 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4002 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4003 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4004 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4005 ,0x53E3,0x53E4,0x000A,0x000D};
4006 const UChar
* uSource
;
4007 const UChar
* uSourceLimit
;
4008 const char* cSource
;
4009 const char* cSourceLimit
;
4010 UChar
*uTargetLimit
=NULL
;
4013 const char *cTargetLimit
;
4016 int32_t uBufSize
= 120;
4017 UErrorCode errorCode
=U_ZERO_ERROR
;
4019 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4020 int32_t* myOff
= offsets
;
4021 cnv
=ucnv_open("ISO_2022,locale=kr", &errorCode
);
4022 if(U_FAILURE(errorCode
)) {
4023 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4027 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4028 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4029 uSource
= (const UChar
*)in
;
4030 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4032 cTargetLimit
= cBuf
+uBufSize
*5;
4034 uTargetLimit
= uBuf
+ uBufSize
*5;
4035 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4036 if(U_FAILURE(errorCode
)){
4037 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4041 cSourceLimit
=cTarget
;
4044 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4045 if(U_FAILURE(errorCode
)){
4046 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4049 uSource
= (const UChar
*)in
;
4050 while(uSource
<uSourceLimit
){
4051 if(*test
!=*uSource
){
4052 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
4057 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4058 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4059 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4060 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4061 TestJitterbug930("csISO2022KR");
4062 /*Test for the condition where there is an invalid character*/
4065 static const uint8_t source2
[]={0x1b,0x24,0x053};
4066 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4067 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Round-trip test for the "ibm-25546" converter.
 *
 * Encodes the UTF-16 text in `in` with ucnv_fromUnicode(), decodes the
 * result with ucnv_toUnicode() (both calls receive the same offsets array),
 * and logs every unit that differs from the input.  It then runs
 * TestGetNextUChar2022(), TestSmallTargetBuffer(), TestSmallSourceBuffer()
 * and TestToAndFromUChars() on the same data.  Finally it installs the STOP
 * to-Unicode callback and checks that a truncated escape sequence is
 * reported as U_ILLEGAL_ESCAPE_SEQUENCE.
 */
4076 TestISO_2022_KR_1() {
/* Input text: CJK ideographs, Hangul syllables and ASCII punctuation, with LF CR line ends. */
4078 static const uint16_t in
[]={
4079 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4080 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4081 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4082 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4083 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4084 ,0x53E3,0x53E4,0x000A,0x000D};
4085 const UChar
* uSource
;
4086 const UChar
* uSourceLimit
;
4087 const char* cSource
;
4088 const char* cSourceLimit
;
4089 UChar
*uTargetLimit
=NULL
;
4092 const char *cTargetLimit
;
/* All work buffers below are allocated with uBufSize*5 units. */
4095 int32_t uBufSize
= 120;
4096 UErrorCode errorCode
=U_ZERO_ERROR
;
/* NOTE(review): offsets is allocated before ucnv_open(); confirm the open-failure path frees it. */
4098 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4099 int32_t* myOff
= offsets
;
4100 cnv
=ucnv_open("ibm-25546", &errorCode
);
4101 if(U_FAILURE(errorCode
)) {
4102 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4106 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4107 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 5);
4108 uSource
= (const UChar
*)in
;
4109 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4111 cTargetLimit
= cBuf
+uBufSize
*5;
4113 uTargetLimit
= uBuf
+ uBufSize
*5;
/* Encode the whole input in one call, flushing at the end. */
4114 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4115 if(U_FAILURE(errorCode
)){
4116 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Decode the bytes just produced; cTarget marks their end. */
4120 cSourceLimit
=cTarget
;
4123 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4124 if(U_FAILURE(errorCode
)){
4125 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
/* Compare the decoded text with the original input, unit by unit. */
4128 uSource
= (const UChar
*)in
;
4129 while(uSource
<uSourceLimit
){
4130 if(*test
!=*uSource
){
4131 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,*test
) ;
/* Repeat the conversion through the shared helper tests. */
4137 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-KR encoding");
4138 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4139 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4141 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4142 /*Test for the condition where there is an invalid character*/
/* With the STOP callback installed, the bytes ESC 0x24 0x53 are expected to yield U_ILLEGAL_ESCAPE_SEQUENCE. */
4145 static const uint8_t source2
[]={0x1b,0x24,0x053};
4146 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
4147 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ILLEGAL_ESCAPE_SEQUENCE
, "an invalid character [ISO-2022-KR]");
/*
 * Regression test for Jitterbug 2411: input that contains the ISO-2022-KR
 * designator escape sequence (ESC $ ) C) more than once must decode without
 * error.  The check is made first with the "iso-2022-kr" converter (kr) and
 * then with the "ibm-25546" converter (kr1).
 */
4155 static void TestJitterbug2411(){
/* Two lines of ASCII text, each introduced by ESC $ ) C, plus a trailing ESC $ ) C. */
4156 static const char* source
= "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4157 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4158 UConverter
* kr
=NULL
, *kr1
=NULL
;
4159 UErrorCode errorCode
= U_ZERO_ERROR
;
4160 UChar tgt
[100]={'\0'};
4161 UChar
* target
= tgt
;
4162 UChar
* targetLimit
= target
+100;
4163 kr
=ucnv_open("iso-2022-kr", &errorCode
);
4164 if(U_FAILURE(errorCode
)) {
4165 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode
));
/* First pass: decode with the iso-2022-kr converter. */
4168 ucnv_toUnicode(kr
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4169 if(U_FAILURE(errorCode
)) {
4170 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4173 kr1
= ucnv_open("ibm-25546", &errorCode
);
4174 if(U_FAILURE(errorCode
)) {
4175 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode
));
4179 targetLimit
= target
+100;
/* Second pass: decode with kr1, the converter the error message below refers to. */
/* NOTE(review): the first ucnv_toUnicode() call received &source; confirm source is reset before this call, otherwise it sees empty input. */
4180 ucnv_toUnicode(kr1
,&target
,targetLimit
,&source
,source
+uprv_strlen(source
),NULL
,TRUE
,&errorCode
);
4182 if(U_FAILURE(errorCode
)) {
4183 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode
));
4194 /* From Unicode moved to testdata/conversion.txt */
4197 static const uint8_t sampleTextJIS
[] = {
4198 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4199 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4200 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4202 static const uint16_t expectedISO2022JIS
[] = {
4207 static const int32_t toISO2022JISOffs
[]={
4213 static const uint8_t sampleTextJIS7
[] = {
4214 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4215 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4216 0x1b,0x24,0x42,0x21,0x21,
4217 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4219 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4221 static const uint16_t expectedISO2022JIS7
[] = {
4229 static const int32_t toISO2022JIS7Offs
[]={
4236 static const uint8_t sampleTextJIS8
[] = {
4237 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4238 0xa1,0xc8,0xd9,/*Katakana Set*/
4241 0xb1,0xc3, /*Katakana Set*/
4242 0x1b,0x24,0x42,0x21,0x21
4244 static const uint16_t expectedISO2022JIS8
[] = {
4246 0xff61, 0xff88, 0xff99,
4251 static const int32_t toISO2022JIS8Offs
[]={
4257 testConvertToU(sampleTextJIS
,sizeof(sampleTextJIS
),expectedISO2022JIS
,
4258 UPRV_LENGTHOF(expectedISO2022JIS
),"JIS", toISO2022JISOffs
,TRUE
);
4259 testConvertToU(sampleTextJIS7
,sizeof(sampleTextJIS7
),expectedISO2022JIS7
,
4260 UPRV_LENGTHOF(expectedISO2022JIS7
),"JIS7", toISO2022JIS7Offs
,TRUE
);
4261 testConvertToU(sampleTextJIS8
,sizeof(sampleTextJIS8
),expectedISO2022JIS8
,
4262 UPRV_LENGTHOF(expectedISO2022JIS8
),"JIS8", toISO2022JIS8Offs
,TRUE
);
4269 ICU
4.4 (ticket
#7314) removes mappings for CNS 11643 planes 3..7
4271 static void TestJitterbug915(){
4272 /* tests for roundtripping of the below sequence
4273 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4274 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4275 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4276 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4277 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4278 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4279 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4281 static const char cSource
[]={
4282 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4283 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4284 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4285 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4286 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4287 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4288 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4289 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4290 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4291 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4292 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4293 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4294 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4295 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4296 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4297 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4298 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4299 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4300 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4301 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4302 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4303 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4304 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4305 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4306 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4307 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4308 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4309 0x37, 0x20, 0x2A, 0x2F
4311 UChar uTarget
[500]={'\0'};
4312 UChar
* utarget
=uTarget
;
4313 UChar
* utargetLimit
=uTarget
+sizeof(uTarget
)/2;
4315 char cTarget
[500]={'\0'};
4316 char* ctarget
=cTarget
;
4317 char* ctargetLimit
=cTarget
+sizeof(cTarget
);
4318 const char* csource
=cSource
;
4319 const char* tempSrc
= cSource
;
4320 UErrorCode err
=U_ZERO_ERROR
;
4322 UConverter
* conv
=ucnv_open("ISO_2022_CN_EXT",&err
);
4323 if(U_FAILURE(err
)) {
4324 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err
));
4327 ucnv_toUnicode(conv
,&utarget
,utargetLimit
,&csource
,csource
+sizeof(cSource
),NULL
,TRUE
,&err
);
4328 if(U_FAILURE(err
)) {
4329 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err
));
4332 utargetLimit
=utarget
;
4334 ucnv_fromUnicode(conv
,&ctarget
,ctargetLimit
,(const UChar
**)&utarget
,utargetLimit
,NULL
,TRUE
,&err
);
4335 if(U_FAILURE(err
)) {
4336 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err
));
4339 ctargetLimit
=ctarget
;
/* Compare the bytes produced by the Unicode -> ISO-2022-CN conversion (walked
 * via ctarget over cTarget) against the original input (walked via tempSrc
 * over cSource). In the report, "Expected" is the original input byte and
 * "Got" is the byte that came out of the round trip. */
4341 while(ctarget
<ctargetLimit
){
4342 if(*ctarget
!= *tempSrc
){
4343 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget
-cTarget
), (int)*tempSrc
,(int)*ctarget
) ;
4353 TestISO_2022_CN_EXT() {
4355 static const uint16_t in
[]={
4356 /* test Non-BMP code points */
4357 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4358 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4359 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4360 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4361 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4362 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4363 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4364 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4365 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4368 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4369 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4370 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4371 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4372 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4373 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4374 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4375 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4376 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4377 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4378 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4379 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4380 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4381 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4382 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4383 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4384 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4385 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4387 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4391 const UChar
* uSource
;
4392 const UChar
* uSourceLimit
;
4393 const char* cSource
;
4394 const char* cSourceLimit
;
4395 UChar
*uTargetLimit
=NULL
;
4398 const char *cTargetLimit
;
4401 int32_t uBufSize
= 180;
4402 UErrorCode errorCode
=U_ZERO_ERROR
;
4404 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4405 int32_t* myOff
= offsets
;
4406 cnv
=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode
);
4407 if(U_FAILURE(errorCode
)) {
4408 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4412 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4413 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4414 uSource
= (const UChar
*)in
;
4415 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4417 cTargetLimit
= cBuf
+uBufSize
*5;
4419 uTargetLimit
= uBuf
+ uBufSize
*5;
4420 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4421 if(U_FAILURE(errorCode
)){
4422 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4426 cSourceLimit
=cTarget
;
4429 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4430 if(U_FAILURE(errorCode
)){
4431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4434 uSource
= (const UChar
*)in
;
4435 while(uSource
<uSourceLimit
){
4436 if(*test
!=*uSource
){
4437 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4440 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4445 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4446 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4447 /*Test for the condition where there is an invalid character*/
4450 static const uint8_t source2
[]={0x0e,0x24,0x053};
4451 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN-EXT]");
4463 static const uint16_t in
[]={
4465 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4466 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4467 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4468 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4469 0x0020, 0x0045, 0x004e, 0x0044,
4471 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4472 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4473 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4474 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4475 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4476 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4477 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4478 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4479 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4480 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4481 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4482 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4483 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4484 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4485 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4486 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4487 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4490 const UChar
* uSource
;
4491 const UChar
* uSourceLimit
;
4492 const char* cSource
;
4493 const char* cSourceLimit
;
4494 UChar
*uTargetLimit
=NULL
;
4497 const char *cTargetLimit
;
4500 int32_t uBufSize
= 180;
4501 UErrorCode errorCode
=U_ZERO_ERROR
;
4503 int32_t* offsets
= (int32_t*) malloc(uBufSize
* sizeof(int32_t) * 5);
4504 int32_t* myOff
= offsets
;
4505 cnv
=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode
);
4506 if(U_FAILURE(errorCode
)) {
4507 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode
));
4511 uBuf
= (UChar
*)malloc(uBufSize
* sizeof(UChar
)*5);
4512 cBuf
=(char*)malloc(uBufSize
* sizeof(char) * 10);
4513 uSource
= (const UChar
*)in
;
4514 uSourceLimit
=(const UChar
*)in
+ UPRV_LENGTHOF(in
);
4516 cTargetLimit
= cBuf
+uBufSize
*5;
4518 uTargetLimit
= uBuf
+ uBufSize
*5;
4519 ucnv_fromUnicode( cnv
, &cTarget
, cTargetLimit
,&uSource
,uSourceLimit
,myOff
,TRUE
, &errorCode
);
4520 if(U_FAILURE(errorCode
)){
4521 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4525 cSourceLimit
=cTarget
;
4528 ucnv_toUnicode(cnv
,&uTarget
,uTargetLimit
,&cSource
,cSourceLimit
,myOff
,TRUE
,&errorCode
);
4529 if(U_FAILURE(errorCode
)){
4530 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode
));
4533 uSource
= (const UChar
*)in
;
4534 while(uSource
<uSourceLimit
){
4535 if(*test
!=*uSource
){
4536 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource
,(int)*test
) ;
4539 log_verbose(" Got: \\u%04X\n",(int)*test
) ;
4544 TestGetNextUChar2022(cnv
, cBuf
, cTarget
, in
, "ISO-2022-CN encoding");
4545 TestSmallTargetBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4546 TestSmallSourceBuffer(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4547 TestToAndFromUChars(in
,(const UChar
*)in
+ UPRV_LENGTHOF(in
),cnv
);
4548 TestJitterbug930("csISO2022CN");
4549 /*Test for the condition where there is an invalid character*/
4552 static const uint8_t source2
[]={0x0e,0x24,0x053};
4553 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [ISO-2022-CN]");
4562 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
4564 const char * converterName
;
4565 const char * inputText
;
4566 int inputTextLength
;
/*
 * To-Unicode error callback that TestJitterbug6175 installs with
 * ucnv_setToUCallBack().
 *
 * Only toArgs, reason and err are used by the lines shown here; context,
 * codeUnits and length are accepted to match the callback signature.
 *
 * When the callback is reached with a reason other than UCNV_IRREGULAR it
 * logs a test error. It then clears *err and writes the converter's
 * substitution output through ucnv_cbToUWriteSub().
 */
4569 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
4570 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context
, UConverterToUnicodeArgs
*toArgs
, const char* codeUnits
,
4571 int32_t length
, UConverterCallbackReason reason
, UErrorCode
* err
) {
/* NOTE(review): presumably this filters out the non-error notifications
 * (per ucnv_err.h the reasons above UCNV_IRREGULAR are RESET/CLOSE/CLONE);
 * the body of this branch is not visible here - confirm. */
4572 if (reason
> UCNV_IRREGULAR
) {
/* Any remaining reason other than UCNV_IRREGULAR is a test failure. */
4575 if (reason
!= UCNV_IRREGULAR
) {
4576 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4578 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
/* Reset the error so conversion can continue, then emit the substitution. */
4579 *err
= U_ZERO_ERROR
;
4580 ucnv_cbToUWriteSub(toArgs
,0,err
);
/* Capacity, in UChars, of the output buffer used by TestJitterbug6175. */
4583 enum { kEmptySegmentToUCharsMax
= 64 };
/*
 * Feeds byte sequences containing "empty segments" to several stateful
 * converters with UCNV_TO_U_CALLBACK_EMPTYSEGMENT installed as the
 * to-Unicode callback.
 *
 * Each input below contains a mode switch that is immediately followed by
 * another mode switch, with no character bytes in between.
 */
4584 static void TestJitterbug6175(void) {
/* "ab", ESC $ B immediately followed by ESC ( B, then "cd" CR LF */
4585 static const char iso2022jp_a
[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
/* ESC $ ) C, "a", 0x0E immediately followed by 0x0F, "b" CR LF */
4586 static const char iso2022kr_a
[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
/* "a", ESC $ ) A, "b", 0x0E immediately followed by 0x0F, then ESC $ * H, ESC N + two bytes, "c" CR LF */
4587 static const char iso2022cn_a
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
/* "a", ESC $ ) A, "b", 0x0E immediately followed by ESC $ ) G, two bytes, 0x0F, "c" CR LF */
4588 static const char iso2022cn_b
[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
/* "ab", "~{" immediately followed by "~}", then "cd" */
4589 static const char hzGB2312_a
[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
/* Test table: converter name, input bytes, input length in bytes. */
4590 static const EmptySegmentTest emptySegmentTests
[] = {
4591 /* converterName inputText inputTextLength */
4592 { "ISO-2022-JP", iso2022jp_a
, sizeof(iso2022jp_a
) },
4593 { "ISO-2022-KR", iso2022kr_a
, sizeof(iso2022kr_a
) },
4594 { "ISO-2022-CN", iso2022cn_a
, sizeof(iso2022cn_a
) },
4595 { "ISO-2022-CN", iso2022cn_b
, sizeof(iso2022cn_b
) },
4596 { "HZ-GB-2312", hzGB2312_a
, sizeof(hzGB2312_a
) },
4600 const EmptySegmentTest
* testPtr
;
/* The loop stops at the first entry whose converterName is NULL.
 * NOTE(review): that terminating entry is not visible in this listing - confirm it exists. */
4601 for (testPtr
= emptySegmentTests
; testPtr
->converterName
!= NULL
; ++testPtr
) {
4602 UErrorCode err
= U_ZERO_ERROR
;
4603 UConverter
* cnv
= ucnv_open(testPtr
->converterName
, &err
);
/* A converter that cannot be opened is reported as a data error. */
4604 if (U_FAILURE(err
)) {
4605 log_data_err("Unable to open %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
/* Install the checking callback; the previous callback and context are not requested (NULL, NULL). */
4608 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_EMPTYSEGMENT
, NULL
, NULL
, NULL
, &err
);
4609 if (U_FAILURE(err
)) {
4610 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr
->converterName
, u_errorName(err
));
/* Convert the whole input in one call, with no offsets array (NULL) and flush set to TRUE. */
4615 UChar toUChars
[kEmptySegmentToUCharsMax
];
4616 UChar
* toUCharsPtr
= toUChars
;
4617 const UChar
* toUCharsLimit
= toUCharsPtr
+ kEmptySegmentToUCharsMax
;
4618 const char * inCharsPtr
= testPtr
->inputText
;
4619 const char * inCharsLimit
= inCharsPtr
+ testPtr
->inputTextLength
;
4620 ucnv_toUnicode(cnv
, &toUCharsPtr
, toUCharsLimit
, &inCharsPtr
, inCharsLimit
, NULL
, TRUE
, &err
);
/*
 * Runs the TestNextUChar / TestNextUCharError helpers against the "ibm-930"
 * converter: two input sequences (in/results and in2/results2) plus three
 * error-condition checks (empty source, a lone 0x0f byte, and the byte
 * sequence 0x0e 0x7F 0xFF).
 */
4627 TestEBCDIC_STATEFUL() {
/* First input sequence. */
4629 static const uint8_t in
[]={
4638 /* expected test results */
4639 static const int32_t results
[]={
4640 /* number of bytes read, code point */
/* Second input sequence, used at the end of the function. */
4649 static const uint8_t in2
[]={
4655 /* expected test results */
4656 static const int32_t results2
[]={
4657 /* number of bytes read, code point */
4662 const char *source
=(const char *)in
, *limit
=(const char *)in
+sizeof(in
);
4663 UErrorCode errorCode
=U_ZERO_ERROR
;
4664 UConverter
*cnv
=ucnv_open("ibm-930", &errorCode
);
/* A converter that cannot be opened is reported as a data error. */
4665 if(U_FAILURE(errorCode
)) {
4666 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode
));
4669 TestNextUChar(cnv
, source
, limit
, results
, "EBCDIC_STATEFUL(ibm-930)");
4671 /* Test the condition when source >= sourceLimit */
/* source is passed as both start and limit, i.e. an empty input. */
4672 TestNextUCharError(cnv
, source
, source
, U_INDEX_OUTOFBOUNDS_ERROR
, "sourceLimit <= source");
4674 /*Test for the condition where source > sourcelimit after consuming the shift character */
4676 static const uint8_t source1
[]={0x0f};
4677 TestNextUCharError(cnv
, (const char*)source1
, (const char*)source1
+sizeof(source1
), U_INDEX_OUTOFBOUNDS_ERROR
, "a character is truncated");
4679 /*Test for the condition where there is an invalid character*/
/* NOTE(review): the expected code passed here is U_ZERO_ERROR although the
 * message speaks of an invalid character - confirm how TestNextUCharError
 * interprets its 'expected' argument. */
4682 static const uint8_t source2
[]={0x0e, 0x7F, 0xFF};
4683 TestNextUCharError(cnv
, (const char*)source2
, (const char*)source2
+sizeof(source2
), U_ZERO_ERROR
, "an invalid character [EBCDIC STATEFUL]");
/* Re-run the same converter over the second sequence. */
4686 source
=(const char*)in2
;
4687 limit
=(const char*)in2
+sizeof(in2
);
4688 TestNextUChar(cnv
,source
,limit
,results2
,"EBCDIC_STATEFUL(ibm-930),seq#2");
4696 static const uint8_t in
[]={
4699 0x81, 0x30, 0x81, 0x30,
4703 0x82, 0x35, 0x8f, 0x33,
4704 0x84, 0x31, 0xa4, 0x39,
4705 0x90, 0x30, 0x81, 0x30,
4706 0xe3, 0x32, 0x9a, 0x35
4709 * Feature removed markus 2000-oct-26
4710 * Only some codepages must match surrogate pairs into supplementary code points -
4711 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4712 * GB 18030 provides direct encodings for supplementary code points, therefore
4713 * it must not combine two single-encoded surrogates into one code point.
4715 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4719 /* expected test results */
4720 static const int32_t results
[]={
4721 /* number of bytes read, code point */
4733 /* Feature removed. See comment above. */
4738 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4739 UErrorCode errorCode
=U_ZERO_ERROR
;
4740 UConverter
*cnv
=ucnv_open("gb18030", &errorCode
);
4741 if(U_FAILURE(errorCode
)) {
4742 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode
));
4745 TestNextUChar(cnv
, (const char *)in
, (const char *)in
+sizeof(in
), results
, "gb18030");
4751 /* LMBCS-1 string */
4752 static const uint8_t pszLMBCS
[]={
4761 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4765 /* Unicode UChar32 equivalents */
4766 static const UChar32 pszUnicode32
[]={
4776 0x00023456, /* code point for surrogate pair */
4780 /* Unicode UChar equivalents */
4781 static const UChar pszUnicode
[]={
4791 0xD84D, /* high (lead) surrogate: in the range D800..DBFF */
4792 0xDC56, /* low (trail) surrogate: in the range DC00..DFFF; the pair encodes U+23456 */
4796 /* expected test results */
4797 static const int offsets32
[]={
4798 /* number of bytes read, code point */
4812 /* expected test results */
4813 static const int offsets
[]={
4814 /* number of bytes read, code point */
4832 #define NAME_LMBCS_1 "LMBCS-1"
4833 #define NAME_LMBCS_2 "LMBCS-2"
4836 /* Some basic open/close/property tests on some LMBCS converters */
4839 char expected_subchars
[] = {0x3F}; /* ANSI Question Mark */
4840 char new_subchars
[] = {0x7F}; /* subst char used by SmartSuite..*/
4841 char get_subchars
[1];
4842 const char * get_name
;
4846 int8_t len
= sizeof(get_subchars
);
4848 UErrorCode errorCode
=U_ZERO_ERROR
;
4851 cnv1
=ucnv_open(NAME_LMBCS_1
, &errorCode
);
4852 if(U_FAILURE(errorCode
)) {
4853 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4856 cnv2
=ucnv_open(NAME_LMBCS_2
, &errorCode
);
4857 if(U_FAILURE(errorCode
)) {
4858 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode
));
4863 get_name
= ucnv_getName (cnv1
, &errorCode
);
4864 if (strcmp(NAME_LMBCS_1
,get_name
)){
4865 log_err("Unexpected converter name: %s\n", get_name
);
4867 get_name
= ucnv_getName (cnv2
, &errorCode
);
4868 if (strcmp(NAME_LMBCS_2
,get_name
)){
4869 log_err("Unexpected converter name: %s\n", get_name
);
4872 /* substitution chars */
4873 ucnv_getSubstChars (cnv1
, get_subchars
, &len
, &errorCode
);
4874 if(U_FAILURE(errorCode
)) {
4875 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4878 log_err("Unexpected length of sub chars\n");
4880 if (get_subchars
[0] != expected_subchars
[0]){
4881 log_err("Unexpected value of sub chars\n");
4883 ucnv_setSubstChars (cnv2
,new_subchars
, len
, &errorCode
);
4884 if(U_FAILURE(errorCode
)) {
4885 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode
));
4887 ucnv_getSubstChars (cnv2
, get_subchars
, &len
, &errorCode
);
4888 if(U_FAILURE(errorCode
)) {
4889 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode
));
4892 log_err("Unexpected length of sub chars\n");
4894 if (get_subchars
[0] != new_subchars
[0]){
4895 log_err("Unexpected value of sub chars\n");
4902 /* LMBCS to Unicode - offsets */
4904 UErrorCode errorCode
=U_ZERO_ERROR
;
4906 const char * pSource
= (const char *)pszLMBCS
;
4907 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
4909 UChar Out
[sizeof(pszUnicode
) + 1];
4911 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
4913 int32_t off
[sizeof(offsets
)];
4915 /* last 'offset' in expected results is just the final size.
4916 (Makes other tests easier). Compensate here: */
4918 off
[UPRV_LENGTHOF(offsets
)-1] = sizeof(pszLMBCS
);
4922 cnv
=ucnv_open("lmbcs", &errorCode
); /* use generic name for LMBCS-1 */
4923 if(U_FAILURE(errorCode
)) {
4924 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode
));
4930 ucnv_toUnicode (cnv
,
4940 if (memcmp(off
,offsets
,sizeof(offsets
)))
4942 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4944 if (memcmp(Out
,pszUnicode
,sizeof(pszUnicode
)))
4946 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4951 /* LMBCS to Unicode - getNextUChar */
4952 const char * sourceStart
;
4953 const char *source
=(const char *)pszLMBCS
;
4954 const char *limit
=(const char *)pszLMBCS
+sizeof(pszLMBCS
);
4955 const UChar32
*results
= pszUnicode32
;
4956 const int *off
= offsets32
;
4958 UErrorCode errorCode
=U_ZERO_ERROR
;
4961 cnv
=ucnv_open("LMBCS-1", &errorCode
);
4962 if(U_FAILURE(errorCode
)) {
4963 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
4969 while(source
<limit
) {
4971 uniChar
=ucnv_getNextUChar(cnv
, &source
, source
+ (off
[1] - off
[0]), &errorCode
);
4972 if(U_FAILURE(errorCode
)) {
4973 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode
));
4975 } else if(source
-sourceStart
!= off
[1] - off
[0] || uniChar
!= *results
) {
/* Report a mismatch between what ucnv_getNextUChar() produced and the
 * expected table entry. The arguments are cast to the exact types the
 * format string names (%lx -> unsigned long, %d -> int), and the expected
 * byte count is off[1] - off[0], the same quantity the check above
 * compares against (not the start offset *off). */
4976 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4977 (unsigned long)uniChar
, (int)(source
-sourceStart
), (unsigned long)*results
, off
[1] - off
[0]);
4986 { /* test locale & optimization group operations: Unicode to LMBCS */
4988 UErrorCode errorCode
=U_ZERO_ERROR
;
4989 UConverter
*cnv16he
= ucnv_open("LMBCS-16,locale=he", &errorCode
);
4990 UConverter
*cnv16jp
= ucnv_open("LMBCS-16,locale=ja_JP", &errorCode
);
4991 UConverter
*cnv01us
= ucnv_open("LMBCS-1,locale=us_EN", &errorCode
);
4992 UChar uniString
[] = {0x0192}; /* Latin Small letter f with hook */
4993 const UChar
* pUniOut
= uniString
;
4994 UChar
* pUniIn
= uniString
;
4995 uint8_t lmbcsString
[4];
4996 const char * pLMBCSOut
= (const char *)lmbcsString
;
4997 char * pLMBCSIn
= (char *)lmbcsString
;
4999 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5000 ucnv_fromUnicode (cnv16he
,
5001 &pLMBCSIn
, (pLMBCSIn
+ UPRV_LENGTHOF(lmbcsString
)),
5002 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5003 NULL
, 1, &errorCode
);
5005 if (lmbcsString
[0] != 0x3 || lmbcsString
[1] != 0x83)
5007 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5010 pLMBCSIn
= (char *)lmbcsString
;
5011 pUniOut
= uniString
;
5012 ucnv_fromUnicode (cnv01us
,
5013 &pLMBCSIn
, (const char *)(lmbcsString
+ UPRV_LENGTHOF(lmbcsString
)),
5014 &pUniOut
, pUniOut
+ UPRV_LENGTHOF(uniString
),
5015 NULL
, 1, &errorCode
);
5017 if (lmbcsString
[0] != 0x9F)
5019 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5022 /* single byte char from mbcs char set */
5023 lmbcsString
[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5024 pLMBCSOut
= (const char *)lmbcsString
;
5026 ucnv_toUnicode (cnv16jp
,
5027 &pUniIn
, pUniIn
+ 1,
5028 &pLMBCSOut
, (pLMBCSOut
+ 1),
5029 NULL
, 1, &errorCode
);
5030 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+1 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5032 log_err("Unexpected results from LMBCS-16 single byte char\n");
5034 /* convert to group 1: should be 3 bytes */
5035 pLMBCSIn
= (char *)lmbcsString
;
5036 pUniOut
= uniString
;
5037 ucnv_fromUnicode (cnv01us
,
5038 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 3),
5039 &pUniOut
, pUniOut
+ 1,
5040 NULL
, 1, &errorCode
);
5041 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+3 || pUniOut
!= uniString
+1
5042 || lmbcsString
[0] != 0x10 || lmbcsString
[1] != 0x10 || lmbcsString
[2] != 0xAE)
5044 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5046 pLMBCSOut
= (const char *)lmbcsString
;
5048 ucnv_toUnicode (cnv01us
,
5049 &pUniIn
, pUniIn
+ 1,
5050 &pLMBCSOut
, (const char *)(pLMBCSOut
+ 3),
5051 NULL
, 1, &errorCode
);
5052 if (U_FAILURE(errorCode
) || pLMBCSOut
!= (const char *)lmbcsString
+3 || pUniIn
!= uniString
+1 || uniString
[0] != 0xFF6E)
5054 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5056 pLMBCSIn
= (char *)lmbcsString
;
5057 pUniOut
= uniString
;
5058 ucnv_fromUnicode (cnv16jp
,
5059 &pLMBCSIn
, (const char *)(pLMBCSIn
+ 1),
5060 &pUniOut
, pUniOut
+ 1,
5061 NULL
, 1, &errorCode
);
5062 if (U_FAILURE(errorCode
) || pLMBCSIn
!= (const char *)lmbcsString
+1 || pUniOut
!= uniString
+1 || lmbcsString
[0] != 0xAE)
5064 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5066 ucnv_close(cnv16he
);
5067 ucnv_close(cnv16jp
);
5068 ucnv_close(cnv01us
);
5071 /* Small source buffer testing, LMBCS -> Unicode */
5073 UErrorCode errorCode
=U_ZERO_ERROR
;
5075 const char * pSource
= (const char *)pszLMBCS
;
5076 const char * sourceLimit
= (const char *)pszLMBCS
+ sizeof(pszLMBCS
);
5077 int codepointCount
= 0;
5079 UChar Out
[sizeof(pszUnicode
) + 1];
5081 UChar
* OutLimit
= Out
+ UPRV_LENGTHOF(pszUnicode
);
5084 cnv
= ucnv_open(NAME_LMBCS_1
, &errorCode
);
5085 if(U_FAILURE(errorCode
)) {
5086 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode
));
5091 while ((pSource
< sourceLimit
) && U_SUCCESS (errorCode
))
5093 ucnv_toUnicode (cnv
,
5097 (pSource
+1), /* claim that this is a 1- byte buffer */
5099 FALSE
, /* FALSE means there might be more chars in the next buffer */
5102 if (U_SUCCESS (errorCode
))
5104 if ((pSource
- (const char *)pszLMBCS
) == offsets
[codepointCount
+1])
5106 /* we are on to the next code point: check value */
5108 if (Out
[0] != pszUnicode
[codepointCount
]){
/* Report a wrong code unit from the one-byte-at-a-time conversion. UChar
 * values are promoted to int when passed through "...", so cast them to
 * unsigned long to match the %lx conversions. */
5109 log_err("LMBCS->Uni result %lx should have been %lx \n",
5110 (unsigned long)Out
[0], (unsigned long)pszUnicode
[codepointCount
]);
5113 pOut
= Out
; /* reset for accumulating next code point */
5119 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode
));
5123 /* limits & surrogate error testing */
5124 char LIn
[sizeof(pszLMBCS
)];
5125 const char * pLIn
= LIn
;
5127 char LOut
[sizeof(pszLMBCS
)];
5128 char * pLOut
= LOut
;
5130 UChar UOut
[sizeof(pszUnicode
)];
5131 UChar
* pUOut
= UOut
;
5133 UChar UIn
[sizeof(pszUnicode
)];
5134 const UChar
* pUIn
= UIn
;
5136 int32_t off
[sizeof(offsets
)];
5139 errorCode
=U_ZERO_ERROR
;
5141 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5143 ucnv_fromUnicode(cnv
, &pLOut
, pLOut
+1, &pUIn
, pUIn
-1, off
, FALSE
, &errorCode
);
5144 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5146 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode
));
5150 errorCode
=U_ZERO_ERROR
;
5151 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)(pLIn
-1),off
,FALSE
, &errorCode
);
5152 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5154 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode
));
5156 errorCode
=U_ZERO_ERROR
;
5158 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)(pLIn
-1), &errorCode
);
5159 if (errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
)
5161 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode
));
5163 errorCode
=U_ZERO_ERROR
;
5165 /* 0 byte source request - no error, no pointer movement */
5166 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+1,(const char **)&pLIn
,(const char *)pLIn
,off
,FALSE
, &errorCode
);
5167 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+1,&pUIn
,pUIn
,off
,FALSE
, &errorCode
);
5168 if(U_FAILURE(errorCode
)) {
5169 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode
));
5171 if ((pUOut
!= UOut
) || (pUIn
!= UIn
) || (pLOut
!= LOut
) || (pLIn
!= LIn
))
5173 log_err("Unexpected pointer move in 0 byte source request \n");
5175 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5176 uniChar
= ucnv_getNextUChar(cnv
, (const char **)&pLIn
, (const char *)pLIn
, &errorCode
);
5177 if (errorCode
!= U_INDEX_OUTOFBOUNDS_ERROR
)
5179 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode
));
5181 if (((uint32_t)uniChar
- 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5183 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5185 errorCode
= U_ZERO_ERROR
;
5187 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5190 ucnv_fromUnicode(cnv
, &pLOut
,pLOut
+offsets
[4],&pUIn
,pUIn
+UPRV_LENGTHOF(pszUnicode
),off
,FALSE
, &errorCode
);
5191 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pLOut
!= LOut
+ offsets
[4] || pUIn
!= pszUnicode
+4 )
5193 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5196 errorCode
= U_ZERO_ERROR
;
5198 pLIn
= (const char *)pszLMBCS
;
5199 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+4,&pLIn
,(pLIn
+sizeof(pszLMBCS
)),off
,FALSE
, &errorCode
);
5200 if (errorCode
!= U_BUFFER_OVERFLOW_ERROR
|| pUOut
!= UOut
+ 4 || pLIn
!= (const char *)pszLMBCS
+offsets
[4])
5202 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5205 /* unpaired or chopped LMBCS surrogates */
5207 /* OK high surrogate, Low surrogate is chopped */
5208 LIn
[0] = (char)0x14;
5209 LIn
[1] = (char)0xD8;
5210 LIn
[2] = (char)0x01;
5211 LIn
[3] = (char)0x14;
5212 LIn
[4] = (char)0xDC;
5214 errorCode
= U_ZERO_ERROR
;
5217 ucnv_setToUCallBack(cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
5218 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5219 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5221 log_err("Unexpected results on chopped low surrogate\n");
5224 /* chopped at surrogate boundary */
5225 LIn
[0] = (char)0x14;
5226 LIn
[1] = (char)0xD8;
5227 LIn
[2] = (char)0x01;
5229 errorCode
= U_ZERO_ERROR
;
5232 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+3),off
,TRUE
, &errorCode
);
5233 if (UOut
[0] != 0xD801 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 3)
5235 log_err("Unexpected results on chopped at surrogate boundary \n");
5238 /* unpaired surrogate plus valid Unichar */
5239 LIn
[0] = (char)0x14;
5240 LIn
[1] = (char)0xD8;
5241 LIn
[2] = (char)0x01;
5242 LIn
[3] = (char)0x14;
5243 LIn
[4] = (char)0xC9;
5244 LIn
[5] = (char)0xD0;
5246 errorCode
= U_ZERO_ERROR
;
5249 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+6),off
,TRUE
, &errorCode
);
5250 if (UOut
[0] != 0xD801 || UOut
[1] != 0xC9D0 || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 6)
5252 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5255 /* unpaired surrogate plus chopped Unichar */
5256 LIn
[0] = (char)0x14;
5257 LIn
[1] = (char)0xD8;
5258 LIn
[2] = (char)0x01;
5259 LIn
[3] = (char)0x14;
5260 LIn
[4] = (char)0xC9;
5263 errorCode
= U_ZERO_ERROR
;
5266 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5267 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 5)
5269 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5272 /* unpaired surrogate plus valid non-Unichar */
5273 LIn
[0] = (char)0x14;
5274 LIn
[1] = (char)0xD8;
5275 LIn
[2] = (char)0x01;
5276 LIn
[3] = (char)0x0F;
5277 LIn
[4] = (char)0x3B;
5280 errorCode
= U_ZERO_ERROR
;
5283 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+5),off
,TRUE
, &errorCode
);
5284 if (UOut
[0] != 0xD801 || UOut
[1] != 0x1B || U_FAILURE(errorCode
) || pUOut
!= UOut
+ 2 || pLIn
!= LIn
+ 5)
5286 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5289 /* unpaired surrogate plus chopped non-Unichar */
5290 LIn
[0] = (char)0x14;
5291 LIn
[1] = (char)0xD8;
5292 LIn
[2] = (char)0x01;
5293 LIn
[3] = (char)0x0F;
5296 errorCode
= U_ZERO_ERROR
;
5299 ucnv_toUnicode(cnv
, &pUOut
,pUOut
+UPRV_LENGTHOF(UOut
),(const char **)&pLIn
,(const char *)(pLIn
+4),off
,TRUE
, &errorCode
);
5301 if (UOut
[0] != 0xD801 || errorCode
!= U_TRUNCATED_CHAR_FOUND
|| pUOut
!= UOut
+ 1 || pLIn
!= LIn
+ 4)
5303 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5307 ucnv_close(cnv
); /* final cleanup */
/*
 * TestJitterbug255: opens a "shift-jis" converter and steps through the
 * bytes of testBytes with ucnv_getNextUChar() until testBuffer reaches
 * testEnd. The value returned by ucnv_getNextUChar() is discarded; only the
 * UErrorCode is inspected.
 * NOTE(review): brace, return and cleanup lines of this function are not
 * present in this view - confirm control flow against the full file.
 */
5311 static void TestJitterbug255()
5313 static const uint8_t testBytes
[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5314 const char *testBuffer
= (const char *)testBytes
;
/* testEnd uses sizeof(testBytes), so the trailing 0x00 byte is part of the input. */
5315 const char *testEnd
= (const char *)testBytes
+ sizeof(testBytes
);
5316 UErrorCode status
= U_ZERO_ERROR
;
5318 UConverter
*cnv
= 0;
5320 cnv
= ucnv_open("shift-jis", &status
);
/* An open failure is reported through log_data_err, not log_err. */
5321 if (U_FAILURE(status
) || cnv
== 0) {
5322 log_data_err("Failed to open the converter for SJIS.\n");
/* testBuffer is passed by address, so each call moves it toward testEnd. */
5325 while (testBuffer
!= testEnd
)
5327 /*result = */ucnv_getNextUChar (cnv
, &testBuffer
, testEnd
, &status
);
5328 if (U_FAILURE(status
))
5330 log_err("Failed to convert the next UChar for SJIS.\n");
/*
 * TestEBCDICUS4XML: round-trip check for the "ebcdic-xml-us" converter.
 *   1. Converts the first 3 bytes of newLines_x (0x25, 0x15, 0x0D) to
 *      Unicode and expects the first 3 UChars of toUnicodeMaps_x
 *      (0x000A, 0x000A, 0x000D).
 *   2. Converts those 3 UChars back to bytes and expects the first 3 bytes
 *      of fromUnicodeMaps_x (0x25, 0x25, 0x0D).
 * On a mismatch or failure status the test logs an error and prints both the
 * actual and the expected sequence.
 * NOTE(review): brace, return and cleanup lines of this function are not
 * present in this view - confirm control flow against the full file.
 */
5337 static void TestEBCDICUS4XML()
5339 UChar unicodes_x
[] = {0x0000, 0x0000, 0x0000, 0x0000};
5340 static const UChar toUnicodeMaps_x
[] = {0x000A, 0x000A, 0x000D, 0x0000};
5341 static const char fromUnicodeMaps_x
[] = {0x25, 0x25, 0x0D, 0x00};
5342 static const char newLines_x
[] = {0x25, 0x15, 0x0D, 0x00};
5343 char target_x
[] = {0x00, 0x00, 0x00, 0x00};
/*
 * Cursor pointers handed to the converter by address; the *_x arrays keep
 * the buffer starts for the later memcmp/print calls.
 */
5344 UChar
*unicodes
= unicodes_x
;
5345 const UChar
*toUnicodeMaps
= toUnicodeMaps_x
;
5346 char *target
= target_x
;
5347 const char* fromUnicodeMaps
= fromUnicodeMaps_x
, *newLines
= newLines_x
;
5348 UErrorCode status
= U_ZERO_ERROR
;
5349 UConverter
*cnv
= 0;
5351 cnv
= ucnv_open("ebcdic-xml-us", &status
);
5352 if (U_FAILURE(status
) || cnv
== 0) {
5353 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
/* Bytes -> Unicode: 3 input bytes, room for 3 UChars, no offsets, flush set. */
5356 ucnv_toUnicode(cnv
, &unicodes
, unicodes
+3, (const char**)&newLines
, newLines
+3, NULL
, TRUE
, &status
);
/* Compare from unicodes_x (buffer start), not from the cursor `unicodes`. */
5357 if (U_FAILURE(status
) || memcmp(unicodes_x
, toUnicodeMaps
, sizeof(UChar
)*3) != 0) {
5358 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5359 u_errorName(status
));
5360 printUSeqErr(unicodes_x
, 3);
5361 printUSeqErr(toUnicodeMaps
, 3);
/* Clear any error from the first direction before testing the reverse one. */
5363 status
= U_ZERO_ERROR
;
/* Unicode -> bytes: toUnicodeMaps is the source cursor for this call. */
5364 ucnv_fromUnicode(cnv
, &target
, target
+3, (const UChar
**)&toUnicodeMaps
, toUnicodeMaps
+3, NULL
, TRUE
, &status
);
/* Compare from target_x (buffer start), not from the cursor `target`. */
5365 if (U_FAILURE(status
) || memcmp(target_x
, fromUnicodeMaps
, sizeof(char)*3) != 0) {
5366 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5367 u_errorName(status
));
5368 printSeqErr((const unsigned char*)target_x
, 3);
5369 printSeqErr((const unsigned char*)fromUnicodeMaps
, 3);
5373 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5375 #if !UCONFIG_NO_COLLATION
/*
 * TestJitterbug981: converts the rule string of the "zh" collator to UTF-8
 * with ucnv_fromUChars(), repeating while the status is
 * U_BUFFER_OVERFLOW_ERROR, and checks that the required length reported by
 * ucnv_fromUChars() is the same on every pass.
 * NOTE(review): the declarations of rules, buff and numNeeded, the initial
 * value of target_cap, and the loop-opening line are not present in this
 * view - confirm against the full file.
 */
5377 static void TestJitterbug981(){
5379 int32_t rules_length
, target_cap
, bytes_needed
, buff_size
;
5380 UErrorCode status
= U_ZERO_ERROR
;
5381 UConverter
*utf8cnv
;
5382 UCollator
* myCollator
;
5385 utf8cnv
= ucnv_open ("utf8", &status
);
5386 if(U_FAILURE(status
)){
5387 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status
));
5390 myCollator
= ucol_open("zh", &status
);
/* Collator open failure: reported via log_data_err, converter is closed. */
5391 if(U_FAILURE(status
)){
5392 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status
));
5393 ucnv_close(utf8cnv
);
5397 rules
= ucol_getRules(myCollator
, &rules_length
);
/* An empty rule string is reported via log_data_err; both handles are closed. */
5398 if(rules_length
== 0) {
5399 log_data_err("missing zh tailoring rule string\n");
5400 ucol_close(myCollator
);
5401 ucnv_close(utf8cnv
);
/* Output buffer size: rule length times the converter's max char size. */
5404 buff_size
= rules_length
* ucnv_getMaxCharSize(utf8cnv
);
/* NOTE(review): no NULL check on the malloc result and no free(buff) are visible in this view - confirm. */
5405 buff
= malloc(buff_size
);
/* Each pass starts from a reset converter and a cleared status. */
5409 ucnv_reset(utf8cnv
);
5410 status
= U_ZERO_ERROR
;
/* The requested capacity must stay below the allocated size. */
5411 if(target_cap
>= buff_size
) {
5412 log_err("wanted %d bytes, only %d available\n", target_cap
, buff_size
);
5415 bytes_needed
= ucnv_fromUChars(utf8cnv
, buff
, target_cap
,
5416 rules
, rules_length
, &status
);
/* Next capacity: the reported need if it exceeds the current one, otherwise one more than the current one. */
5417 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* Once a length has been recorded, later passes must report the same one. */
5418 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5419 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5422 numNeeded
= bytes_needed
;
5423 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5424 ucol_close(myCollator
);
5425 ucnv_close(utf8cnv
);
5431 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestJitterbug1293: converts a fixed UTF-16 string to Shift-JIS with
 * ucnv_fromUChars(), repeating while the status is U_BUFFER_OVERFLOW_ERROR,
 * and checks that the required length reported is the same on every pass.
 * Any failure status left after the loop is logged.
 * NOTE(review): the declaration of target, the initial value of target_cap
 * and the loop-opening line(s) are not present in this view - confirm
 * against the full file.
 */
5432 static void TestJitterbug1293(){
/* Source text; the trailing 0x000 terminates it for u_strlen(). */
5433 static const UChar src
[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5435 UErrorCode status
= U_ZERO_ERROR
;
5436 UConverter
* conv
=NULL
;
5437 int32_t target_cap
, bytes_needed
, numNeeded
= 0;
5438 conv
= ucnv_open("shift-jis",&status
);
5439 if(U_FAILURE(status
)){
5440 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status
));
/* The capacity passed here is the literal 256, not target_cap. */
5446 bytes_needed
= ucnv_fromUChars(conv
,target
,256,src
,u_strlen(src
),&status
);
/* Next capacity: the reported need if it exceeds the current one, otherwise one more than the current one. */
5447 target_cap
= (bytes_needed
> target_cap
) ? bytes_needed
: target_cap
+1;
/* Once a length has been recorded, later passes must report the same one. */
5448 if(numNeeded
!=0 && numNeeded
!= bytes_needed
){
5449 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5451 numNeeded
= bytes_needed
;
5452 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
5453 if(U_FAILURE(status
)){
5454 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status
));
/*
 * TestJB5275_1: converts the ISCII byte string in data to UTF-16 with the
 * "iscii-gur" converter and reports code units that differ from expected[].
 * Per the log_verbose message, the scenario under test is the switch back to
 * the default script when a new line is encountered.
 * NOTE(review): the comparison statement inside the final loop, the pointer
 * increments and the rewind of target are not present in this view -
 * confirm against the full file.
 */
5461 static void TestJB5275_1(){
5463 static const char* data
= "\x3B\xB3\x0A" /* Easy characters */
5464 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5465 /* Switch script: */
5466 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5467 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5468 "\xEF\x40\x3B\xB3\x0A";
/* Expected UTF-16 output, grouped to match the segments of data above. */
5469 static const UChar expected
[] ={
5470 0x003b, 0x0a15, 0x000a, /* Easy characters */
5471 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5472 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5473 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5474 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5477 UErrorCode status
= U_ZERO_ERROR
;
5478 UConverter
* conv
= ucnv_open("iscii-gur", &status
);
/* Output buffer of 100 UChars; target/targetLimit delimit it for the converter. */
5479 UChar dest
[100] = {'\0'};
5480 UChar
* target
= dest
;
5481 UChar
* targetLimit
= dest
+100;
5482 const char* source
= data
;
/* The source length comes from strlen(), so the terminating NUL is not converted. */
5483 const char* sourceLimit
= data
+strlen(data
);
5484 const UChar
* exp
= expected
;
5486 if (U_FAILURE(status
)) {
5487 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status
));
5491 log_verbose("Testing switching back to default script when new line is encountered.\n");
/* Single-shot conversion of the whole input, no offsets, flush set. */
5492 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5493 if(U_FAILURE(status
)){
5494 log_err("conversion failed: %s \n", u_errorName(status
));
/* After the conversion, target marks the end of the produced output. */
5496 targetLimit
= target
;
5498 printUSeq(target
, targetLimit
-target
);
5499 while(target
<targetLimit
){
5501 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
/*
 * TestJB5275: converts an ISCII byte string that contains several
 * \xEF-prefixed script-switch sequences to UTF-16 with the "iscii"
 * converter and reports code units that differ from expected[].
 * In the visible lines the status from ucnv_open() is first examined only
 * after ucnv_toUnicode() has been called, and a failure is reported through
 * log_data_err.
 * NOTE(review): the comparison statement inside the final loop, the pointer
 * increments and the rewind of target are not present in this view -
 * confirm against the full file.
 */
5509 static void TestJB5275(){
5510 static const char* data
=
5511 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5512 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5513 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5514 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5515 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5516 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5517 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5518 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5519 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5520 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
/* Expected UTF-16 output, one row per active segment of data above. */
5521 static const UChar expected
[] ={
5522 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5523 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5524 0x0038, 0x0C95, 0x000A, /* Kannada test */
5525 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5526 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5527 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5530 UErrorCode status
= U_ZERO_ERROR
;
5531 UConverter
* conv
= ucnv_open("iscii", &status
);
/* Output buffer of 100 UChars; target/targetLimit delimit it for the converter. */
5532 UChar dest
[100] = {'\0'};
5533 UChar
* target
= dest
;
5534 UChar
* targetLimit
= dest
+100;
5535 const char* source
= data
;
/* The source length comes from strlen(), so the terminating NUL is not converted. */
5536 const char* sourceLimit
= data
+strlen(data
);
5537 const UChar
* exp
= expected
;
/* Single-shot conversion of the whole input, no offsets, flush set. */
5538 ucnv_toUnicode(conv
, &target
, targetLimit
, &source
, sourceLimit
, NULL
, TRUE
, &status
);
5539 if(U_FAILURE(status
)){
5540 log_data_err("conversion failed: %s \n", u_errorName(status
));
/* After the conversion, target marks the end of the produced output. */
5542 targetLimit
= target
;
5545 printUSeq(target
, targetLimit
-target
);
5547 while(target
<targetLimit
){
5549 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp
, *target
);
5558 TestIsFixedWidth() {
5559 UErrorCode status
= U_ZERO_ERROR
;
5560 UConverter
*cnv
= NULL
;
5563 const char *fixedWidth
[] = {
5566 "ibm-5478_P100-1995"
5569 const char *notFixedWidth
[] = {
5576 for (i
= 0; i
< UPRV_LENGTHOF(fixedWidth
); i
++) {
5577 cnv
= ucnv_open(fixedWidth
[i
], &status
);
5578 if (cnv
== NULL
|| U_FAILURE(status
)) {
5579 log_data_err("Error open converter: %s - %s \n", fixedWidth
[i
], u_errorName(status
));
5583 if (!ucnv_isFixedWidth(cnv
, &status
)) {
5584 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth
[i
]);
5589 for (i
= 0; i
< UPRV_LENGTHOF(notFixedWidth
); i
++) {
5590 cnv
= ucnv_open(notFixedWidth
[i
], &status
);
5591 if (cnv
== NULL
|| U_FAILURE(status
)) {
5592 log_data_err("Error open converter: %s - %s \n", notFixedWidth
[i
], u_errorName(status
));
5596 if (ucnv_isFixedWidth(cnv
, &status
)) {
5597 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth
[i
]);