1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: convtest.cpp
12 * tab size: 8 (not used)
15 * created on: 2003jul15
16 * created by: Markus W. Scherer
18 * Test file for data-driven conversion tests.
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_LEGACY_CONVERSION
25 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
26 * is slightly unnecessary - it removes tests for Unicode charsets
27 * like UTF-8 that should work.
28 * However, there is no easy way for the test to detect whether a test case
29 * is for a Unicode charset, so it would be difficult to only exclude those.
30 * Also, regular testing of ICU is done with all modules on, therefore
31 * not testing conversion for a custom configuration like this should be ok.
34 #include "unicode/ucnv.h"
35 #include "unicode/unistr.h"
36 #include "unicode/parsepos.h"
37 #include "unicode/uniset.h"
38 #include "unicode/ustring.h"
39 #include "unicode/ures.h"
42 #include "unicode/tstdtmod.h"
47 // characters used in test data for callbacks
54 ConversionTest::ConversionTest() {
55 UErrorCode errorCode
=U_ZERO_ERROR
;
56 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
57 ucnv_setToUCallBack(utf8Cnv
, UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
58 if(U_FAILURE(errorCode
)) {
59 errln("unable to open UTF-8 converter");
63 ConversionTest::~ConversionTest() {
68 ConversionTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
69 if (exec
) logln("TestSuite ConversionTest: ");
71 #if !UCONFIG_NO_FILE_IO
72 case 0: name
="TestToUnicode"; if (exec
) TestToUnicode(); break;
73 case 1: name
="TestFromUnicode"; if (exec
) TestFromUnicode(); break;
74 case 2: name
="TestGetUnicodeSet"; if (exec
) TestGetUnicodeSet(); break;
75 case 3: name
="TestDefaultIgnorableCallback"; if (exec
) TestDefaultIgnorableCallback(); break;
80 case 3: name
="skip"; break;
82 case 4: name
="TestGetUnicodeSet2"; if (exec
) TestGetUnicodeSet2(); break;
83 default: name
=""; break; //needed to end loop
87 // test data interface ----------------------------------------------------- ***
90 ConversionTest::TestToUnicode() {
92 char charset
[100], cbopt
[4];
94 UnicodeString s
, unicode
;
95 int32_t offsetsLength
;
96 UConverterToUCallback callback
;
98 TestDataModule
*dataModule
;
100 const DataMap
*testCase
;
101 UErrorCode errorCode
;
104 errorCode
=U_ZERO_ERROR
;
105 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
106 if(U_SUCCESS(errorCode
)) {
107 testData
=dataModule
->createTestData("toUnicode", errorCode
);
108 if(U_SUCCESS(errorCode
)) {
109 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
110 if(U_FAILURE(errorCode
)) {
111 errln("error retrieving conversion/toUnicode test case %d - %s",
112 i
, u_errorName(errorCode
));
113 errorCode
=U_ZERO_ERROR
;
119 s
=testCase
->getString("charset", errorCode
);
120 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
123 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
124 unicode
=testCase
->getString("unicode", errorCode
);
125 cc
.unicode
=unicode
.getBuffer();
126 cc
.unicodeLength
=unicode
.length();
129 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
130 if(offsetsLength
==0) {
132 } else if(offsetsLength
!=unicode
.length()) {
133 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
134 i
, unicode
.length(), offsetsLength
);
135 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
138 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
139 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
141 s
=testCase
->getString("errorCode", errorCode
);
142 if(s
==UNICODE_STRING("invalid", 7)) {
143 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
144 } else if(s
==UNICODE_STRING("illegal", 7)) {
145 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
146 } else if(s
==UNICODE_STRING("truncated", 9)) {
147 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
148 } else if(s
==UNICODE_STRING("illesc", 6)) {
149 cc
.outErrorCode
=U_ILLEGAL_ESCAPE_SEQUENCE
;
150 } else if(s
==UNICODE_STRING("unsuppesc", 9)) {
151 cc
.outErrorCode
=U_UNSUPPORTED_ESCAPE_SEQUENCE
;
153 cc
.outErrorCode
=U_ZERO_ERROR
;
156 s
=testCase
->getString("callback", errorCode
);
157 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
161 callback
=UCNV_TO_U_CALLBACK_SUBSTITUTE
;
164 callback
=UCNV_TO_U_CALLBACK_SKIP
;
167 callback
=UCNV_TO_U_CALLBACK_STOP
;
170 callback
=UCNV_TO_U_CALLBACK_ESCAPE
;
176 option
=callback
==NULL
? cbopt
: cbopt
+1;
181 cc
.invalidChars
=testCase
->getBinary(cc
.invalidLength
, "invalidChars", errorCode
);
183 if(U_FAILURE(errorCode
)) {
184 errln("error parsing conversion/toUnicode test case %d - %s",
185 i
, u_errorName(errorCode
));
186 errorCode
=U_ZERO_ERROR
;
188 logln("TestToUnicode[%d] %s", i
, charset
);
189 ToUnicodeCase(cc
, callback
, option
);
197 dataerrln("Could not load test conversion data");
202 ConversionTest::TestFromUnicode() {
204 char charset
[100], cbopt
[4];
206 UnicodeString s
, unicode
, invalidUChars
;
207 int32_t offsetsLength
, index
;
208 UConverterFromUCallback callback
;
210 TestDataModule
*dataModule
;
212 const DataMap
*testCase
;
214 UErrorCode errorCode
;
217 errorCode
=U_ZERO_ERROR
;
218 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
219 if(U_SUCCESS(errorCode
)) {
220 testData
=dataModule
->createTestData("fromUnicode", errorCode
);
221 if(U_SUCCESS(errorCode
)) {
222 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
223 if(U_FAILURE(errorCode
)) {
224 errln("error retrieving conversion/fromUnicode test case %d - %s",
225 i
, u_errorName(errorCode
));
226 errorCode
=U_ZERO_ERROR
;
232 s
=testCase
->getString("charset", errorCode
);
233 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
236 unicode
=testCase
->getString("unicode", errorCode
);
237 cc
.unicode
=unicode
.getBuffer();
238 cc
.unicodeLength
=unicode
.length();
239 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
242 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
243 if(offsetsLength
==0) {
245 } else if(offsetsLength
!=cc
.bytesLength
) {
246 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
247 i
, cc
.bytesLength
, offsetsLength
);
248 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
251 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
252 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
254 s
=testCase
->getString("errorCode", errorCode
);
255 if(s
==UNICODE_STRING("invalid", 7)) {
256 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
257 } else if(s
==UNICODE_STRING("illegal", 7)) {
258 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
259 } else if(s
==UNICODE_STRING("truncated", 9)) {
260 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
262 cc
.outErrorCode
=U_ZERO_ERROR
;
265 s
=testCase
->getString("callback", errorCode
);
266 cc
.setSub
=0; // default: no subchar
268 if((index
=s
.indexOf((UChar
)0))>0) {
269 // read NUL-separated subchar first, if any
270 // copy the subchar from Latin-1 characters
271 // start after the NUL
272 p
=s
.getTerminatedBuffer();
275 length
=s
.length()-length
;
276 if(length
<=0 || length
>=(int32_t)sizeof(cc
.subchar
)) {
277 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
281 for(j
=0; j
<length
; ++j
) {
282 cc
.subchar
[j
]=(char)p
[j
];
284 // NUL-terminate the subchar
289 // remove the NUL and subchar from s
291 } else if((index
=s
.indexOf((UChar
)0x3d))>0) /* '=' */ {
292 // read a substitution string, separated by an equal sign
293 p
=s
.getBuffer()+index
+1;
294 length
=s
.length()-(index
+1);
295 if(length
<0 || length
>=UPRV_LENGTHOF(cc
.subString
)) {
296 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
298 u_memcpy(cc
.subString
, p
, length
);
299 // NUL-terminate the subString
300 cc
.subString
[length
]=0;
304 // remove the equal sign and subString from s
308 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
312 callback
=UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
315 callback
=UCNV_FROM_U_CALLBACK_SKIP
;
318 callback
=UCNV_FROM_U_CALLBACK_STOP
;
321 callback
=UCNV_FROM_U_CALLBACK_ESCAPE
;
327 option
=callback
==NULL
? cbopt
: cbopt
+1;
332 invalidUChars
=testCase
->getString("invalidUChars", errorCode
);
333 cc
.invalidUChars
=invalidUChars
.getBuffer();
334 cc
.invalidLength
=invalidUChars
.length();
336 if(U_FAILURE(errorCode
)) {
337 errln("error parsing conversion/fromUnicode test case %d - %s",
338 i
, u_errorName(errorCode
));
339 errorCode
=U_ZERO_ERROR
;
341 logln("TestFromUnicode[%d] %s", i
, charset
);
342 FromUnicodeCase(cc
, callback
, option
);
350 dataerrln("Could not load test conversion data");
354 static const UChar ellipsis
[]={ 0x2e, 0x2e, 0x2e };
357 ConversionTest::TestGetUnicodeSet() {
359 UnicodeString s
, map
, mapnot
;
363 UnicodeSet cnvSet
, mapSet
, mapnotSet
, diffSet
;
364 UnicodeSet
*cnvSetPtr
= &cnvSet
;
365 LocalUConverterPointer cnv
;
367 TestDataModule
*dataModule
;
369 const DataMap
*testCase
;
370 UErrorCode errorCode
;
373 errorCode
=U_ZERO_ERROR
;
374 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
375 if(U_SUCCESS(errorCode
)) {
376 testData
=dataModule
->createTestData("getUnicodeSet", errorCode
);
377 if(U_SUCCESS(errorCode
)) {
378 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
379 if(U_FAILURE(errorCode
)) {
380 errln("error retrieving conversion/getUnicodeSet test case %d - %s",
381 i
, u_errorName(errorCode
));
382 errorCode
=U_ZERO_ERROR
;
386 s
=testCase
->getString("charset", errorCode
);
387 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
389 map
=testCase
->getString("map", errorCode
);
390 mapnot
=testCase
->getString("mapnot", errorCode
);
392 which
=testCase
->getInt28("which", errorCode
);
394 if(U_FAILURE(errorCode
)) {
395 errln("error parsing conversion/getUnicodeSet test case %d - %s",
396 i
, u_errorName(errorCode
));
397 errorCode
=U_ZERO_ERROR
;
401 // test this test case
406 mapSet
.applyPattern(map
, pos
, 0, NULL
, errorCode
);
407 if(U_FAILURE(errorCode
) || pos
.getIndex()!=map
.length()) {
408 errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
409 " error index %d index %d U+%04x",
410 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), map
.char32At(pos
.getIndex()));
411 errorCode
=U_ZERO_ERROR
;
416 mapnotSet
.applyPattern(mapnot
, pos
, 0, NULL
, errorCode
);
417 if(U_FAILURE(errorCode
) || pos
.getIndex()!=mapnot
.length()) {
418 errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
419 " error index %d index %d U+%04x",
420 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), mapnot
.char32At(pos
.getIndex()));
421 errorCode
=U_ZERO_ERROR
;
425 logln("TestGetUnicodeSet[%d] %s", i
, charset
);
427 cnv
.adoptInstead(cnv_open(charset
, errorCode
));
428 if(U_FAILURE(errorCode
)) {
429 errcheckln(errorCode
, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
430 charset
, i
, u_errorName(errorCode
));
431 errorCode
=U_ZERO_ERROR
;
435 ucnv_getUnicodeSet(cnv
.getAlias(), cnvSetPtr
->toUSet(), (UConverterUnicodeSet
)which
, &errorCode
);
437 if(U_FAILURE(errorCode
)) {
438 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
439 charset
, i
, u_errorName(errorCode
));
440 errorCode
=U_ZERO_ERROR
;
444 // are there items that must be in cnvSet but are not?
445 (diffSet
=mapSet
).removeAll(cnvSet
);
446 if(!diffSet
.isEmpty()) {
447 diffSet
.toPattern(s
, TRUE
);
449 s
.replace(100, 0x7fffffff, ellipsis
, UPRV_LENGTHOF(ellipsis
));
451 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
456 // are there items that must not be in cnvSet but are?
457 (diffSet
=mapnotSet
).retainAll(cnvSet
);
458 if(!diffSet
.isEmpty()) {
459 diffSet
.toPattern(s
, TRUE
);
461 s
.replace(100, 0x7fffffff, ellipsis
, UPRV_LENGTHOF(ellipsis
));
463 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
473 dataerrln("Could not load test conversion data");
478 static void U_CALLCONV
479 getUnicodeSetCallback(const void *context
,
480 UConverterFromUnicodeArgs
* /*fromUArgs*/,
481 const UChar
* /*codeUnits*/,
484 UConverterCallbackReason reason
,
485 UErrorCode
*pErrorCode
) {
486 if(reason
<=UCNV_IRREGULAR
) {
487 ((UnicodeSet
*)context
)->remove(codePoint
); // the converter cannot convert this code point
488 *pErrorCode
=U_ZERO_ERROR
; // skip
489 } // else ignore the reset, close and clone calls.
493 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
495 ConversionTest::TestGetUnicodeSet2() {
496 // Build a string with all code points.
500 cpLimit
=s0Length
=0x10000; // BMP only
503 s0Length
=0x10000+0x200000; // BMP + surrogate pairs
505 UChar
*s0
=new UChar
[s0Length
];
513 for(c
=0; c
<=0xd7ff; ++c
) {
517 for(c
=0xdc00; c
<=0xdfff; ++c
) {
521 // (after trails so that there is not even one surrogate pair in between)
522 for(c
=0xd800; c
<=0xdbff; ++c
) {
526 for(c
=0xe000; c
<=0xffff; ++c
) {
529 // supplementary code points = surrogate pairs
530 if(cpLimit
==0x110000) {
531 for(c
=0xd800; c
<=0xdbff; ++c
) {
532 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
539 static const char *const cnvNames
[]={
547 "ibm-1390", // EBCDIC_STATEFUL table
548 "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table
556 LocalUConverterPointer cnv
;
559 for(i
=0; i
<UPRV_LENGTHOF(cnvNames
); ++i
) {
560 UErrorCode errorCode
=U_ZERO_ERROR
;
561 cnv
.adoptInstead(cnv_open(cnvNames
[i
], errorCode
));
562 if(U_FAILURE(errorCode
)) {
563 errcheckln(errorCode
, "failed to open converter %s - %s", cnvNames
[i
], u_errorName(errorCode
));
567 ucnv_setFromUCallBack(cnv
.getAlias(), getUnicodeSetCallback
, &expected
, NULL
, NULL
, &errorCode
);
568 if(U_FAILURE(errorCode
)) {
569 errln("failed to set the callback on converter %s - %s", cnvNames
[i
], u_errorName(errorCode
));
572 UConverterUnicodeSet which
;
573 for(which
=UCNV_ROUNDTRIP_SET
; which
<UCNV_SET_COUNT
; which
=(UConverterUnicodeSet
)((int)which
+1)) {
574 if(which
==UCNV_ROUNDTRIP_AND_FALLBACK_SET
) {
575 ucnv_setFallback(cnv
.getAlias(), TRUE
);
577 expected
.add(0, cpLimit
-1);
582 flush
=(UBool
)(s
==s0
+s0Length
);
583 ucnv_fromUnicode(cnv
.getAlias(), &t
, buffer
+sizeof(buffer
), (const UChar
**)&s
, s0
+s0Length
, NULL
, flush
, &errorCode
);
584 if(U_FAILURE(errorCode
)) {
585 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
586 errorCode
=U_ZERO_ERROR
;
589 break; // unexpected error, should not occur
594 ucnv_getUnicodeSet(cnv
.getAlias(), set
.toUSet(), which
, &errorCode
);
595 if(cpLimit
<0x110000) {
596 set
.remove(cpLimit
, 0x10ffff);
598 if(which
==UCNV_ROUNDTRIP_SET
) {
599 // ignore PUA code points because they will be converted even if they
600 // are fallbacks and when other fallbacks are turned off,
601 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
602 expected
.remove(0xe000, 0xf8ff);
603 expected
.remove(0xf0000, 0xffffd);
604 expected
.remove(0x100000, 0x10fffd);
605 set
.remove(0xe000, 0xf8ff);
606 set
.remove(0xf0000, 0xffffd);
607 set
.remove(0x100000, 0x10fffd);
610 // First try to see if we have different sets because ucnv_getUnicodeSet()
611 // added strings: The above conversion method does not tell us what strings might be convertible.
612 // Remove strings from the set and compare again.
613 // Unfortunately, there are no good, direct set methods for finding out whether there are strings
614 // in the set, nor for enumerating or removing just them.
615 // Intersect all code points with the set. The intersection will not contain strings.
616 UnicodeSet
temp(0, 0x10ffff);
624 // are there items that must be in the set but are not?
625 (diffSet
=expected
).removeAll(set
);
626 if(!diffSet
.isEmpty()) {
627 diffSet
.toPattern(out
, TRUE
);
628 if(out
.length()>100) {
629 out
.replace(100, 0x7fffffff, ellipsis
, UPRV_LENGTHOF(ellipsis
));
631 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
636 // are there items that must not be in the set but are?
637 (diffSet
=set
).removeAll(expected
);
638 if(!diffSet
.isEmpty()) {
639 diffSet
.toPattern(out
, TRUE
);
640 if(out
.length()>100) {
641 out
.replace(100, 0x7fffffff, ellipsis
, UPRV_LENGTHOF(ellipsis
));
643 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
654 // Test that all code points which have the Default_Ignorable_Code_Point Unicode property are ignored if they have no mapping.
655 // If there are any failures, the hard-coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated.
657 ConversionTest::TestDefaultIgnorableCallback() {
658 UErrorCode status
= U_ZERO_ERROR
;
659 const char *cnv_name
= "euc-jp-2007";
660 const char *pattern_ignorable
= "[:Default_Ignorable_Code_Point:]";
661 const char *pattern_not_ignorable
= "[:^Default_Ignorable_Code_Point:]";
663 UnicodeSet
*set_ignorable
= new UnicodeSet(pattern_ignorable
, status
);
664 if (U_FAILURE(status
)) {
665 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable
, u_errorName(status
));
669 UnicodeSet
*set_not_ignorable
= new UnicodeSet(pattern_not_ignorable
, status
);
670 if (U_FAILURE(status
)) {
671 dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable
, u_errorName(status
));
675 UConverter
*cnv
= cnv_open(cnv_name
, status
);
676 if (U_FAILURE(status
)) {
677 dataerrln("Unable to open converter: %s - %s\n", cnv_name
, u_errorName(status
));
681 // set callback for the converter
682 ucnv_setFromUCallBack(cnv
, UCNV_FROM_U_CALLBACK_SUBSTITUTE
, NULL
, NULL
, NULL
, &status
);
686 int32_t outputLength
;
688 // test default ignorables are ignored
689 int size
= set_ignorable
->size();
690 for (int i
= 0; i
< size
; i
++) {
691 status
= U_ZERO_ERROR
;
694 input
[0] = set_ignorable
->charAt(i
);
696 outputLength
= ucnv_fromUChars(cnv
, output
, 10, UnicodeString::fromUTF32(input
, 1).getTerminatedBuffer(), -1, &status
);
697 if (U_FAILURE(status
) || outputLength
!= 0) {
698 errln("Ignorable code point: U+%04X not skipped as expected - %s", input
[0], u_errorName(status
));
702 // test non-ignorables are not ignored
703 size
= set_not_ignorable
->size();
704 for (int i
= 0; i
< size
; i
++) {
705 status
= U_ZERO_ERROR
;
708 input
[0] = set_not_ignorable
->charAt(i
);
714 outputLength
= ucnv_fromUChars(cnv
, output
, 10, UnicodeString::fromUTF32(input
, 1).getTerminatedBuffer(), -1, &status
);
715 if (U_FAILURE(status
) || outputLength
<= 0) {
716 errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input
[0], u_errorName(status
));
721 delete set_not_ignorable
;
722 delete set_ignorable
;
725 // open testdata or ICU data converter ------------------------------------- ***
728 ConversionTest::cnv_open(const char *name
, UErrorCode
&errorCode
) {
729 if(name
!=NULL
&& *name
=='+') {
730 // Converter names that start with '+' are ignored in ICU4J tests.
733 if(name
!=NULL
&& *name
=='*') {
734 /* loadTestData(): set the data directory */
735 return ucnv_openPackage(loadTestData(errorCode
), name
+1, &errorCode
);
737 return ucnv_open(name
, &errorCode
);
741 // output helpers ---------------------------------------------------------- ***
744 hexDigit(uint8_t digit
) {
745 return digit
<=9 ? (char)('0'+digit
) : (char)('a'-10+digit
);
749 printBytes(const uint8_t *bytes
, int32_t length
, char *out
) {
755 *out
++=hexDigit((uint8_t)(b
>>4));
756 *out
++=hexDigit((uint8_t)(b
&0xf));
763 *out
++=hexDigit((uint8_t)(b
>>4));
764 *out
++=hexDigit((uint8_t)(b
&0xf));
771 printUnicode(const UChar
*unicode
, int32_t length
, char *out
) {
775 for(i
=0; i
<length
;) {
779 U16_NEXT(unicode
, i
, length
, c
);
785 *out
++=hexDigit((uint8_t)((c
>>16)&0xf));
787 *out
++=hexDigit((uint8_t)((c
>>12)&0xf));
788 *out
++=hexDigit((uint8_t)((c
>>8)&0xf));
789 *out
++=hexDigit((uint8_t)((c
>>4)&0xf));
790 *out
++=hexDigit((uint8_t)(c
&0xf));
797 printOffsets(const int32_t *offsets
, int32_t length
, char *out
) {
804 for(i
=0; i
<length
; ++i
) {
810 // print all offsets with 2 characters each (-x, -9..99, xx)
816 *out
++=(char)('0'-o
);
818 *out
++=(d
=o
/10)==0 ? ' ' : (char)('0'+d
);
819 *out
++=(char)('0'+o%10
);
829 // toUnicode test worker functions ----------------------------------------- ***
832 stepToUnicode(ConversionCase
&cc
, UConverter
*cnv
,
833 UChar
*result
, int32_t resultCapacity
,
834 int32_t *resultOffsets
, /* also resultCapacity */
836 UErrorCode
*pErrorCode
) {
837 const char *source
, *sourceLimit
, *bytesLimit
;
838 UChar
*target
, *targetLimit
, *resultLimit
;
841 source
=(const char *)cc
.bytes
;
843 bytesLimit
=source
+cc
.bytesLength
;
844 resultLimit
=result
+resultCapacity
;
847 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
848 // move only one buffer (in vs. out) at a time to be extra mean
849 // step==0 performs bulk conversion and generates offsets
851 // initialize the partial limits for the loop
853 // use the entire buffers
854 sourceLimit
=bytesLimit
;
855 targetLimit
=resultLimit
;
858 // start with empty partial buffers
863 // output offsets only for bulk conversion
868 // resetting the opposite conversion direction must not affect this one
869 ucnv_resetFromUnicode(cnv
);
873 &target
, targetLimit
,
874 &source
, sourceLimit
,
878 // check pointers and errors
879 if(source
>sourceLimit
|| target
>targetLimit
) {
880 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
882 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
883 if(target
!=targetLimit
) {
884 // buffer overflow must only be set when the target is filled
885 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
887 } else if(targetLimit
==resultLimit
) {
888 // not just a partial overflow
892 // the partial target is filled, set a new limit, reset the error and continue
893 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
894 *pErrorCode
=U_ZERO_ERROR
;
895 } else if(U_FAILURE(*pErrorCode
)) {
896 // some other error occurred, done
899 if(source
!=sourceLimit
) {
900 // when no error occurs, then the input must be consumed
901 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
905 if(sourceLimit
==bytesLimit
) {
910 // the partial conversion succeeded, set a new limit and continue
911 sourceLimit
=(bytesLimit
-source
)>=step
? source
+step
: bytesLimit
;
912 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==bytesLimit
);
915 } else /* step<0 */ {
917 * step==-1: call only ucnv_getNextUChar()
918 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
919 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
920 * else give it at most (-step-2)/2 bytes
924 // end the loop by getting an index out of bounds error
926 // resetting the opposite conversion direction must not affect this one
927 ucnv_resetFromUnicode(cnv
);
930 if((step
&1)!=0 /* odd: -1, -3, -5, ... */) {
931 sourceLimit
=source
; // use sourceLimit not as a real limit
932 // but to remember the pre-getNextUChar source pointer
933 c
=ucnv_getNextUChar(cnv
, &source
, bytesLimit
, pErrorCode
);
935 // check pointers and errors
936 if(*pErrorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
937 if(source
!=bytesLimit
) {
938 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
940 *pErrorCode
=U_ZERO_ERROR
;
943 } else if(U_FAILURE(*pErrorCode
)) {
946 // source may not move if c is from previous overflow
948 if(target
==resultLimit
) {
949 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
955 *target
++=U16_LEAD(c
);
956 if(target
==resultLimit
) {
957 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
960 *target
++=U16_TRAIL(c
);
963 // alternate between -n-1 and -n but leave -1 alone
967 } else /* step is even */ {
968 // allow only one UChar output
969 targetLimit
=target
<resultLimit
? target
+1 : resultLimit
;
971 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
972 // and never output offsets
974 sourceLimit
=bytesLimit
;
976 sourceLimit
=source
+(-step
-2)/2;
977 if(sourceLimit
>bytesLimit
) {
978 sourceLimit
=bytesLimit
;
983 &target
, targetLimit
,
984 &source
, sourceLimit
,
985 NULL
, (UBool
)(sourceLimit
==bytesLimit
), pErrorCode
);
987 // check pointers and errors
988 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
989 if(target
!=targetLimit
) {
990 // buffer overflow must only be set when the target is filled
991 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
993 } else if(targetLimit
==resultLimit
) {
994 // not just a partial overflow
998 // the partial target is filled, set a new limit and continue
999 *pErrorCode
=U_ZERO_ERROR
;
1000 } else if(U_FAILURE(*pErrorCode
)) {
1001 // some other error occurred, done
1004 if(source
!=sourceLimit
) {
1005 // when no error occurs, then the input must be consumed
1006 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1010 // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
1018 return (int32_t)(target
-result
);
1022 ConversionTest::ToUnicodeCase(ConversionCase
&cc
, UConverterToUCallback callback
, const char *option
) {
1023 // open the converter
1024 IcuTestErrorCode
errorCode(*this, "ToUnicodeCase");
1025 LocalUConverterPointer
cnv(cnv_open(cc
.charset
, errorCode
));
1026 // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
1027 if(errorCode
.isFailure()) {
1028 errcheckln(errorCode
, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1029 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, errorCode
.errorName());
1035 if(callback
!=NULL
) {
1036 ucnv_setToUCallBack(cnv
.getAlias(), callback
, option
, NULL
, NULL
, errorCode
);
1037 if(U_FAILURE(errorCode
)) {
1038 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
1039 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1044 int32_t resultOffsets
[256];
1046 int32_t resultLength
;
1049 static const struct {
1053 { 0, "bulk" }, // must be first for offsets to be checked
1058 { -2, "toU(bulk)+getNext" },
1059 { -3, "getNext+toU(bulk)" },
1060 { -4, "toU(1)+getNext" },
1061 { -5, "getNext+toU(1)" },
1062 { -12, "toU(5)+getNext" },
1063 { -13, "getNext+toU(5)" },
1068 for(i
=0; i
<UPRV_LENGTHOF(steps
) && ok
; ++i
) {
1070 if(step
<0 && !cc
.finalFlush
) {
1071 // skip ucnv_getNextUChar() if !finalFlush because
1072 // ucnv_getNextUChar() always implies flush
1076 // bulk test is first, then offsets are not checked any more
1080 memset(resultOffsets
, -1, UPRV_LENGTHOF(resultOffsets
));
1082 memset(result
, -1, UPRV_LENGTHOF(result
));
1084 resultLength
=stepToUnicode(cc
, cnv
.getAlias(),
1085 result
, UPRV_LENGTHOF(result
),
1086 step
==0 ? resultOffsets
: NULL
,
1089 cc
, cnv
.getAlias(), steps
[i
].name
,
1090 result
, resultLength
,
1091 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
1093 if(errorCode
.isFailure() || !cc
.finalFlush
) {
1094 // reset if an error occurred or we did not flush
1095 // otherwise do nothing to make sure that flushing resets
1096 ucnv_resetToUnicode(cnv
.getAlias());
1098 if (cc
.offsets
!= NULL
&& resultOffsets
[resultLength
] != -1) {
1099 errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1100 cc
.caseNr
, cc
.charset
, resultLength
);
1102 if (result
[resultLength
] != (UChar
)-1) {
1103 errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
1104 cc
.caseNr
, cc
.charset
, resultLength
);
1108 // not a real loop, just a convenience for breaking out of the block
1109 while(ok
&& cc
.finalFlush
) {
1110 // test ucnv_toUChars()
1111 memset(result
, 0, sizeof(result
));
1114 resultLength
=ucnv_toUChars(cnv
.getAlias(),
1115 result
, UPRV_LENGTHOF(result
),
1116 (const char *)cc
.bytes
, cc
.bytesLength
,
1119 cc
, cnv
.getAlias(), "toUChars",
1120 result
, resultLength
,
1127 // test preflighting
1128 // keep the correct result for simple checking
1130 resultLength
=ucnv_toUChars(cnv
.getAlias(),
1132 (const char *)cc
.bytes
, cc
.bytesLength
,
1134 if(errorCode
.get()==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
.get()==U_BUFFER_OVERFLOW_ERROR
) {
1138 cc
, cnv
.getAlias(), "preflight toUChars",
1139 result
, resultLength
,
1145 errorCode
.reset(); // all errors have already been reported
1150 ConversionTest::checkToUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
1151 const UChar
*result
, int32_t resultLength
,
1152 const int32_t *resultOffsets
,
1153 UErrorCode resultErrorCode
) {
1154 char resultInvalidChars
[8];
1155 int8_t resultInvalidLength
;
1156 UErrorCode errorCode
;
1160 // reset the message; NULL will mean "ok"
1163 errorCode
=U_ZERO_ERROR
;
1164 resultInvalidLength
=sizeof(resultInvalidChars
);
1165 ucnv_getInvalidChars(cnv
, resultInvalidChars
, &resultInvalidLength
, &errorCode
);
1166 if(U_FAILURE(errorCode
)) {
1167 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
1168 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
1172 // check everything that might have gone wrong
1173 if(cc
.unicodeLength
!=resultLength
) {
1174 msg
="wrong result length";
1175 } else if(0!=u_memcmp(cc
.unicode
, result
, cc
.unicodeLength
)) {
1176 msg
="wrong result string";
1177 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.unicodeLength
*sizeof(*cc
.offsets
))) {
1178 msg
="wrong offsets";
1179 } else if(cc
.outErrorCode
!=resultErrorCode
) {
1180 msg
="wrong error code";
1181 } else if(cc
.invalidLength
!=resultInvalidLength
) {
1182 msg
="wrong length of last invalid input";
1183 } else if(0!=memcmp(cc
.invalidChars
, resultInvalidChars
, cc
.invalidLength
)) {
1184 msg
="wrong last invalid input";
1190 char buffer
[2000]; // one buffer for all strings
1191 char *s
, *bytesString
, *unicodeString
, *resultString
,
1192 *offsetsString
, *resultOffsetsString
,
1193 *invalidCharsString
, *resultInvalidCharsString
;
1195 bytesString
=s
=buffer
;
1196 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
);
1197 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
=s
);
1198 s
=printUnicode(result
, resultLength
, resultString
=s
);
1199 s
=printOffsets(cc
.offsets
, cc
.unicodeLength
, offsetsString
=s
);
1200 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
1201 s
=printBytes(cc
.invalidChars
, cc
.invalidLength
, invalidCharsString
=s
);
1202 s
=printBytes((uint8_t *)resultInvalidChars
, resultInvalidLength
, resultInvalidCharsString
=s
);
1204 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
1205 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
1206 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
1210 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1212 " expected <%s>[%d]\n"
1213 " result <%s>[%d]\n"
1215 " result offsets <%s>\n"
1216 " error code expected %s got %s\n"
1217 " invalidChars expected <%s> got <%s>\n",
1218 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
1219 bytesString
, cc
.bytesLength
,
1220 unicodeString
, cc
.unicodeLength
,
1221 resultString
, resultLength
,
1223 resultOffsetsString
,
1224 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
1225 invalidCharsString
, resultInvalidCharsString
);
1231 // fromUnicode test worker functions --------------------------------------- ***
// stepFromUTF8(): drives ucnv_convertEx() to convert the test case's UTF-8
// input (cc.utf8Length bytes starting at source) through a local UChar pivot
// buffer into result.
// Parameters visible here: the test case cc, the UTF-8 converter utf8Cnv, the
// converter under test cnv, the output buffer result holding resultCapacity
// bytes, and pErrorCode, which receives the final status.
// NOTE(review): `step` and `flush` are used below, but their declarations, the
// initialization of source/target and the loop header fall on lines that are
// missing from this extract - confirm against the complete file.
// Returns the number of bytes written to result, computed as target-result.
// U_INTERNAL_PROGRAM_ERROR is stored in *pErrorCode whenever one of the
// pointer or input-consumption sanity checks below fails.
1234 stepFromUTF8(ConversionCase
&cc
,
1235 UConverter
*utf8Cnv
, UConverter
*cnv
,
1236 char *result
, int32_t resultCapacity
,
1238 UErrorCode
*pErrorCode
) {
1239 const char *source
, *sourceLimit
, *utf8Limit
;
1240 UChar pivotBuffer
[32];
1241 UChar
*pivotSource
, *pivotTarget
, *pivotLimit
;
1242 char *target
, *targetLimit
, *resultLimit
;
// the pivot starts out empty: both cursors sit at the start of pivotBuffer
1246 pivotSource
=pivotTarget
=pivotBuffer
;
// absolute ends of the whole input and of the whole output buffer;
// sourceLimit/targetLimit below are the (possibly smaller) per-call windows
1248 utf8Limit
=source
+cc
.utf8Length
;
1249 resultLimit
=result
+resultCapacity
;
1251 // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
1252 // move only one buffer (in vs. out) at a time to be extra mean
1253 // step==0 performs bulk conversion
1255 // initialize the partial limits for the loop
1257 // use the entire buffers
1258 sourceLimit
=utf8Limit
;
1259 targetLimit
=resultLimit
;
1260 flush
=cc
.finalFlush
;
// the whole 32-UChar pivot buffer is available in this case
1262 pivotLimit
=pivotBuffer
+UPRV_LENGTHOF(pivotBuffer
);
1264 // start with empty partial buffers
1269 // empty pivot is not allowed, make it of length step
1270 pivotLimit
=pivotBuffer
+step
;
1274 // resetting the opposite conversion direction must not affect this one
1275 ucnv_resetFromUnicode(utf8Cnv
);
1276 ucnv_resetToUnicode(cnv
);
// cnv is passed as the target converter and utf8Cnv as the source converter,
// so UTF-8 bytes are converted into cnv's charset via the pivot buffer
1279 ucnv_convertEx(cnv
, utf8Cnv
,
1280 &target
, targetLimit
,
1281 &source
, sourceLimit
,
1282 pivotBuffer
, &pivotSource
, &pivotTarget
, pivotLimit
,
1283 FALSE
, flush
, pErrorCode
);
1285 // check pointers and errors
1286 if(source
>sourceLimit
|| target
>targetLimit
) {
1287 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1289 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1290 if(target
!=targetLimit
) {
1291 // buffer overflow must only be set when the target is filled
1292 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1294 } else if(targetLimit
==resultLimit
) {
1295 // not just a partial overflow
1299 // the partial target is filled, set a new limit, reset the error and continue
// advance the output window by step bytes, clamped to the end of result
1300 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
1301 *pErrorCode
=U_ZERO_ERROR
;
1302 } else if(U_FAILURE(*pErrorCode
)) {
// an untouched pivot read cursor is taken to mean the failure came from
// the UTF-8 (toUnicode) side rather than from cnv
1303 if(pivotSource
==pivotBuffer
) {
1304 // toUnicode error, should not occur
1305 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1308 // fromUnicode error
1309 // some other error occurred, done
1313 if(source
!=sourceLimit
) {
1314 // when no error occurs, then the input must be consumed
1315 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
// the current input window already reaches the end of the UTF-8 input
1319 if(sourceLimit
==utf8Limit
) {
1321 if(*pErrorCode
==U_STRING_NOT_TERMINATED_WARNING
) {
1322 // ucnv_convertEx() warns about not terminating the output
1323 // but ucnv_fromUnicode() does not and so
1324 // checkFromUnicode() does not expect it
1325 *pErrorCode
=U_ZERO_ERROR
;
1330 // the partial conversion succeeded, set a new limit and continue
// advance the input window by step bytes, clamped to the end of the input;
// flush only once the window reaches that end and the test case asks for it
1331 sourceLimit
=(utf8Limit
-source
)>=step
? source
+step
: utf8Limit
;
1332 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==utf8Limit
);
// number of bytes written to result
1336 return (int32_t)(target
-result
);
// stepFromUnicode(): drives ucnv_fromUnicode() to convert cc.unicodeLength
// UChars starting at source into result using the converter cnv.
// Parameters visible here: the test case cc, the converter cnv, the output
// buffer result holding resultCapacity bytes, the resultOffsets array (same
// capacity as result) and pErrorCode, which receives the final status.
// NOTE(review): `step` and `flush` are used below, but their declarations, the
// initialization of source/target, the loop header and the trailing arguments
// of the ucnv_fromUnicode() call fall on lines that are missing from this
// extract - confirm against the complete file.
// Returns the number of bytes written to result, computed as target-result.
// U_INTERNAL_PROGRAM_ERROR is stored in *pErrorCode whenever one of the
// pointer or input-consumption sanity checks below fails.
1340 stepFromUnicode(ConversionCase
&cc
, UConverter
*cnv
,
1341 char *result
, int32_t resultCapacity
,
1342 int32_t *resultOffsets
, /* also resultCapacity */
1344 UErrorCode
*pErrorCode
) {
1345 const UChar
*source
, *sourceLimit
, *unicodeLimit
;
1346 char *target
, *targetLimit
, *resultLimit
;
// absolute ends of the whole input and of the whole output buffer;
// sourceLimit/targetLimit below are the (possibly smaller) per-call windows
1351 unicodeLimit
=source
+cc
.unicodeLength
;
1352 resultLimit
=result
+resultCapacity
;
1354 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
1355 // move only one buffer (in vs. out) at a time to be extra mean
1356 // step==0 performs bulk conversion and generates offsets
1358 // initialize the partial limits for the loop
1360 // use the entire buffers
1361 sourceLimit
=unicodeLimit
;
1362 targetLimit
=resultLimit
;
1363 flush
=cc
.finalFlush
;
1365 // start with empty partial buffers
1370 // output offsets only for bulk conversion
1375 // resetting the opposite conversion direction must not affect this one
1376 ucnv_resetToUnicode(cnv
);
1379 ucnv_fromUnicode(cnv
,
1380 &target
, targetLimit
,
1381 &source
, sourceLimit
,
1385 // check pointers and errors
1386 if(source
>sourceLimit
|| target
>targetLimit
) {
1387 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1389 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1390 if(target
!=targetLimit
) {
1391 // buffer overflow must only be set when the target is filled
1392 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1394 } else if(targetLimit
==resultLimit
) {
1395 // not just a partial overflow
1399 // the partial target is filled, set a new limit, reset the error and continue
// advance the output window by step bytes, clamped to the end of result
1400 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
1401 *pErrorCode
=U_ZERO_ERROR
;
1402 } else if(U_FAILURE(*pErrorCode
)) {
1403 // some other error occurred, done
1406 if(source
!=sourceLimit
) {
1407 // when no error occurs, then the input must be consumed
1408 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
// the current input window already reaches the end of the Unicode input
1412 if(sourceLimit
==unicodeLimit
) {
1417 // the partial conversion succeeded, set a new limit and continue
// advance the input window by step UChars, clamped to the end of the input;
// flush only once the window reaches that end and the test case asks for it
1418 sourceLimit
=(unicodeLimit
-source
)>=step
? source
+step
: unicodeLimit
;
1419 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==unicodeLimit
);
// number of bytes written to result
1423 return (int32_t)(target
-result
);
// FromUnicodeCase(): runs one data-driven fromUnicode test case.
// Opens the converter for cc.charset, installs the optional callback (with
// its option string), the fallback flag and a substitution character
// sequence or substitution string, and converts cc.unicode to UTF-8.
// The conversion is then exercised in bulk and in steps of 1, 3 and 7 through
// stepFromUnicode() and stepFromUTF8(), and every run is validated with
// checkFromUnicode(). While everything passed and cc.finalFlush is set,
// ucnv_fromUChars() and its preflighting are checked as well.
// NOTE(review): several declarations, the early-exit statements and the
// cleanup/return at the end are on lines missing from this extract - confirm
// against the complete file.
1427 ConversionTest::FromUnicodeCase(ConversionCase
&cc
, UConverterFromUCallback callback
, const char *option
) {
1429 UErrorCode errorCode
;
1431 // open the converter
1432 errorCode
=U_ZERO_ERROR
;
1433 cnv
=cnv_open(cc
.charset
, errorCode
);
1434 if(U_FAILURE(errorCode
)) {
1435 errcheckln(errorCode
, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1436 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
// start the shared UTF-8 converter from a clean toUnicode state
1439 ucnv_resetToUnicode(utf8Cnv
);
// install the caller-supplied callback; a NULL callback leaves the converter's current one in place
1442 if(callback
!=NULL
) {
1443 ucnv_setFromUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
);
1444 if(U_FAILURE(errorCode
)) {
1445 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1446 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1452 // set the fallbacks flag
1453 // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1454 ucnv_setFallback(cnv
, cc
.fallbacks
);
// substitution bytes are given as a NUL-terminated char string in cc.subchar
1460 length
=(int32_t)strlen(cc
.subchar
);
1461 ucnv_setSubstChars(cnv
, cc
.subchar
, (int8_t)length
, &errorCode
);
1462 if(U_FAILURE(errorCode
)) {
1463 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1464 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
// a negative setSub selects a substitution string (NUL-terminated, length -1) instead
1468 } else if(cc
.setSub
<0) {
1469 ucnv_setSubstString(cnv
, cc
.subString
, -1, &errorCode
);
1470 if(U_FAILURE(errorCode
)) {
1471 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1472 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1478 // convert unicode to utf8
1481 u_strToUTF8(utf8
, UPRV_LENGTHOF(utf8
), &cc
.utf8Length
,
1482 cc
.unicode
, cc
.unicodeLength
,
1484 if(U_FAILURE(errorCode
)) {
1485 // skip UTF-8 testing of a string with an unpaired surrogate,
1486 // or of one that's too long
1487 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1491 int32_t resultOffsets
[256];
1493 int32_t resultLength
;
// table of step sizes with the labels used in failure messages for the
// UTF-16 path (name) and the UTF-8 path (utf8Name)
1496 static const struct {
1498 const char *name
, *utf8Name
;
1500 { 0, "bulk", "utf8" }, // must be first for offsets to be checked
1501 { 1, "step=1", "utf8 step=1" },
1502 { 3, "step=3", "utf8 step=3" },
1503 { 7, "step=7", "utf8 step=7" }
// stop at the first step size whose check fails
1508 for(i
=0; i
<UPRV_LENGTHOF(steps
) && ok
; ++i
) {
// memset() counts bytes, not elements: pre-fill the complete int32_t array
// (and the complete result buffer) with the -1 sentinel so that the
// "wrote too much" checks below are meaningful for every index
1510 memset(resultOffsets
, -1, sizeof(resultOffsets
));
1511 memset(result
, -1, sizeof(result
));
1512 errorCode
=U_ZERO_ERROR
;
1513 resultLength
=stepFromUnicode(cc
, cnv
,
1514 result
, UPRV_LENGTHOF(result
),
1515 step
==0 ? resultOffsets
: NULL
,
1517 ok
=checkFromUnicode(
1518 cc
, cnv
, steps
[i
].name
,
1519 (uint8_t *)result
, resultLength
,
1520 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
1522 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
1523 // reset if an error occurred or we did not flush
1524 // otherwise do nothing to make sure that flushing resets
1525 ucnv_resetFromUnicode(cnv
);
// the element just past the reported output must still hold the -1 sentinel
1527 if (resultOffsets
[resultLength
] != -1) {
1528 errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1529 cc
.caseNr
, cc
.charset
, resultLength
);
1531 if (result
[resultLength
] != (char)-1) {
1532 errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
1533 cc
.caseNr
, cc
.charset
, resultLength
);
1536 // bulk test is first, then offsets are not checked any more
1539 // test direct conversion from UTF-8
// a negative utf8Length marks a test string for which the UTF-8 path is skipped
1540 if(cc
.utf8Length
>=0) {
1541 errorCode
=U_ZERO_ERROR
;
1542 resultLength
=stepFromUTF8(cc
, utf8Cnv
, cnv
,
1543 result
, UPRV_LENGTHOF(result
),
1545 ok
=checkFromUnicode(
1546 cc
, cnv
, steps
[i
].utf8Name
,
1547 (uint8_t *)result
, resultLength
,
1550 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
1551 // reset if an error occurred or we did not flush
1552 // otherwise do nothing to make sure that flushing resets
1553 ucnv_resetToUnicode(utf8Cnv
);
1554 ucnv_resetFromUnicode(cnv
);
1559 // not a real loop, just a convenience for breaking out of the block
1560 while(ok
&& cc
.finalFlush
) {
1561 // test ucnv_fromUChars()
1562 memset(result
, 0, sizeof(result
));
1564 errorCode
=U_ZERO_ERROR
;
1565 resultLength
=ucnv_fromUChars(cnv
,
1566 result
, UPRV_LENGTHOF(result
),
1567 cc
.unicode
, cc
.unicodeLength
,
1569 ok
=checkFromUnicode(
1570 cc
, cnv
, "fromUChars",
1571 (uint8_t *)result
, resultLength
,
1578 // test preflighting
1579 // keep the correct result for simple checking
1580 errorCode
=U_ZERO_ERROR
;
1581 resultLength
=ucnv_fromUChars(cnv
,
1583 cc
.unicode
, cc
.unicodeLength
,
// both of these statuses are normalized to success before the result is checked
1585 if(errorCode
==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1586 errorCode
=U_ZERO_ERROR
;
1588 ok
=checkFromUnicode(
1589 cc
, cnv
, "preflight fromUChars",
1590 (uint8_t *)result
, resultLength
,
// checkFromUnicode(): compares one fromUnicode conversion result against the
// expectations stored in the test case cc.
// cnv is queried with ucnv_getInvalidUChars() for the last invalid input;
// name labels the conversion variant in every message; result/resultLength
// are the produced bytes; resultOffsets are the produced offsets (only
// compared when cc.offsets is non-NULL); resultErrorCode is the status the
// conversion ended with.
// Checked in this order: result length, result bytes, offsets, error code,
// length of the last invalid input, content of the last invalid input.
// On a mismatch all operands are rendered into one local buffer and reported
// through errln().
// NOTE(review): the declaration of `msg`, the guards around the reporting code
// and the return statements are on lines missing from this extract - confirm
// against the complete file.
1601 ConversionTest::checkFromUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
1602 const uint8_t *result
, int32_t resultLength
,
1603 const int32_t *resultOffsets
,
1604 UErrorCode resultErrorCode
) {
1605 UChar resultInvalidUChars
[8];
1606 int8_t resultInvalidLength
;
1607 UErrorCode errorCode
;
1611 // reset the message; NULL will mean "ok"
1614 errorCode
=U_ZERO_ERROR
;
// in: capacity of resultInvalidUChars; overwritten by the call below
1615 resultInvalidLength
=UPRV_LENGTHOF(resultInvalidUChars
);
1616 ucnv_getInvalidUChars(cnv
, resultInvalidUChars
, &resultInvalidLength
, &errorCode
);
1617 if(U_FAILURE(errorCode
)) {
1618 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1619 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
1623 // check everything that might have gone wrong
// the chain stops at the first mismatch, so msg names only that one
1624 if(cc
.bytesLength
!=resultLength
) {
1625 msg
="wrong result length";
1626 } else if(0!=memcmp(cc
.bytes
, result
, cc
.bytesLength
)) {
1627 msg
="wrong result string";
// one offset per output byte, hence bytesLength elements are compared
1628 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.bytesLength
*sizeof(*cc
.offsets
))) {
1629 msg
="wrong offsets";
1630 } else if(cc
.outErrorCode
!=resultErrorCode
) {
1631 msg
="wrong error code";
1632 } else if(cc
.invalidLength
!=resultInvalidLength
) {
1633 msg
="wrong length of last invalid input";
1634 } else if(0!=u_memcmp(cc
.invalidUChars
, resultInvalidUChars
, cc
.invalidLength
)) {
1635 msg
="wrong last invalid input";
1641 char buffer
[2000]; // one buffer for all strings
1642 char *s
, *unicodeString
, *bytesString
, *resultString
,
1643 *offsetsString
, *resultOffsetsString
,
1644 *invalidCharsString
, *resultInvalidUCharsString
;
// each print helper is handed the current write position and its return
// value becomes the next one, so the strings are laid out back to back in buffer
1646 unicodeString
=s
=buffer
;
1647 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
);
1648 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
=s
);
1649 s
=printBytes(result
, resultLength
, resultString
=s
);
1650 s
=printOffsets(cc
.offsets
, cc
.bytesLength
, offsetsString
=s
);
1651 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
1652 s
=printUnicode(cc
.invalidUChars
, cc
.invalidLength
, invalidCharsString
=s
);
1653 s
=printUnicode(resultInvalidUChars
, resultInvalidLength
, resultInvalidUCharsString
=s
);
// after-the-fact detection: the helpers have already written past buffer at this point
1655 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
1656 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1657 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
1661 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1662 " unicode <%s>[%d]\n"
1663 " expected <%s>[%d]\n"
1664 " result <%s>[%d]\n"
1666 " result offsets <%s>\n"
1667 " error code expected %s got %s\n"
1668 " invalidChars expected <%s> got <%s>\n",
1669 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
1670 unicodeString
, cc
.unicodeLength
,
1671 bytesString
, cc
.bytesLength
,
1672 resultString
, resultLength
,
1674 resultOffsetsString
,
1675 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
1676 invalidCharsString
, resultInvalidUCharsString
);
1682 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */