2 *******************************************************************************
4 * Copyright (C) 2003-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: convtest.cpp
10 * tab size: 8 (not used)
13 * created on: 2003jul15
14 * created by: Markus W. Scherer
16 * Test file for data-driven conversion tests.
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it removes tests for Unicode charsets
 * like UTF-8 that should work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to only exclude those.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
32 #include "unicode/ucnv.h"
33 #include "unicode/unistr.h"
34 #include "unicode/parsepos.h"
35 #include "unicode/uniset.h"
36 #include "unicode/ustring.h"
37 #include "unicode/ures.h"
39 #include "unicode/tstdtmod.h"
// Number of elements of a true C array, as an int32_t.
// Pass an array, never a pointer: with a pointer, sizeof would measure the
// pointer itself. The expansion is fully parenthesized so that it remains a
// single operand inside any surrounding expression.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
46 // characters used in test data for callbacks
53 ConversionTest::~ConversionTest() {}
56 ConversionTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
57 if (exec
) logln("TestSuite ConversionTest: ");
59 case 0: name
="TestToUnicode"; if (exec
) TestToUnicode(); break;
60 case 1: name
="TestFromUnicode"; if (exec
) TestFromUnicode(); break;
61 case 2: name
="TestGetUnicodeSet"; if (exec
) TestGetUnicodeSet(); break;
62 default: name
=""; break; //needed to end loop
66 // test data interface ----------------------------------------------------- ***
69 ConversionTest::TestToUnicode() {
71 char charset
[100], cbopt
[4];
73 UnicodeString s
, unicode
;
74 int32_t offsetsLength
;
75 UConverterToUCallback callback
;
77 TestDataModule
*dataModule
;
79 const DataMap
*testCase
;
83 errorCode
=U_ZERO_ERROR
;
84 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
85 if(U_SUCCESS(errorCode
)) {
86 testData
=dataModule
->createTestData("toUnicode", errorCode
);
87 if(U_SUCCESS(errorCode
)) {
88 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
89 if(U_FAILURE(errorCode
)) {
90 errln("error retrieving conversion/toUnicode test case %d - %s",
91 i
, u_errorName(errorCode
));
92 errorCode
=U_ZERO_ERROR
;
98 s
=testCase
->getString("charset", errorCode
);
99 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
102 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
103 unicode
=testCase
->getString("unicode", errorCode
);
104 cc
.unicode
=unicode
.getBuffer();
105 cc
.unicodeLength
=unicode
.length();
108 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
109 if(offsetsLength
==0) {
111 } else if(offsetsLength
!=unicode
.length()) {
112 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
113 i
, unicode
.length(), offsetsLength
);
114 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
117 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
118 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
120 s
=testCase
->getString("errorCode", errorCode
);
121 if(s
==UNICODE_STRING("invalid", 7)) {
122 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
123 } else if(s
==UNICODE_STRING("illegal", 7)) {
124 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
125 } else if(s
==UNICODE_STRING("truncated", 9)) {
126 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
127 } else if(s
==UNICODE_STRING("illesc", 6)) {
128 cc
.outErrorCode
=U_ILLEGAL_ESCAPE_SEQUENCE
;
129 } else if(s
==UNICODE_STRING("unsuppesc", 9)) {
130 cc
.outErrorCode
=U_UNSUPPORTED_ESCAPE_SEQUENCE
;
132 cc
.outErrorCode
=U_ZERO_ERROR
;
135 s
=testCase
->getString("callback", errorCode
);
136 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
140 callback
=UCNV_TO_U_CALLBACK_SUBSTITUTE
;
143 callback
=UCNV_TO_U_CALLBACK_SKIP
;
146 callback
=UCNV_TO_U_CALLBACK_STOP
;
149 callback
=UCNV_TO_U_CALLBACK_ESCAPE
;
155 option
=callback
==NULL
? cbopt
: cbopt
+1;
160 cc
.invalidChars
=testCase
->getBinary(cc
.invalidLength
, "invalidChars", errorCode
);
162 if(U_FAILURE(errorCode
)) {
163 errln("error parsing conversion/toUnicode test case %d - %s",
164 i
, u_errorName(errorCode
));
165 errorCode
=U_ZERO_ERROR
;
167 logln("TestToUnicode[%d] %s", i
, charset
);
168 ToUnicodeCase(cc
, callback
, option
);
176 errln("Failed: could not load test conversion data");
181 ConversionTest::TestFromUnicode() {
183 char charset
[100], cbopt
[4];
185 UnicodeString s
, unicode
, invalidUChars
;
186 int32_t offsetsLength
, index
;
187 UConverterFromUCallback callback
;
189 TestDataModule
*dataModule
;
191 const DataMap
*testCase
;
193 UErrorCode errorCode
;
196 errorCode
=U_ZERO_ERROR
;
197 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
198 if(U_SUCCESS(errorCode
)) {
199 testData
=dataModule
->createTestData("fromUnicode", errorCode
);
200 if(U_SUCCESS(errorCode
)) {
201 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
202 if(U_FAILURE(errorCode
)) {
203 errln("error retrieving conversion/fromUnicode test case %d - %s",
204 i
, u_errorName(errorCode
));
205 errorCode
=U_ZERO_ERROR
;
211 s
=testCase
->getString("charset", errorCode
);
212 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
215 unicode
=testCase
->getString("unicode", errorCode
);
216 cc
.unicode
=unicode
.getBuffer();
217 cc
.unicodeLength
=unicode
.length();
218 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
221 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
222 if(offsetsLength
==0) {
224 } else if(offsetsLength
!=cc
.bytesLength
) {
225 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
226 i
, cc
.bytesLength
, offsetsLength
);
227 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
230 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
231 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
233 s
=testCase
->getString("errorCode", errorCode
);
234 if(s
==UNICODE_STRING("invalid", 7)) {
235 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
236 } else if(s
==UNICODE_STRING("illegal", 7)) {
237 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
238 } else if(s
==UNICODE_STRING("truncated", 9)) {
239 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
241 cc
.outErrorCode
=U_ZERO_ERROR
;
244 s
=testCase
->getString("callback", errorCode
);
245 cc
.setSub
=0; // default: no subchar
247 if((index
=s
.indexOf((UChar
)0))>0) {
248 // read NUL-separated subchar first, if any
249 // copy the subchar from Latin-1 characters
250 // start after the NUL
251 p
=s
.getTerminatedBuffer();
254 length
=s
.length()-length
;
255 if(length
<=0 || length
>=(int32_t)sizeof(cc
.subchar
)) {
256 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
260 for(j
=0; j
<length
; ++j
) {
261 cc
.subchar
[j
]=(char)p
[j
];
263 // NUL-terminate the subchar
268 // remove the NUL and subchar from s
270 } else if((index
=s
.indexOf((UChar
)0x3d))>0) /* '=' */ {
271 // read a substitution string, separated by an equal sign
272 p
=s
.getBuffer()+index
+1;
273 length
=s
.length()-(index
+1);
274 if(length
<0 || length
>=LENGTHOF(cc
.subString
)) {
275 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
277 u_memcpy(cc
.subString
, p
, length
);
278 // NUL-terminate the subString
279 cc
.subString
[length
]=0;
283 // remove the equal sign and subString from s
287 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
291 callback
=UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
294 callback
=UCNV_FROM_U_CALLBACK_SKIP
;
297 callback
=UCNV_FROM_U_CALLBACK_STOP
;
300 callback
=UCNV_FROM_U_CALLBACK_ESCAPE
;
306 option
=callback
==NULL
? cbopt
: cbopt
+1;
311 invalidUChars
=testCase
->getString("invalidUChars", errorCode
);
312 cc
.invalidUChars
=invalidUChars
.getBuffer();
313 cc
.invalidLength
=invalidUChars
.length();
315 if(U_FAILURE(errorCode
)) {
316 errln("error parsing conversion/fromUnicode test case %d - %s",
317 i
, u_errorName(errorCode
));
318 errorCode
=U_ZERO_ERROR
;
320 logln("TestFromUnicode[%d] %s", i
, charset
);
321 FromUnicodeCase(cc
, callback
, option
);
329 errln("Failed: could not load test conversion data");
333 static const UChar ellipsis
[]={ 0x2e, 0x2e, 0x2e };
336 ConversionTest::TestGetUnicodeSet() {
338 UnicodeString s
, map
, mapnot
;
342 UnicodeSet cnvSet
, mapSet
, mapnotSet
, diffSet
;
343 UnicodeSet
*cnvSetPtr
= &cnvSet
;
346 TestDataModule
*dataModule
;
348 const DataMap
*testCase
;
349 UErrorCode errorCode
;
352 errorCode
=U_ZERO_ERROR
;
353 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
354 if(U_SUCCESS(errorCode
)) {
355 testData
=dataModule
->createTestData("getUnicodeSet", errorCode
);
356 if(U_SUCCESS(errorCode
)) {
357 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
358 if(U_FAILURE(errorCode
)) {
359 errln("error retrieving conversion/getUnicodeSet test case %d - %s",
360 i
, u_errorName(errorCode
));
361 errorCode
=U_ZERO_ERROR
;
365 s
=testCase
->getString("charset", errorCode
);
366 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
368 map
=testCase
->getString("map", errorCode
);
369 mapnot
=testCase
->getString("mapnot", errorCode
);
371 which
=testCase
->getInt28("which", errorCode
);
373 if(U_FAILURE(errorCode
)) {
374 errln("error parsing conversion/getUnicodeSet test case %d - %s",
375 i
, u_errorName(errorCode
));
376 errorCode
=U_ZERO_ERROR
;
380 // test this test case
385 mapSet
.applyPattern(map
, pos
, 0, NULL
, errorCode
);
386 if(U_FAILURE(errorCode
) || pos
.getIndex()!=map
.length()) {
387 errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
388 " error index %d index %d U+%04x",
389 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), map
.char32At(pos
.getIndex()));
390 errorCode
=U_ZERO_ERROR
;
395 mapnotSet
.applyPattern(mapnot
, pos
, 0, NULL
, errorCode
);
396 if(U_FAILURE(errorCode
) || pos
.getIndex()!=mapnot
.length()) {
397 errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
398 " error index %d index %d U+%04x",
399 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), mapnot
.char32At(pos
.getIndex()));
400 errorCode
=U_ZERO_ERROR
;
404 logln("TestGetUnicodeSet[%d] %s", i
, charset
);
406 cnv
=cnv_open(charset
, errorCode
);
407 if(U_FAILURE(errorCode
)) {
408 errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
409 charset
, i
, u_errorName(errorCode
));
410 errorCode
=U_ZERO_ERROR
;
414 ucnv_getUnicodeSet(cnv
, (USet
*)cnvSetPtr
, (UConverterUnicodeSet
)which
, &errorCode
);
417 if(U_FAILURE(errorCode
)) {
418 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
419 charset
, i
, u_errorName(errorCode
));
420 errorCode
=U_ZERO_ERROR
;
424 // are there items that must be in cnvSet but are not?
425 (diffSet
=mapSet
).removeAll(cnvSet
);
426 if(!diffSet
.isEmpty()) {
427 diffSet
.toPattern(s
, TRUE
);
429 s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
));
431 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
436 // are there items that must not be in cnvSet but are?
437 (diffSet
=mapnotSet
).retainAll(cnvSet
);
438 if(!diffSet
.isEmpty()) {
439 diffSet
.toPattern(s
, TRUE
);
441 s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
));
443 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
453 errln("Failed: could not load test conversion data");
457 // open testdata or ICU data converter ------------------------------------- ***
460 ConversionTest::cnv_open(const char *name
, UErrorCode
&errorCode
) {
461 if(name
!=NULL
&& *name
=='*') {
462 /* loadTestData(): set the data directory */
463 return ucnv_openPackage(loadTestData(errorCode
), name
+1, &errorCode
);
465 return ucnv_open(name
, &errorCode
);
469 // output helpers ---------------------------------------------------------- ***
// Returns the lowercase hexadecimal character for a 4-bit value:
// 0..9 -> '0'..'9', 10..15 -> 'a'..'f'.
// The callers visible in this file pass only a single nibble
// (b>>4, b&0xf, (c>>n)&0xf); larger values are not range-checked.
static char
hexDigit(uint8_t digit) {
    return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
}
477 printBytes(const uint8_t *bytes
, int32_t length
, char *out
) {
483 *out
++=hexDigit((uint8_t)(b
>>4));
484 *out
++=hexDigit((uint8_t)(b
&0xf));
491 *out
++=hexDigit((uint8_t)(b
>>4));
492 *out
++=hexDigit((uint8_t)(b
&0xf));
499 printUnicode(const UChar
*unicode
, int32_t length
, char *out
) {
503 for(i
=0; i
<length
;) {
507 U16_NEXT(unicode
, i
, length
, c
);
513 *out
++=hexDigit((uint8_t)((c
>>16)&0xf));
515 *out
++=hexDigit((uint8_t)((c
>>12)&0xf));
516 *out
++=hexDigit((uint8_t)((c
>>8)&0xf));
517 *out
++=hexDigit((uint8_t)((c
>>4)&0xf));
518 *out
++=hexDigit((uint8_t)(c
&0xf));
525 printOffsets(const int32_t *offsets
, int32_t length
, char *out
) {
532 for(i
=0; i
<length
; ++i
) {
538 // print all offsets with 2 characters each (-x, -9..99, xx)
544 *out
++=(char)('0'-o
);
546 *out
++=(d
=o
/10)==0 ? ' ' : (char)('0'+d
);
547 *out
++=(char)('0'+o%10
);
557 // toUnicode test worker functions ----------------------------------------- ***
560 stepToUnicode(ConversionCase
&cc
, UConverter
*cnv
,
561 UChar
*result
, int32_t resultCapacity
,
562 int32_t *resultOffsets
, /* also resultCapacity */
564 UErrorCode
*pErrorCode
) {
565 const char *source
, *sourceLimit
, *bytesLimit
;
566 UChar
*target
, *targetLimit
, *resultLimit
;
569 source
=(const char *)cc
.bytes
;
571 bytesLimit
=source
+cc
.bytesLength
;
572 resultLimit
=result
+resultCapacity
;
575 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
576 // move only one buffer (in vs. out) at a time to be extra mean
577 // step==0 performs bulk conversion and generates offsets
579 // initialize the partial limits for the loop
581 // use the entire buffers
582 sourceLimit
=bytesLimit
;
583 targetLimit
=resultLimit
;
586 // start with empty partial buffers
591 // output offsets only for bulk conversion
596 // resetting the opposite conversion direction must not affect this one
597 ucnv_resetFromUnicode(cnv
);
601 &target
, targetLimit
,
602 &source
, sourceLimit
,
606 // check pointers and errors
607 if(source
>sourceLimit
|| target
>targetLimit
) {
608 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
610 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
611 if(target
!=targetLimit
) {
612 // buffer overflow must only be set when the target is filled
613 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
615 } else if(targetLimit
==resultLimit
) {
616 // not just a partial overflow
620 // the partial target is filled, set a new limit, reset the error and continue
621 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
622 *pErrorCode
=U_ZERO_ERROR
;
623 } else if(U_FAILURE(*pErrorCode
)) {
624 // some other error occurred, done
627 if(source
!=sourceLimit
) {
628 // when no error occurs, then the input must be consumed
629 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
633 if(sourceLimit
==bytesLimit
) {
638 // the partial conversion succeeded, set a new limit and continue
639 sourceLimit
=(bytesLimit
-source
)>=step
? source
+step
: bytesLimit
;
640 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==bytesLimit
);
643 } else /* step<0 */ {
645 * step==-1: call only ucnv_getNextUChar()
646 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
647 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
648 * else give it at most (-step-2)/2 bytes
652 // end the loop by getting an index out of bounds error
654 // resetting the opposite conversion direction must not affect this one
655 ucnv_resetFromUnicode(cnv
);
658 if((step
&1)!=0 /* odd: -1, -3, -5, ... */) {
659 sourceLimit
=source
; // use sourceLimit not as a real limit
660 // but to remember the pre-getNextUChar source pointer
661 c
=ucnv_getNextUChar(cnv
, &source
, bytesLimit
, pErrorCode
);
663 // check pointers and errors
664 if(*pErrorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
665 if(source
!=bytesLimit
) {
666 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
668 *pErrorCode
=U_ZERO_ERROR
;
671 } else if(U_FAILURE(*pErrorCode
)) {
674 // source may not move if c is from previous overflow
676 if(target
==resultLimit
) {
677 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
683 *target
++=U16_LEAD(c
);
684 if(target
==resultLimit
) {
685 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
688 *target
++=U16_TRAIL(c
);
691 // alternate between -n-1 and -n but leave -1 alone
695 } else /* step is even */ {
696 // allow only one UChar output
697 targetLimit
=target
<resultLimit
? target
+1 : resultLimit
;
699 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
700 // and never output offsets
702 sourceLimit
=bytesLimit
;
704 sourceLimit
=source
+(-step
-2)/2;
705 if(sourceLimit
>bytesLimit
) {
706 sourceLimit
=bytesLimit
;
711 &target
, targetLimit
,
712 &source
, sourceLimit
,
713 NULL
, (UBool
)(sourceLimit
==bytesLimit
), pErrorCode
);
715 // check pointers and errors
716 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
717 if(target
!=targetLimit
) {
718 // buffer overflow must only be set when the target is filled
719 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
721 } else if(targetLimit
==resultLimit
) {
722 // not just a partial overflow
726 // the partial target is filled, set a new limit and continue
727 *pErrorCode
=U_ZERO_ERROR
;
728 } else if(U_FAILURE(*pErrorCode
)) {
729 // some other error occurred, done
732 if(source
!=sourceLimit
) {
733 // when no error occurs, then the input must be consumed
734 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
738 // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
746 return (int32_t)(target
-result
);
750 ConversionTest::ToUnicodeCase(ConversionCase
&cc
, UConverterToUCallback callback
, const char *option
) {
752 UErrorCode errorCode
;
754 // open the converter
755 errorCode
=U_ZERO_ERROR
;
756 cnv
=cnv_open(cc
.charset
, errorCode
);
757 if(U_FAILURE(errorCode
)) {
758 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
759 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
765 ucnv_setToUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
);
766 if(U_FAILURE(errorCode
)) {
767 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
768 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
774 int32_t resultOffsets
[200];
776 int32_t resultLength
;
779 static const struct {
783 { 0, "bulk" }, // must be first for offsets to be checked
788 { -2, "toU(bulk)+getNext" },
789 { -3, "getNext+toU(bulk)" },
790 { -4, "toU(1)+getNext" },
791 { -5, "getNext+toU(1)" },
792 { -12, "toU(5)+getNext" },
793 { -13, "getNext+toU(5)" },
798 for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) {
800 if(step
<0 && !cc
.finalFlush
) {
801 // skip ucnv_getNextUChar() if !finalFlush because
802 // ucnv_getNextUChar() always implies flush
806 // bulk test is first, then offsets are not checked any more
809 errorCode
=U_ZERO_ERROR
;
810 resultLength
=stepToUnicode(cc
, cnv
,
811 result
, LENGTHOF(result
),
812 step
==0 ? resultOffsets
: NULL
,
815 cc
, cnv
, steps
[i
].name
,
816 result
, resultLength
,
817 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
819 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
820 // reset if an error occurred or we did not flush
821 // otherwise do nothing to make sure that flushing resets
822 ucnv_resetToUnicode(cnv
);
826 // not a real loop, just a convenience for breaking out of the block
827 while(ok
&& cc
.finalFlush
) {
828 // test ucnv_toUChars()
829 memset(result
, 0, sizeof(result
));
831 errorCode
=U_ZERO_ERROR
;
832 resultLength
=ucnv_toUChars(cnv
,
833 result
, LENGTHOF(result
),
834 (const char *)cc
.bytes
, cc
.bytesLength
,
838 result
, resultLength
,
846 // keep the correct result for simple checking
847 errorCode
=U_ZERO_ERROR
;
848 resultLength
=ucnv_toUChars(cnv
,
850 (const char *)cc
.bytes
, cc
.bytesLength
,
852 if(errorCode
==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
853 errorCode
=U_ZERO_ERROR
;
856 cc
, cnv
, "preflight toUChars",
857 result
, resultLength
,
868 ConversionTest::checkToUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
869 const UChar
*result
, int32_t resultLength
,
870 const int32_t *resultOffsets
,
871 UErrorCode resultErrorCode
) {
872 char resultInvalidChars
[8];
873 int8_t resultInvalidLength
;
874 UErrorCode errorCode
;
878 // reset the message; NULL will mean "ok"
881 errorCode
=U_ZERO_ERROR
;
882 resultInvalidLength
=sizeof(resultInvalidChars
);
883 ucnv_getInvalidChars(cnv
, resultInvalidChars
, &resultInvalidLength
, &errorCode
);
884 if(U_FAILURE(errorCode
)) {
885 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
886 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
890 // check everything that might have gone wrong
891 if(cc
.unicodeLength
!=resultLength
) {
892 msg
="wrong result length";
893 } else if(0!=u_memcmp(cc
.unicode
, result
, cc
.unicodeLength
)) {
894 msg
="wrong result string";
895 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.unicodeLength
*sizeof(*cc
.offsets
))) {
897 } else if(cc
.outErrorCode
!=resultErrorCode
) {
898 msg
="wrong error code";
899 } else if(cc
.invalidLength
!=resultInvalidLength
) {
900 msg
="wrong length of last invalid input";
901 } else if(0!=memcmp(cc
.invalidChars
, resultInvalidChars
, cc
.invalidLength
)) {
902 msg
="wrong last invalid input";
908 char buffer
[2000]; // one buffer for all strings
909 char *s
, *bytesString
, *unicodeString
, *resultString
,
910 *offsetsString
, *resultOffsetsString
,
911 *invalidCharsString
, *resultInvalidCharsString
;
913 bytesString
=s
=buffer
;
914 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
);
915 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
=s
);
916 s
=printUnicode(result
, resultLength
, resultString
=s
);
917 s
=printOffsets(cc
.offsets
, cc
.unicodeLength
, offsetsString
=s
);
918 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
919 s
=printBytes(cc
.invalidChars
, cc
.invalidLength
, invalidCharsString
=s
);
920 s
=printBytes((uint8_t *)resultInvalidChars
, resultInvalidLength
, resultInvalidCharsString
=s
);
922 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
923 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
924 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
928 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
930 " expected <%s>[%d]\n"
933 " result offsets <%s>\n"
934 " error code expected %s got %s\n"
935 " invalidChars expected <%s> got <%s>\n",
936 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
937 bytesString
, cc
.bytesLength
,
938 unicodeString
, cc
.unicodeLength
,
939 resultString
, resultLength
,
942 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
943 invalidCharsString
, resultInvalidCharsString
);
949 // fromUnicode test worker functions --------------------------------------- ***
952 stepFromUnicode(ConversionCase
&cc
, UConverter
*cnv
,
953 char *result
, int32_t resultCapacity
,
954 int32_t *resultOffsets
, /* also resultCapacity */
956 UErrorCode
*pErrorCode
) {
957 const UChar
*source
, *sourceLimit
, *unicodeLimit
;
958 char *target
, *targetLimit
, *resultLimit
;
963 unicodeLimit
=source
+cc
.unicodeLength
;
964 resultLimit
=result
+resultCapacity
;
966 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
967 // move only one buffer (in vs. out) at a time to be extra mean
968 // step==0 performs bulk conversion and generates offsets
970 // initialize the partial limits for the loop
972 // use the entire buffers
973 sourceLimit
=unicodeLimit
;
974 targetLimit
=resultLimit
;
977 // start with empty partial buffers
982 // output offsets only for bulk conversion
987 // resetting the opposite conversion direction must not affect this one
988 ucnv_resetToUnicode(cnv
);
991 ucnv_fromUnicode(cnv
,
992 &target
, targetLimit
,
993 &source
, sourceLimit
,
997 // check pointers and errors
998 if(source
>sourceLimit
|| target
>targetLimit
) {
999 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1001 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1002 if(target
!=targetLimit
) {
1003 // buffer overflow must only be set when the target is filled
1004 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1006 } else if(targetLimit
==resultLimit
) {
1007 // not just a partial overflow
1011 // the partial target is filled, set a new limit, reset the error and continue
1012 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
1013 *pErrorCode
=U_ZERO_ERROR
;
1014 } else if(U_FAILURE(*pErrorCode
)) {
1015 // some other error occurred, done
1018 if(source
!=sourceLimit
) {
1019 // when no error occurs, then the input must be consumed
1020 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1024 if(sourceLimit
==unicodeLimit
) {
1029 // the partial conversion succeeded, set a new limit and continue
1030 sourceLimit
=(unicodeLimit
-source
)>=step
? source
+step
: unicodeLimit
;
1031 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==unicodeLimit
);
1035 return (int32_t)(target
-result
);
1039 ConversionTest::FromUnicodeCase(ConversionCase
&cc
, UConverterFromUCallback callback
, const char *option
) {
1041 UErrorCode errorCode
;
1043 // open the converter
1044 errorCode
=U_ZERO_ERROR
;
1045 cnv
=cnv_open(cc
.charset
, errorCode
);
1046 if(U_FAILURE(errorCode
)) {
1047 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1048 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1053 if(callback
!=NULL
) {
1054 ucnv_setFromUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
);
1055 if(U_FAILURE(errorCode
)) {
1056 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1057 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1063 // set the fallbacks flag
1064 // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1065 ucnv_setFallback(cnv
, cc
.fallbacks
);
1071 length
=(int32_t)strlen(cc
.subchar
);
1072 ucnv_setSubstChars(cnv
, cc
.subchar
, (int8_t)length
, &errorCode
);
1073 if(U_FAILURE(errorCode
)) {
1074 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1075 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1079 } else if(cc
.setSub
<0) {
1080 ucnv_setSubstString(cnv
, cc
.subString
, -1, &errorCode
);
1081 if(U_FAILURE(errorCode
)) {
1082 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1083 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1089 int32_t resultOffsets
[200];
1091 int32_t resultLength
;
1094 static const struct {
1098 { 0, "bulk" }, // must be first for offsets to be checked
1106 for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) {
1109 // bulk test is first, then offsets are not checked any more
1112 errorCode
=U_ZERO_ERROR
;
1113 resultLength
=stepFromUnicode(cc
, cnv
,
1114 result
, LENGTHOF(result
),
1115 step
==0 ? resultOffsets
: NULL
,
1117 ok
=checkFromUnicode(
1118 cc
, cnv
, steps
[i
].name
,
1119 (uint8_t *)result
, resultLength
,
1120 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
1122 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
1123 // reset if an error occurred or we did not flush
1124 // otherwise do nothing to make sure that flushing resets
1125 ucnv_resetFromUnicode(cnv
);
1129 // not a real loop, just a convenience for breaking out of the block
1130 while(ok
&& cc
.finalFlush
) {
1131 // test ucnv_fromUChars()
1132 memset(result
, 0, sizeof(result
));
1134 errorCode
=U_ZERO_ERROR
;
1135 resultLength
=ucnv_fromUChars(cnv
,
1136 result
, LENGTHOF(result
),
1137 cc
.unicode
, cc
.unicodeLength
,
1139 ok
=checkFromUnicode(
1140 cc
, cnv
, "fromUChars",
1141 (uint8_t *)result
, resultLength
,
1148 // test preflighting
1149 // keep the correct result for simple checking
1150 errorCode
=U_ZERO_ERROR
;
1151 resultLength
=ucnv_fromUChars(cnv
,
1153 cc
.unicode
, cc
.unicodeLength
,
1155 if(errorCode
==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1156 errorCode
=U_ZERO_ERROR
;
1158 ok
=checkFromUnicode(
1159 cc
, cnv
, "preflight fromUChars",
1160 (uint8_t *)result
, resultLength
,
1171 ConversionTest::checkFromUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
1172 const uint8_t *result
, int32_t resultLength
,
1173 const int32_t *resultOffsets
,
1174 UErrorCode resultErrorCode
) {
1175 UChar resultInvalidUChars
[8];
1176 int8_t resultInvalidLength
;
1177 UErrorCode errorCode
;
1181 // reset the message; NULL will mean "ok"
1184 errorCode
=U_ZERO_ERROR
;
1185 resultInvalidLength
=LENGTHOF(resultInvalidUChars
);
1186 ucnv_getInvalidUChars(cnv
, resultInvalidUChars
, &resultInvalidLength
, &errorCode
);
1187 if(U_FAILURE(errorCode
)) {
1188 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1189 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
1193 // check everything that might have gone wrong
1194 if(cc
.bytesLength
!=resultLength
) {
1195 msg
="wrong result length";
1196 } else if(0!=memcmp(cc
.bytes
, result
, cc
.bytesLength
)) {
1197 msg
="wrong result string";
1198 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.bytesLength
*sizeof(*cc
.offsets
))) {
1199 msg
="wrong offsets";
1200 } else if(cc
.outErrorCode
!=resultErrorCode
) {
1201 msg
="wrong error code";
1202 } else if(cc
.invalidLength
!=resultInvalidLength
) {
1203 msg
="wrong length of last invalid input";
1204 } else if(0!=u_memcmp(cc
.invalidUChars
, resultInvalidUChars
, cc
.invalidLength
)) {
1205 msg
="wrong last invalid input";
1211 char buffer
[2000]; // one buffer for all strings
1212 char *s
, *unicodeString
, *bytesString
, *resultString
,
1213 *offsetsString
, *resultOffsetsString
,
1214 *invalidCharsString
, *resultInvalidUCharsString
;
1216 unicodeString
=s
=buffer
;
1217 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
);
1218 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
=s
);
1219 s
=printBytes(result
, resultLength
, resultString
=s
);
1220 s
=printOffsets(cc
.offsets
, cc
.bytesLength
, offsetsString
=s
);
1221 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
1222 s
=printUnicode(cc
.invalidUChars
, cc
.invalidLength
, invalidCharsString
=s
);
1223 s
=printUnicode(resultInvalidUChars
, resultInvalidLength
, resultInvalidUCharsString
=s
);
1225 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
1226 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1227 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
1231 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1232 " unicode <%s>[%d]\n"
1233 " expected <%s>[%d]\n"
1234 " result <%s>[%d]\n"
1236 " result offsets <%s>\n"
1237 " error code expected %s got %s\n"
1238 " invalidChars expected <%s> got <%s>\n",
1239 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
1240 unicodeString
, cc
.unicodeLength
,
1241 bytesString
, cc
.bytesLength
,
1242 resultString
, resultLength
,
1244 resultOffsetsString
,
1245 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
1246 invalidCharsString
, resultInvalidUCharsString
);
1252 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */