/*
*******************************************************************************
*
*   Copyright (C) 2003-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  convtest.cpp
*   tab size:   8 (not used)
*
*   created on: 2003jul15
*   created by: Markus W. Scherer
*
*   Test file for data-driven conversion tests.
*/
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it also removes tests for Unicode charsets
 * like UTF-8 that should still work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * legacy-charset test cases.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
32 #include "unicode/ucnv.h"
33 #include "unicode/unistr.h"
34 #include "unicode/parsepos.h"
35 #include "unicode/uniset.h"
36 #include "unicode/ustring.h"
37 #include "unicode/ures.h"
39 #include "unicode/tstdtmod.h"
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for real arrays (a pointer argument would silently give
// sizeof(pointer)/sizeof(element)). The expansion is fully parenthesized so
// that it behaves as a single operand inside larger expressions.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
46 // characters used in test data for callbacks
// Destructor with an empty body; no cleanup is performed here.
// NOTE(review): the leading number is residue from the source listing.
53 ConversionTest::~ConversionTest() {}
// Test dispatcher: for a given test index, stores the matching test name in
// `name` ("TestToUnicode", "TestFromUnicode", "TestGetUnicodeSet") and, when
// `exec` is set, runs that test. Any other index yields an empty name, which
// (per the comment on the default case) is what ends the caller's loop.
// The last parameter (par) is unnamed and unused.
// NOTE(review): this listing is extraction-damaged (a leading original line
// number on each statement, tokens split across lines, some lines dropped,
// e.g. the switch header); the code tokens below are left exactly as found.
56 ConversionTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
57 if (exec
) logln("TestSuite ConversionTest: ");
59 case 0: name
="TestToUnicode"; if (exec
) TestToUnicode(); break;
60 case 1: name
="TestFromUnicode"; if (exec
) TestFromUnicode(); break;
61 case 2: name
="TestGetUnicodeSet"; if (exec
) TestGetUnicodeSet(); break;
62 default: name
=""; break; //needed to end loop
66 // test data interface ----------------------------------------------------- ***
// Data-driven toUnicode test driver.
// Loads the "conversion" test data module, iterates over its "toUnicode"
// test cases and, for each case, fills the conversion-case record `cc` from
// the fields "charset", "bytes", "unicode", "offsets", "flush", "fallbacks",
// "errorCode", "callback" and "invalidChars", then runs
// ToUnicodeCase(cc, callback, option).
//  - A non-empty "offsets" vector must be as long as the expected Unicode
//    string; otherwise the case is flagged with U_ILLEGAL_ARGUMENT_ERROR.
//  - The "errorCode" string is mapped to the expected UErrorCode:
//    "invalid", "illegal", "truncated", "illesc", "unsuppesc"; anything else
//    falls back to U_ZERO_ERROR.
//  - Retrieval/parsing failures are reported with errln() and the error code
//    is reset so the remaining cases still run.
// If the test data cannot be loaded, a single failure is reported.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines such as the callback-selection switch
// and several declarations); the code tokens below are left exactly as found.
69 ConversionTest::TestToUnicode() {
71 char charset
[100], cbopt
[4];
73 UnicodeString s
, unicode
;
74 int32_t offsetsLength
;
75 UConverterToUCallback callback
;
77 TestDataModule
*dataModule
;
79 const DataMap
*testCase
;
83 errorCode
=U_ZERO_ERROR
;
84 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
85 if(U_SUCCESS(errorCode
)) {
86 testData
=dataModule
->createTestData("toUnicode", errorCode
);
87 if(U_SUCCESS(errorCode
)) {
88 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
89 if(U_FAILURE(errorCode
)) {
90 errln("error retrieving conversion/toUnicode test case %d - %s",
91 i
, u_errorName(errorCode
));
92 errorCode
=U_ZERO_ERROR
;
98 s
=testCase
->getString("charset", errorCode
);
99 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
102 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
103 unicode
=testCase
->getString("unicode", errorCode
);
104 cc
.unicode
=unicode
.getBuffer();
105 cc
.unicodeLength
=unicode
.length();
108 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
109 if(offsetsLength
==0) {
111 } else if(offsetsLength
!=unicode
.length()) {
112 errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
113 i
, unicode
.length(), offsetsLength
);
114 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
117 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
118 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
120 s
=testCase
->getString("errorCode", errorCode
);
121 if(s
==UNICODE_STRING("invalid", 7)) {
122 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
123 } else if(s
==UNICODE_STRING("illegal", 7)) {
124 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
125 } else if(s
==UNICODE_STRING("truncated", 9)) {
126 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
127 } else if(s
==UNICODE_STRING("illesc", 6)) {
128 cc
.outErrorCode
=U_ILLEGAL_ESCAPE_SEQUENCE
;
129 } else if(s
==UNICODE_STRING("unsuppesc", 9)) {
130 cc
.outErrorCode
=U_UNSUPPORTED_ESCAPE_SEQUENCE
;
132 cc
.outErrorCode
=U_ZERO_ERROR
;
135 s
=testCase
->getString("callback", errorCode
);
136 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
140 callback
=UCNV_TO_U_CALLBACK_SUBSTITUTE
;
143 callback
=UCNV_TO_U_CALLBACK_SKIP
;
146 callback
=UCNV_TO_U_CALLBACK_STOP
;
149 callback
=UCNV_TO_U_CALLBACK_ESCAPE
;
155 option
=callback
==NULL
? cbopt
: cbopt
+1;
160 cc
.invalidChars
=testCase
->getBinary(cc
.invalidLength
, "invalidChars", errorCode
);
162 if(U_FAILURE(errorCode
)) {
163 errln("error parsing conversion/toUnicode test case %d - %s",
164 i
, u_errorName(errorCode
));
165 errorCode
=U_ZERO_ERROR
;
167 logln("TestToUnicode[%d] %s", i
, charset
);
168 ToUnicodeCase(cc
, callback
, option
);
176 errln("Failed: could not load test conversion data");
// Data-driven fromUnicode test driver.
// Loads the "conversion" test data module, iterates over its "fromUnicode"
// test cases and, for each case, fills the conversion-case record `cc` from
// the fields "charset", "unicode", "bytes", "offsets", "flush", "fallbacks",
// "errorCode", "callback" and "invalidUChars", then runs
// FromUnicodeCase(cc, callback, option).
//  - A non-empty "offsets" vector must be as long as the expected byte
//    string; otherwise the case is flagged with U_ILLEGAL_ARGUMENT_ERROR.
//  - The "errorCode" string is mapped to the expected UErrorCode:
//    "invalid", "illegal", "truncated"; anything else falls back to
//    U_ZERO_ERROR.
//  - The "callback" string may carry a NUL-separated substitution character
//    after the callback text; it is copied (as Latin-1 characters) into
//    cc.subchar and then removed from the string. A subchar that does not
//    fit into cc.subchar is flagged with U_ILLEGAL_ARGUMENT_ERROR.
//  - Retrieval/parsing failures are reported with errln() and the error code
//    is reset so the remaining cases still run.
// If the test data cannot be loaded, a single failure is reported.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines such as the callback-selection switch
// and several declarations); the code tokens below are left exactly as found.
181 ConversionTest::TestFromUnicode() {
183 char charset
[100], cbopt
[4];
185 UnicodeString s
, unicode
, invalidUChars
;
186 int32_t offsetsLength
;
187 UConverterFromUCallback callback
;
189 TestDataModule
*dataModule
;
191 const DataMap
*testCase
;
193 UErrorCode errorCode
;
196 errorCode
=U_ZERO_ERROR
;
197 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
198 if(U_SUCCESS(errorCode
)) {
199 testData
=dataModule
->createTestData("fromUnicode", errorCode
);
200 if(U_SUCCESS(errorCode
)) {
201 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
202 if(U_FAILURE(errorCode
)) {
203 errln("error retrieving conversion/fromUnicode test case %d - %s",
204 i
, u_errorName(errorCode
));
205 errorCode
=U_ZERO_ERROR
;
211 s
=testCase
->getString("charset", errorCode
);
212 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
215 unicode
=testCase
->getString("unicode", errorCode
);
216 cc
.unicode
=unicode
.getBuffer();
217 cc
.unicodeLength
=unicode
.length();
218 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
);
221 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
);
222 if(offsetsLength
==0) {
224 } else if(offsetsLength
!=cc
.bytesLength
) {
225 errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
226 i
, cc
.bytesLength
, offsetsLength
);
227 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
230 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
);
231 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
);
233 s
=testCase
->getString("errorCode", errorCode
);
234 if(s
==UNICODE_STRING("invalid", 7)) {
235 cc
.outErrorCode
=U_INVALID_CHAR_FOUND
;
236 } else if(s
==UNICODE_STRING("illegal", 7)) {
237 cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
;
238 } else if(s
==UNICODE_STRING("truncated", 9)) {
239 cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
;
241 cc
.outErrorCode
=U_ZERO_ERROR
;
244 s
=testCase
->getString("callback", errorCode
);
246 // read NUL-separated subchar first, if any
247 length
=u_strlen(p
=s
.getTerminatedBuffer());
248 if(++length
<s
.length()) {
249 // copy the subchar from Latin-1 characters
250 // start after the NUL
252 length
=s
.length()-length
;
253 if(length
>=(int32_t)sizeof(cc
.subchar
)) {
254 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
258 for(j
=0; j
<length
; ++j
) {
259 cc
.subchar
[j
]=(char)p
[j
];
261 // NUL-terminate the subchar
265 // remove the NUL and subchar from s
266 s
.truncate(u_strlen(s
.getBuffer()));
272 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), "");
276 callback
=UCNV_FROM_U_CALLBACK_SUBSTITUTE
;
279 callback
=UCNV_FROM_U_CALLBACK_SKIP
;
282 callback
=UCNV_FROM_U_CALLBACK_STOP
;
285 callback
=UCNV_FROM_U_CALLBACK_ESCAPE
;
291 option
=callback
==NULL
? cbopt
: cbopt
+1;
296 invalidUChars
=testCase
->getString("invalidUChars", errorCode
);
297 cc
.invalidUChars
=invalidUChars
.getBuffer();
298 cc
.invalidLength
=invalidUChars
.length();
300 if(U_FAILURE(errorCode
)) {
301 errln("error parsing conversion/fromUnicode test case %d - %s",
302 i
, u_errorName(errorCode
));
303 errorCode
=U_ZERO_ERROR
;
305 logln("TestFromUnicode[%d] %s", i
, charset
);
306 FromUnicodeCase(cc
, callback
, option
);
314 errln("Failed: could not load test conversion data");
// Three U+002E (full stop) code units. TestGetUnicodeSet() splices this in
// at index 100 of a set pattern to mark where it was cut off for display.
318 static const UChar ellipsis
[]={ 0x2e, 0x2e, 0x2e };
// Data-driven test for ucnv_getUnicodeSet().
// Loads the "conversion" test data module and iterates over its
// "getUnicodeSet" test cases. Each case supplies a "charset", two UnicodeSet
// patterns ("map": code points that must be in the converter's set,
// "mapnot": code points that must not be), and a "which" selector that is
// cast to UConverterUnicodeSet.
// For each case the function:
//  1. parses both patterns with UnicodeSet::applyPattern() and requires the
//     whole pattern string to be consumed;
//  2. opens the converter through cnv_open();
//  3. fetches the converter's set into cnvSet with ucnv_getUnicodeSet();
//  4. reports an error if mapSet minus cnvSet is non-empty (missing items)
//     or if mapnotSet intersected with cnvSet is non-empty (unexpected
//     items). The offending set is turned into a pattern string and cut at
//     index 100 with `ellipsis` for the message.
// Failures along the way are reported with errln() and the error code is
// reset so later cases still run.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. ucnv_close and the arguments of
// the final errln calls); the code tokens below are left exactly as found.
321 ConversionTest::TestGetUnicodeSet() {
323 UnicodeString s
, map
, mapnot
;
327 UnicodeSet cnvSet
, mapSet
, mapnotSet
, diffSet
;
330 TestDataModule
*dataModule
;
332 const DataMap
*testCase
;
333 UErrorCode errorCode
;
336 errorCode
=U_ZERO_ERROR
;
337 dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
);
338 if(U_SUCCESS(errorCode
)) {
339 testData
=dataModule
->createTestData("getUnicodeSet", errorCode
);
340 if(U_SUCCESS(errorCode
)) {
341 for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) {
342 if(U_FAILURE(errorCode
)) {
343 errln("error retrieving conversion/getUnicodeSet test case %d - %s",
344 i
, u_errorName(errorCode
));
345 errorCode
=U_ZERO_ERROR
;
349 s
=testCase
->getString("charset", errorCode
);
350 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), "");
352 map
=testCase
->getString("map", errorCode
);
353 mapnot
=testCase
->getString("mapnot", errorCode
);
355 which
=testCase
->getInt28("which", errorCode
);
357 if(U_FAILURE(errorCode
)) {
358 errln("error parsing conversion/getUnicodeSet test case %d - %s",
359 i
, u_errorName(errorCode
));
360 errorCode
=U_ZERO_ERROR
;
364 // test this test case
369 mapSet
.applyPattern(map
, pos
, 0, NULL
, errorCode
);
370 if(U_FAILURE(errorCode
) || pos
.getIndex()!=map
.length()) {
371 errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
372 " error index %d index %d U+%04x",
373 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), map
.char32At(pos
.getIndex()));
374 errorCode
=U_ZERO_ERROR
;
379 mapnotSet
.applyPattern(mapnot
, pos
, 0, NULL
, errorCode
);
380 if(U_FAILURE(errorCode
) || pos
.getIndex()!=mapnot
.length()) {
381 errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
382 " error index %d index %d U+%04x",
383 i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), mapnot
.char32At(pos
.getIndex()));
384 errorCode
=U_ZERO_ERROR
;
388 logln("TestGetUnicodeSet[%d] %s", i
, charset
);
390 cnv
=cnv_open(charset
, errorCode
);
391 if(U_FAILURE(errorCode
)) {
392 errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
393 charset
, i
, u_errorName(errorCode
));
394 errorCode
=U_ZERO_ERROR
;
398 ucnv_getUnicodeSet(cnv
, (USet
*)&cnvSet
, (UConverterUnicodeSet
)which
, &errorCode
);
401 if(U_FAILURE(errorCode
)) {
402 errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
403 charset
, i
, u_errorName(errorCode
));
404 errorCode
=U_ZERO_ERROR
;
408 // are there items that must be in cnvSet but are not?
409 (diffSet
=mapSet
).removeAll(cnvSet
);
410 if(!diffSet
.isEmpty()) {
411 diffSet
.toPattern(s
, TRUE
);
413 s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
));
415 errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
420 // are there items that must not be in cnvSet but are?
421 (diffSet
=mapnotSet
).retainAll(cnvSet
);
422 if(!diffSet
.isEmpty()) {
423 diffSet
.toPattern(s
, TRUE
);
425 s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
));
427 errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
437 errln("Failed: could not load test conversion data");
441 // open testdata or ICU data converter ------------------------------------- ***
// Opens a converter by name for the tests in this file.
// A name that starts with '*' selects a converter from the test data
// package: the '*' is skipped and ucnv_openPackage() is called with the path
// returned by loadTestData(). Any other name (including NULL) is passed to
// ucnv_open() unchanged.
// Failures are reported through `errorCode`; the result of the ucnv_open*()
// call is returned as is.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the return type and the else
// branch header); the code tokens below are left exactly as found.
444 ConversionTest::cnv_open(const char *name
, UErrorCode
&errorCode
) {
445 if(name
!=NULL
&& *name
=='*') {
446 /* loadTestData(): set the data directory */
447 return ucnv_openPackage(loadTestData(errorCode
), name
+1, &errorCode
);
449 return ucnv_open(name
, &errorCode
);
453 // output helpers ---------------------------------------------------------- ***
// Convert a 4-bit value to its lowercase hexadecimal digit character.
// 0..9 map to '0'..'9' and 10..15 map to 'a'..'f'. Larger inputs are not
// range-checked (they simply continue past 'f'), so callers are expected to
// pass a single nibble.
// File-local helper for the print*() output functions.
static char
hexDigit(uint8_t digit) {
    return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
}
// Output helper: formats `length` bytes from `bytes` as hexadecimal text
// into `out`. For each byte value b the visible code appends two hex digits
// (high nibble, then low nibble) via hexDigit().
// Callers in this file pass the return value on as the start of the next
// string in the same buffer, i.e. they treat it as the position just past
// what was written here.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the loop headers, any separator
// or terminator output, and the return statement); the code tokens below are
// left exactly as found.
461 printBytes(const uint8_t *bytes
, int32_t length
, char *out
) {
467 *out
++=hexDigit((uint8_t)(b
>>4));
468 *out
++=hexDigit((uint8_t)(b
&0xf));
475 *out
++=hexDigit((uint8_t)(b
>>4));
476 *out
++=hexDigit((uint8_t)(b
&0xf));
// Output helper: formats a UTF-16 string as hexadecimal code points in `out`.
// Iterates with U16_NEXT, so a surrogate pair is read as one code point `c`,
// and appends hex digits for the nibbles at bit positions 16, 12, 8, 4 and 0
// via hexDigit().
// Callers in this file pass the return value on as the start of the next
// string in the same buffer.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the conditions guarding the
// leading digits, separators, terminator and return); the code tokens below
// are left exactly as found.
483 printUnicode(const UChar
*unicode
, int32_t length
, char *out
) {
487 for(i
=0; i
<length
;) {
491 U16_NEXT(unicode
, i
, length
, c
);
497 *out
++=hexDigit((uint8_t)((c
>>16)&0xf));
499 *out
++=hexDigit((uint8_t)((c
>>12)&0xf));
500 *out
++=hexDigit((uint8_t)((c
>>8)&0xf));
501 *out
++=hexDigit((uint8_t)((c
>>4)&0xf));
502 *out
++=hexDigit((uint8_t)(c
&0xf));
// Output helper: formats an array of `length` offsets into `out`.
// Per the inline comment, every offset is printed with exactly two
// characters (-x, -9..99, xx). The visible code writes a digit computed as
// '0'-o (used for a negative offset o -- presumably after a '-' written on a
// dropped line; verify), or a tens digit (a space when it is zero) followed
// by the ones digit.
// Callers in this file pass the return value on as the start of the next
// string in the same buffer.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the range checks, separators,
// terminator and return); the code tokens below are left exactly as found.
509 printOffsets(const int32_t *offsets
, int32_t length
, char *out
) {
516 for(i
=0; i
<length
; ++i
) {
522 // print all offsets with 2 characters each (-x, -9..99, xx)
528 *out
++=(char)('0'-o
);
530 *out
++=(d
=o
/10)==0 ? ' ' : (char)('0'+d
);
531 *out
++=(char)('0'+o%10
);
541 // toUnicode test worker functions ----------------------------------------- ***
// toUnicode worker: converts cc.bytes (cc.bytesLength bytes) with `cnv` into
// `result` (capacity resultCapacity UChars) and returns the number of UChars
// written (target-result). The outcome is reported through *pErrorCode.
//
// The conversion is driven according to `step` (per the inline comments):
//  - step==0: one bulk ucnv_toUnicode() call over the entire buffers; this
//    is the only mode that generates offsets.
//  - step>0:  ucnv_toUnicode() is called with partial input/output windows
//    that grow by at most `step` at a time, moving only one of the two
//    limits per iteration. A U_BUFFER_OVERFLOW_ERROR on a partial target is
//    cleared and the target window enlarged; flush is only set once the
//    source window reaches the end of the input and cc.finalFlush is set.
//  - step<0:  ucnv_getNextUChar() is used (step==-1: exclusively; otherwise
//    alternating with single-UChar ucnv_toUnicode() calls that get either
//    all remaining input or at most (-step-2)/2 bytes). This mode ends when
//    ucnv_getNextUChar() reports U_INDEX_OUTOFBOUNDS_ERROR at the end of the
//    input, which is then turned back into U_ZERO_ERROR.
//
// Before converting, ucnv_resetFromUnicode() is called on purpose: resetting
// the opposite direction must not affect this one.
// Invariant violations (pointers beyond their limits, overflow reported
// although the target is not full, success without consuming all input) are
// turned into U_INTERNAL_PROGRAM_ERROR.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the `step` parameter, loop
// headers, breaks and block-comment delimiters); the code tokens below are
// left exactly as found.
544 stepToUnicode(ConversionCase
&cc
, UConverter
*cnv
,
545 UChar
*result
, int32_t resultCapacity
,
546 int32_t *resultOffsets
, /* also resultCapacity */
548 UErrorCode
*pErrorCode
) {
549 const char *source
, *sourceLimit
, *bytesLimit
;
550 UChar
*target
, *targetLimit
, *resultLimit
;
553 source
=(const char *)cc
.bytes
;
555 bytesLimit
=source
+cc
.bytesLength
;
556 resultLimit
=result
+resultCapacity
;
559 // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
560 // move only one buffer (in vs. out) at a time to be extra mean
561 // step==0 performs bulk conversion and generates offsets
563 // initialize the partial limits for the loop
565 // use the entire buffers
566 sourceLimit
=bytesLimit
;
567 targetLimit
=resultLimit
;
570 // start with empty partial buffers
575 // output offsets only for bulk conversion
580 // resetting the opposite conversion direction must not affect this one
581 ucnv_resetFromUnicode(cnv
);
585 &target
, targetLimit
,
586 &source
, sourceLimit
,
590 // check pointers and errors
591 if(source
>sourceLimit
|| target
>targetLimit
) {
592 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
594 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
595 if(target
!=targetLimit
) {
596 // buffer overflow must only be set when the target is filled
597 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
599 } else if(targetLimit
==resultLimit
) {
600 // not just a partial overflow
604 // the partial target is filled, set a new limit, reset the error and continue
605 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
606 *pErrorCode
=U_ZERO_ERROR
;
607 } else if(U_FAILURE(*pErrorCode
)) {
608 // some other error occurred, done
611 if(source
!=sourceLimit
) {
612 // when no error occurs, then the input must be consumed
613 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
617 if(sourceLimit
==bytesLimit
) {
622 // the partial conversion succeeded, set a new limit and continue
623 sourceLimit
=(bytesLimit
-source
)>=step
? source
+step
: bytesLimit
;
624 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==bytesLimit
);
627 } else /* step<0 */ {
629 * step==-1: call only ucnv_getNextUChar()
630 * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
631 * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
632 * else give it at most (-step-2)/2 bytes
636 // end the loop by getting an index out of bounds error
638 // resetting the opposite conversion direction must not affect this one
639 ucnv_resetFromUnicode(cnv
);
642 if((step
&1)!=0 /* odd: -1, -3, -5, ... */) {
643 sourceLimit
=source
; // use sourceLimit not as a real limit
644 // but to remember the pre-getNextUChar source pointer
645 c
=ucnv_getNextUChar(cnv
, &source
, bytesLimit
, pErrorCode
);
647 // check pointers and errors
648 if(*pErrorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) {
649 if(source
!=bytesLimit
) {
650 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
652 *pErrorCode
=U_ZERO_ERROR
;
655 } else if(U_FAILURE(*pErrorCode
)) {
658 // source may not move if c is from previous overflow
660 if(target
==resultLimit
) {
661 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
667 *target
++=U16_LEAD(c
);
668 if(target
==resultLimit
) {
669 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
672 *target
++=U16_TRAIL(c
);
675 // alternate between -n-1 and -n but leave -1 alone
679 } else /* step is even */ {
680 // allow only one UChar output
681 targetLimit
=target
<resultLimit
? target
+1 : resultLimit
;
683 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
684 // and never output offsets
686 sourceLimit
=bytesLimit
;
688 sourceLimit
=source
+(-step
-2)/2;
689 if(sourceLimit
>bytesLimit
) {
690 sourceLimit
=bytesLimit
;
695 &target
, targetLimit
,
696 &source
, sourceLimit
,
697 NULL
, (UBool
)(sourceLimit
==bytesLimit
), pErrorCode
);
699 // check pointers and errors
700 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
701 if(target
!=targetLimit
) {
702 // buffer overflow must only be set when the target is filled
703 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
705 } else if(targetLimit
==resultLimit
) {
706 // not just a partial overflow
710 // the partial target is filled, set a new limit and continue
711 *pErrorCode
=U_ZERO_ERROR
;
712 } else if(U_FAILURE(*pErrorCode
)) {
713 // some other error occurred, done
716 if(source
!=sourceLimit
) {
717 // when no error occurs, then the input must be consumed
718 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
722 // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
730 return (int32_t)(target
-result
);
// Runs one toUnicode test case.
//  1. Opens the converter for cc.charset via cnv_open() and installs
//     `callback` with `option` as its context (ucnv_setToUCallBack);
//     either failure is reported with errln().
//  2. Runs stepToUnicode() once per entry of the local `steps` table
//     ("bulk" first, then the stepped and ucnv_getNextUChar()-based
//     variants) and checks each result. The loop stops as soon as `ok`
//     becomes false. Negative steps are skipped when !cc.finalFlush because
//     ucnv_getNextUChar() always implies flush. Offsets are passed only for
//     step==0 and only checked when the case supplies cc.offsets.
//     The converter is reset between runs only after an error or when the
//     case does not flush, so that flushing itself is shown to reset it.
//  3. If everything passed and cc.finalFlush is set, also exercises
//     ucnv_toUChars() directly and in a second "preflight toUChars" call,
//     where U_STRING_NOT_TERMINATED_WARNING / U_BUFFER_OVERFLOW_ERROR are
//     treated as success. The enclosing `while` is only a breakable block,
//     not a real loop.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the `steps` table header, the
// checkToUnicode calls' heads and ucnv_close); the code tokens below are
// left exactly as found.
734 ConversionTest::ToUnicodeCase(ConversionCase
&cc
, UConverterToUCallback callback
, const char *option
) {
736 UErrorCode errorCode
;
738 // open the converter
739 errorCode
=U_ZERO_ERROR
;
740 cnv
=cnv_open(cc
.charset
, errorCode
);
741 if(U_FAILURE(errorCode
)) {
742 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
743 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
749 ucnv_setToUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
);
750 if(U_FAILURE(errorCode
)) {
751 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
752 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
758 int32_t resultOffsets
[200];
760 int32_t resultLength
;
763 static const struct {
767 { 0, "bulk" }, // must be first for offsets to be checked
772 { -2, "toU(bulk)+getNext" },
773 { -3, "getNext+toU(bulk)" },
774 { -4, "toU(1)+getNext" },
775 { -5, "getNext+toU(1)" },
776 { -12, "toU(5)+getNext" },
777 { -13, "getNext+toU(5)" },
782 for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) {
784 if(step
<0 && !cc
.finalFlush
) {
785 // skip ucnv_getNextUChar() if !finalFlush because
786 // ucnv_getNextUChar() always implies flush
790 // bulk test is first, then offsets are not checked any more
793 errorCode
=U_ZERO_ERROR
;
794 resultLength
=stepToUnicode(cc
, cnv
,
795 result
, LENGTHOF(result
),
796 step
==0 ? resultOffsets
: NULL
,
799 cc
, cnv
, steps
[i
].name
,
800 result
, resultLength
,
801 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
803 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
804 // reset if an error occurred or we did not flush
805 // otherwise do nothing to make sure that flushing resets
806 ucnv_resetToUnicode(cnv
);
810 // not a real loop, just a convenience for breaking out of the block
811 while(ok
&& cc
.finalFlush
) {
812 // test ucnv_toUChars()
813 memset(result
, 0, sizeof(result
));
815 errorCode
=U_ZERO_ERROR
;
816 resultLength
=ucnv_toUChars(cnv
,
817 result
, LENGTHOF(result
),
818 (const char *)cc
.bytes
, cc
.bytesLength
,
822 result
, resultLength
,
830 // keep the correct result for simple checking
831 errorCode
=U_ZERO_ERROR
;
832 resultLength
=ucnv_toUChars(cnv
,
834 (const char *)cc
.bytes
, cc
.bytesLength
,
836 if(errorCode
==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
837 errorCode
=U_ZERO_ERROR
;
840 cc
, cnv
, "preflight toUChars",
841 result
, resultLength
,
// Verifies the outcome of one toUnicode conversion run against the expected
// values in `cc` and reports a detailed failure message if they differ.
//   cc              expected data for the test case
//   cnv             converter that was just used; queried for its last
//                   invalid input with ucnv_getInvalidChars()
//   name            label of the conversion variant, used in messages
//   result/resultLength   converted UTF-16 output
//   resultOffsets   offsets produced by the run (only compared when
//                   cc.offsets is non-NULL)
//   resultErrorCode error code produced by the run
// Checks, in this order: result length, result string, offsets, error code,
// length of the last invalid input, and its content. The first mismatch
// selects the message (`msg`; NULL means "ok" per the inline comment).
// On failure, all inputs and outputs are rendered with printBytes /
// printUnicode / printOffsets into one shared 2000-char buffer and printed
// with errln(). The overrun test on that buffer runs only after the strings
// have been written.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the return statements and parts
// of the format string/arguments); the code tokens below are left exactly as
// found.
852 ConversionTest::checkToUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
853 const UChar
*result
, int32_t resultLength
,
854 const int32_t *resultOffsets
,
855 UErrorCode resultErrorCode
) {
856 char resultInvalidChars
[8];
857 int8_t resultInvalidLength
;
858 UErrorCode errorCode
;
862 // reset the message; NULL will mean "ok"
865 errorCode
=U_ZERO_ERROR
;
866 resultInvalidLength
=sizeof(resultInvalidChars
);
867 ucnv_getInvalidChars(cnv
, resultInvalidChars
, &resultInvalidLength
, &errorCode
);
868 if(U_FAILURE(errorCode
)) {
869 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
870 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
874 // check everything that might have gone wrong
875 if(cc
.unicodeLength
!=resultLength
) {
876 msg
="wrong result length";
877 } else if(0!=u_memcmp(cc
.unicode
, result
, cc
.unicodeLength
)) {
878 msg
="wrong result string";
879 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.unicodeLength
*sizeof(*cc
.offsets
))) {
881 } else if(cc
.outErrorCode
!=resultErrorCode
) {
882 msg
="wrong error code";
883 } else if(cc
.invalidLength
!=resultInvalidLength
) {
884 msg
="wrong length of last invalid input";
885 } else if(0!=memcmp(cc
.invalidChars
, resultInvalidChars
, cc
.invalidLength
)) {
886 msg
="wrong last invalid input";
892 char buffer
[2000]; // one buffer for all strings
893 char *s
, *bytesString
, *unicodeString
, *resultString
,
894 *offsetsString
, *resultOffsetsString
,
895 *invalidCharsString
, *resultInvalidCharsString
;
897 bytesString
=s
=buffer
;
898 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
);
899 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
=s
);
900 s
=printUnicode(result
, resultLength
, resultString
=s
);
901 s
=printOffsets(cc
.offsets
, cc
.unicodeLength
, offsetsString
=s
);
902 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
903 s
=printBytes(cc
.invalidChars
, cc
.invalidLength
, invalidCharsString
=s
);
904 s
=printBytes((uint8_t *)resultInvalidChars
, resultInvalidLength
, resultInvalidCharsString
=s
);
906 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
907 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
908 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
912 errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
914 " expected <%s>[%d]\n"
917 " result offsets <%s>\n"
918 " error code expected %s got %s\n"
919 " invalidChars expected <%s> got <%s>\n",
920 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
921 bytesString
, cc
.bytesLength
,
922 unicodeString
, cc
.unicodeLength
,
923 resultString
, resultLength
,
926 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
927 invalidCharsString
, resultInvalidCharsString
);
933 // fromUnicode test worker functions --------------------------------------- ***
// fromUnicode worker: converts the test case's Unicode input with `cnv` into
// `result` (capacity resultCapacity bytes) and returns the number of bytes
// written (target-result). The outcome is reported through *pErrorCode.
//
// The conversion is driven according to `step` (per the inline comments):
//  - step==0: one bulk ucnv_fromUnicode() call over the entire buffers; this
//    is the only mode that generates offsets.
//  - otherwise: ucnv_fromUnicode() is called with partial input/output
//    windows that grow by at most `step` at a time, moving only one of the
//    two limits per iteration. A U_BUFFER_OVERFLOW_ERROR on a partial target
//    is cleared and the target window enlarged; flush is only set once the
//    source window reaches the end of the input and cc.finalFlush is set.
//
// Before converting, ucnv_resetToUnicode() is called on purpose: resetting
// the opposite direction must not affect this one.
// Invariant violations (pointers beyond their limits, overflow reported
// although the target is not full, success without consuming all input) are
// turned into U_INTERNAL_PROGRAM_ERROR.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the `step` parameter, source
// initialization, loop headers and breaks); the code tokens below are left
// exactly as found.
936 stepFromUnicode(ConversionCase
&cc
, UConverter
*cnv
,
937 char *result
, int32_t resultCapacity
,
938 int32_t *resultOffsets
, /* also resultCapacity */
940 UErrorCode
*pErrorCode
) {
941 const UChar
*source
, *sourceLimit
, *unicodeLimit
;
942 char *target
, *targetLimit
, *resultLimit
;
947 unicodeLimit
=source
+cc
.unicodeLength
;
948 resultLimit
=result
+resultCapacity
;
950 // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
951 // move only one buffer (in vs. out) at a time to be extra mean
952 // step==0 performs bulk conversion and generates offsets
954 // initialize the partial limits for the loop
956 // use the entire buffers
957 sourceLimit
=unicodeLimit
;
958 targetLimit
=resultLimit
;
961 // start with empty partial buffers
966 // output offsets only for bulk conversion
971 // resetting the opposite conversion direction must not affect this one
972 ucnv_resetToUnicode(cnv
);
975 ucnv_fromUnicode(cnv
,
976 &target
, targetLimit
,
977 &source
, sourceLimit
,
981 // check pointers and errors
982 if(source
>sourceLimit
|| target
>targetLimit
) {
983 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
985 } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
986 if(target
!=targetLimit
) {
987 // buffer overflow must only be set when the target is filled
988 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
990 } else if(targetLimit
==resultLimit
) {
991 // not just a partial overflow
995 // the partial target is filled, set a new limit, reset the error and continue
996 targetLimit
=(resultLimit
-target
)>=step
? target
+step
: resultLimit
;
997 *pErrorCode
=U_ZERO_ERROR
;
998 } else if(U_FAILURE(*pErrorCode
)) {
999 // some other error occurred, done
1002 if(source
!=sourceLimit
) {
1003 // when no error occurs, then the input must be consumed
1004 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
1008 if(sourceLimit
==unicodeLimit
) {
1013 // the partial conversion succeeded, set a new limit and continue
1014 sourceLimit
=(unicodeLimit
-source
)>=step
? source
+step
: unicodeLimit
;
1015 flush
=(UBool
)(cc
.finalFlush
&& sourceLimit
==unicodeLimit
);
1019 return (int32_t)(target
-result
);
// Runs one fromUnicode test case.
//  1. Opens the converter for cc.charset via cnv_open(); if `callback` is
//     non-NULL, installs it with `option` as its context
//     (ucnv_setFromUCallBack); applies cc.fallbacks via ucnv_setFallback();
//     and, if cc.subchar is a non-empty string, sets it as the substitution
//     character (ucnv_setSubstChars). Failures are reported with errln().
//  2. Runs stepFromUnicode() once per entry of the local `steps` table
//     ("bulk" first) and validates each result with checkFromUnicode(); the
//     loop stops as soon as `ok` becomes false. Offsets are passed only for
//     step==0 and only checked when the case supplies cc.offsets.
//     The converter is reset between runs only after an error or when the
//     case does not flush, so that flushing itself is shown to reset it.
//  3. If everything passed and cc.finalFlush is set, also exercises
//     ucnv_fromUChars() directly and in a second "preflight fromUChars"
//     call, where U_STRING_NOT_TERMINATED_WARNING / U_BUFFER_OVERFLOW_ERROR
//     are treated as success. The enclosing `while` is only a breakable
//     block, not a real loop.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. most of the `steps` table, call
// tails and ucnv_close); the code tokens below are left exactly as found.
1023 ConversionTest::FromUnicodeCase(ConversionCase
&cc
, UConverterFromUCallback callback
, const char *option
) {
1025 UErrorCode errorCode
;
1027 // open the converter
1028 errorCode
=U_ZERO_ERROR
;
1029 cnv
=cnv_open(cc
.charset
, errorCode
);
1030 if(U_FAILURE(errorCode
)) {
1031 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1032 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1037 if(callback
!=NULL
) {
1038 ucnv_setFromUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
);
1039 if(U_FAILURE(errorCode
)) {
1040 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1041 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1047 // set the fallbacks flag
1048 // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1049 ucnv_setFallback(cnv
, cc
.fallbacks
);
1054 if((length
=(int32_t)strlen(cc
.subchar
))!=0) {
1055 ucnv_setSubstChars(cnv
, cc
.subchar
, (int8_t)length
, &errorCode
);
1056 if(U_FAILURE(errorCode
)) {
1057 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubChars() failed - %s",
1058 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
));
1064 int32_t resultOffsets
[200];
1066 int32_t resultLength
;
1069 static const struct {
1073 { 0, "bulk" }, // must be first for offsets to be checked
1081 for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) {
1084 // bulk test is first, then offsets are not checked any more
1087 errorCode
=U_ZERO_ERROR
;
1088 resultLength
=stepFromUnicode(cc
, cnv
,
1089 result
, LENGTHOF(result
),
1090 step
==0 ? resultOffsets
: NULL
,
1092 ok
=checkFromUnicode(
1093 cc
, cnv
, steps
[i
].name
,
1094 (uint8_t *)result
, resultLength
,
1095 cc
.offsets
!=NULL
? resultOffsets
: NULL
,
1097 if(U_FAILURE(errorCode
) || !cc
.finalFlush
) {
1098 // reset if an error occurred or we did not flush
1099 // otherwise do nothing to make sure that flushing resets
1100 ucnv_resetFromUnicode(cnv
);
1104 // not a real loop, just a convenience for breaking out of the block
1105 while(ok
&& cc
.finalFlush
) {
1106 // test ucnv_fromUChars()
1107 memset(result
, 0, sizeof(result
));
1109 errorCode
=U_ZERO_ERROR
;
1110 resultLength
=ucnv_fromUChars(cnv
,
1111 result
, LENGTHOF(result
),
1112 cc
.unicode
, cc
.unicodeLength
,
1114 ok
=checkFromUnicode(
1115 cc
, cnv
, "fromUChars",
1116 (uint8_t *)result
, resultLength
,
1123 // test preflighting
1124 // keep the correct result for simple checking
1125 errorCode
=U_ZERO_ERROR
;
1126 resultLength
=ucnv_fromUChars(cnv
,
1128 cc
.unicode
, cc
.unicodeLength
,
1130 if(errorCode
==U_STRING_NOT_TERMINATED_WARNING
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1131 errorCode
=U_ZERO_ERROR
;
1133 ok
=checkFromUnicode(
1134 cc
, cnv
, "preflight fromUChars",
1135 (uint8_t *)result
, resultLength
,
// Verifies the outcome of one fromUnicode conversion run against the
// expected values in `cc` and reports a detailed failure message if they
// differ. FromUnicodeCase() stores this function's result in its `ok` flag.
//   cc              expected data for the test case
//   cnv             converter that was just used; queried for its last
//                   invalid input with ucnv_getInvalidUChars()
//   name            label of the conversion variant, used in messages
//   result/resultLength   converted byte output
//   resultOffsets   offsets produced by the run (only compared when
//                   cc.offsets is non-NULL)
//   resultErrorCode error code produced by the run
// Checks, in this order: result length, result bytes, offsets, error code,
// length of the last invalid input, and its content. The first mismatch
// selects the message (`msg`; NULL means "ok" per the inline comment).
// On failure, all inputs and outputs are rendered with printUnicode /
// printBytes / printOffsets into one shared 2000-char buffer and printed
// with errln(). The overrun test on that buffer runs only after the strings
// have been written.
// NOTE(review): this listing is extraction-damaged (leading original line
// numbers, split tokens, dropped lines incl. the return statements and parts
// of the format string/arguments); the code tokens below are left exactly as
// found.
1146 ConversionTest::checkFromUnicode(ConversionCase
&cc
, UConverter
*cnv
, const char *name
,
1147 const uint8_t *result
, int32_t resultLength
,
1148 const int32_t *resultOffsets
,
1149 UErrorCode resultErrorCode
) {
1150 UChar resultInvalidUChars
[8];
1151 int8_t resultInvalidLength
;
1152 UErrorCode errorCode
;
1156 // reset the message; NULL will mean "ok"
1159 errorCode
=U_ZERO_ERROR
;
1160 resultInvalidLength
=LENGTHOF(resultInvalidUChars
);
1161 ucnv_getInvalidUChars(cnv
, resultInvalidUChars
, &resultInvalidLength
, &errorCode
);
1162 if(U_FAILURE(errorCode
)) {
1163 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1164 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
));
1168 // check everything that might have gone wrong
1169 if(cc
.bytesLength
!=resultLength
) {
1170 msg
="wrong result length";
1171 } else if(0!=memcmp(cc
.bytes
, result
, cc
.bytesLength
)) {
1172 msg
="wrong result string";
1173 } else if(cc
.offsets
!=NULL
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.bytesLength
*sizeof(*cc
.offsets
))) {
1174 msg
="wrong offsets";
1175 } else if(cc
.outErrorCode
!=resultErrorCode
) {
1176 msg
="wrong error code";
1177 } else if(cc
.invalidLength
!=resultInvalidLength
) {
1178 msg
="wrong length of last invalid input";
1179 } else if(0!=u_memcmp(cc
.invalidUChars
, resultInvalidUChars
, cc
.invalidLength
)) {
1180 msg
="wrong last invalid input";
1186 char buffer
[2000]; // one buffer for all strings
1187 char *s
, *unicodeString
, *bytesString
, *resultString
,
1188 *offsetsString
, *resultOffsetsString
,
1189 *invalidCharsString
, *resultInvalidUCharsString
;
1191 unicodeString
=s
=buffer
;
1192 s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
);
1193 s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
=s
);
1194 s
=printBytes(result
, resultLength
, resultString
=s
);
1195 s
=printOffsets(cc
.offsets
, cc
.bytesLength
, offsetsString
=s
);
1196 s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
);
1197 s
=printUnicode(cc
.invalidUChars
, cc
.invalidLength
, invalidCharsString
=s
);
1198 s
=printUnicode(resultInvalidUChars
, resultInvalidLength
, resultInvalidUCharsString
=s
);
1200 if((s
-buffer
)>(int32_t)sizeof(buffer
)) {
1201 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1202 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
));
1206 errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1207 " unicode <%s>[%d]\n"
1208 " expected <%s>[%d]\n"
1209 " result <%s>[%d]\n"
1211 " result offsets <%s>\n"
1212 " error code expected %s got %s\n"
1213 " invalidChars expected <%s> got <%s>\n",
1214 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
,
1215 unicodeString
, cc
.unicodeLength
,
1216 bytesString
, cc
.bytesLength
,
1217 resultString
, resultLength
,
1219 resultOffsetsString
,
1220 u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
),
1221 invalidCharsString
, resultInvalidUCharsString
);
1227 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */